/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_object_internal.h>
#include <vm/vm_page_internal.h>
#include <vm/vm_compressor_xnu.h>
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_init_xnu.h>
#include <vm/vm_fault.h>
#include <vm/vm_memtag.h>
#include <kern/misc_protos.h>
#include <vm/cpm_internal.h>
#include <kern/ledger.h>
#include <kern/bits.h>
#include <kern/startup.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>
#include <sys/kdebug_triage.h>

#include <san/kasan.h>
#include <kern/kext_alloc.h>
#include <kern/backtrace.h>
#include <os/hash.h>
#include <kern/zalloc_internal.h>
#include <libkern/crypto/rand.h>

/*
 * Variables exported by this module.
 */

SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];

static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
    KMEM_RANGE_ID_NUM_PTR);
#define KMEM_GOBJ_THRESHOLD   (32ULL << 20)
#if DEBUG || DEVELOPMENT
#define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
#define KMEM_OUTLIER_SIZE     0
#define KMEM_OUTLIER_ALIGN    1
btlog_t kmem_outlier_log;
#endif /* DEBUG || DEVELOPMENT */

__startup_data static vm_map_size_t data_range_size;
__startup_data static vm_map_size_t ptr_range_size;
__startup_data static vm_map_size_t sprayqtn_range_size;

#pragma mark helpers

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kma_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmr_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmf_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__abortlike
static void
__kmem_invalid_size_panic(
	vm_map_t        map,
	vm_size_t       size,
	uint32_t        flags)
{
	panic("kmem(map=%p, flags=0x%x): invalid size %zd",
	    map, flags, (size_t)size);
}

__abortlike
static void
__kmem_invalid_arguments_panic(
	const char     *what,
	vm_map_t        map,
	vm_address_t    address,
	vm_size_t       size,
	uint32_t        flags)
{
	panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
	    "invalid arguments passed",
	    what, map, (void *)address, (size_t)size, flags);
}

__abortlike
static void
__kmem_failed_panic(
	vm_map_t        map,
	vm_size_t       size,
	uint32_t        flags,
	kern_return_t   kr,
	const char     *what)
{
	panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
	    what, map, (size_t)size, flags, kr);
}

__abortlike
static void
__kmem_entry_not_found_panic(
	vm_map_t        map,
	vm_offset_t     addr)
{
	panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
}

static inline vm_object_t
__kmem_object(kmem_flags_t flags)
{
	if (flags & KMEM_COMPRESSOR) {
		if (flags & KMEM_KOBJECT) {
			panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
		}
		return compressor_object;
	}
	if (!(flags & KMEM_KOBJECT)) {
		panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
	}
	return kernel_object_default;
}

static inline pmap_mapping_type_t
__kmem_mapping_type(kmem_flags_t flags)
{
	if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) {
		return PMAP_MAPPING_TYPE_DEFAULT;
	} else {
		return PMAP_MAPPING_TYPE_RESTRICTED;
	}
}

static inline vm_size_t
__kmem_guard_left(kmem_flags_t flags)
{
	return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_right(kmem_flags_t flags)
{
	return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_size(kmem_flags_t flags)
{
	return __kmem_guard_left(flags) + __kmem_guard_right(flags);
}

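/*
 * Return the size originally requested for an allocation, undoing the
 * page rounding and guard padding recorded at allocation time:
 * kernel-object entries stash the slack in vme_object_or_delta, while
 * regular objects record it in vo_size_delta (via vm_object_set_size()).
 */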
__pure2
static inline vm_size_t
__kmem_entry_orig_size(vm_map_entry_t entry)
{
	vm_object_t object = VME_OBJECT(entry);

	if (entry->vme_kernel_object) {
		return entry->vme_end - entry->vme_start -
		       entry->vme_object_or_delta;
	} else {
		return object->vo_size - object->vo_size_delta;
	}
}


#pragma mark kmem range methods

#if __arm64__
// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
#define mach_vm_range_load(r, r_min, r_max) \
	asm("ldp %[rmin], %[rmax], [%[range]]" \
	    : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
	    : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
#else
#define mach_vm_range_load(r, rmin, rmax) \
	({ rmin = (r)->min_address; rmax = (r)->max_address; })
#endif

__abortlike
static void
__mach_vm_range_overflow(
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
	    addr, addr, size);
}

__abortlike
static void
__mach_vm_range_invalid(
	mach_vm_offset_t        min_address,
	mach_vm_offset_t        max_address)
{
	panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
	    min_address, max_address);
}

__header_always_inline mach_vm_size_t
mach_vm_range_size(const struct mach_vm_range *r)
{
	mach_vm_offset_t rmin, rmax;

	mach_vm_range_load(r, rmin, rmax);
	return rmax - rmin;
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
{
	mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	return (addr >= rmin) & (addr < rmax);
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(
	const struct mach_vm_range *r,
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
	const struct mach_vm_range *r1,
	const struct mach_vm_range *r2)
{
	mach_vm_offset_t r1_min, r1_max;
	mach_vm_offset_t r2_min, r2_max;

	mach_vm_range_load(r1, r1_min, r1_max);
	r2_min = r2->min_address;
	r2_max = r2->max_address;

	if (r1_min > r1_max) {
		__mach_vm_range_invalid(r1_min, r1_max);
	}

	if (r2_min > r2_max) {
		__mach_vm_range_invalid(r2_min, r2_max);
	}

	return r1_max > r2_min && r1_min < r2_max;
}
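
/*
 * Note: ranges are half-open intervals [min, max), so adjacent ranges
 * such as [0x1000, 0x2000) and [0x2000, 0x3000) do not intersect.
 */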

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
	const struct mach_vm_range *r1,
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	struct mach_vm_range r2;

#if CONFIG_KERNEL_TAGGING
	addr = VM_KERNEL_STRIP_UPTR(addr);
#endif /* CONFIG_KERNEL_TAGGING */

	r2.min_address = addr;
	if (os_add_overflow(addr, size, &r2.max_address)) {
		__mach_vm_range_overflow(addr, size);
	}

	return mach_vm_range_intersects(r1, &r2);
}

bool
kmem_range_id_contains(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
}

__abortlike
static void
kmem_range_invalid_panic(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	const struct mach_vm_range *r = &kmem_ranges[range_id];
	mach_vm_offset_t rmin, rmax;

	mach_vm_range_load(r, rmin, rmax);
	if (addr + size < rmin) {
		panic("addr %p + size %llu overflows %p", (void *)addr, size,
		    (void *)(addr + size));
	}
404 panic("addr %p + size %llu doesnt fit in one range (id: %u min: %p max: %p)",
405 (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
}

/*
 * Return whether the entire allocation is contained in the given range
 */
static bool
kmem_range_contains_fully(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	const struct mach_vm_range *r = &kmem_ranges[range_id];
	mach_vm_offset_t rmin, rmax;
	bool result = false;

	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	result = (addr >= rmin) & (addr < rmax);
	if (__improbable(result
	    && ((addr + size < rmin) || (addr + size > rmax)))) {
		kmem_range_invalid_panic(range_id, addr, size);
	}
	return result;
}

vm_map_size_t
kmem_range_id_size(kmem_range_id_t range_id)
{
	return mach_vm_range_size(&kmem_ranges[range_id]);
}

kmem_range_id_t
kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
{
	kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;

	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
		if (kmem_range_contains_fully(range_id, addr, size)) {
			return range_id;
		}
	}
	return KMEM_RANGE_ID_NONE;
}

bool
kmem_is_ptr_range(vm_map_range_id_t range_id)
{
	return (range_id >= KMEM_RANGE_ID_FIRST) &&
	       (range_id <= KMEM_RANGE_ID_NUM_PTR);
}

__abortlike
static void
kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
{
	panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
	    (void *)addr);
}

mach_vm_range_t
kmem_validate_range_for_overwrite(
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);

	if (kmem_is_ptr_range(range_id)) {
		kmem_range_invalid_for_overwrite(addr);
	}

	return &kmem_ranges[range_id];
}


#pragma mark entry parameters


__abortlike
static void
__kmem_entry_validate_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	uint32_t        flags,
	kmem_guard_t    guard)
{
	const char *what = "???";

	if (entry->vme_atomic != guard.kmg_atomic) {
		what = "atomicity";
	} else if (entry->is_sub_map != guard.kmg_submap) {
		what = "objectness";
	} else if (addr != entry->vme_start) {
		what = "left bound";
	} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
		what = "right bound";
	} else if (guard.kmg_context != entry->vme_context) {
		what = "guard";
	}

	panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
	    "entry:%p %s mismatch guard(0x%08x)",
	    map, (void *)addr, size, flags, entry,
	    what, guard.kmg_context);
}

static bool
__kmem_entry_validate_guard(
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	kmem_flags_t    flags,
	kmem_guard_t    guard)
{
	if (entry->vme_atomic != guard.kmg_atomic) {
		return false;
	}

	if (!guard.kmg_atomic) {
		return true;
	}

	if (entry->is_sub_map != guard.kmg_submap) {
		return false;
	}

	if (addr != entry->vme_start) {
		return false;
	}

	if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
		return false;
	}

	if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
		return false;
	}

	return true;
}

void
kmem_entry_validate_guard(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	kmem_guard_t    guard)
{
	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
	}
}

__abortlike
static void
__kmem_entry_validate_object_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	kmem_flags_t    flags)
{
	const char *what;
	const char *verb;

	if (entry->is_sub_map) {
		panic("kmem(map=%p) entry %p is a submap", map, entry);
	}

	if (flags & KMEM_KOBJECT) {
		what = "kernel";
		verb = "isn't";
	} else if (flags & KMEM_COMPRESSOR) {
		what = "compressor";
		verb = "isn't";
	} else if (entry->vme_kernel_object) {
		what = "kernel";
		verb = "is unexpectedly";
	} else {
		what = "compressor";
		verb = "is unexpectedly";
	}

	panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
	    map, flags, entry, verb, what);
}

static bool
__kmem_entry_validate_object(
	vm_map_entry_t  entry,
	kmem_flags_t    flags)
{
	if (entry->is_sub_map) {
		return false;
	}
	if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
		return false;
	}

	return (bool)(flags & KMEM_COMPRESSOR) ==
	       (VME_OBJECT(entry) == compressor_object);
}

vm_size_t
kmem_size_guard(
	vm_map_t        map,
	vm_offset_t     addr,
	kmem_guard_t    guard)
{
	kmem_flags_t flags = KMEM_GUESS_SIZE;
	vm_map_entry_t entry;
	vm_size_t size;

	vm_map_lock_read(map);

#if KASAN_CLASSIC
	addr -= PAGE_SIZE;
#endif /* KASAN_CLASSIC */
	addr = vm_memtag_canonicalize_address(addr);

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		__kmem_entry_not_found_panic(map, addr);
	}

	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
	}

	size = __kmem_entry_orig_size(entry);

	vm_map_unlock_read(map);

	return size;
}

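/*
 * Fold up to 8 return addresses of the caller's backtrace into a 16-bit
 * Jenkins hash; kmem_apply_security_policy() uses this to derive a
 * pseudo-random but call-site-stable range and direction for untyped
 * pointer allocations.
 */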
static inline uint16_t
kmem_hash_backtrace(
	void                    *fp)
{
	uint64_t  bt_count;
	uintptr_t bt[8] = {};

	struct backtrace_control ctl = {
		.btc_frame_addr = (uintptr_t)fp,
	};

	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
}

static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
    "Insufficient bits to represent ptr ranges");

kmem_range_id_t
kmem_adjust_range_id(
	uint32_t hash)
{
	return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
	       (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
}
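
/*
 * Illustrative example (the values are assumptions, not the real
 * tunables): with kmem_ptr_ranges == 2 and KMEM_RANGE_MASK == 0x3,
 * a hash of 0x7 maps to KMEM_RANGE_ID_PTR_0 + (0x7 & 0x3) % 2, i.e.
 * KMEM_RANGE_ID_PTR_0 + 1.
 */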

static bool
kmem_use_sprayqtn(
	kma_flags_t             kma_flags,
	vm_map_size_t           map_size,
	vm_offset_t             mask)
{
	/*
	 * Pointer allocations that are above the guard objects threshold or have
	 * leading guard pages with non standard alignment requests are redirected
	 * to the sprayqtn range.
	 */
#if DEBUG || DEVELOPMENT
	btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
	    BTREF_GET_NOWAIT : 0;

	if ((kma_flags & KMA_SPRAYQTN) == 0) {
		if (map_size > KMEM_GOBJ_THRESHOLD) {
			btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
			    btref_get(__builtin_frame_address(0), flags));
		} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
			btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
			    btref_get(__builtin_frame_address(0), flags));
		}
	}
#endif /* DEBUG || DEVELOPMENT */

	return (kma_flags & KMA_SPRAYQTN) ||
	       (map_size > KMEM_GOBJ_THRESHOLD) ||
	       ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
}

static void
kmem_apply_security_policy(
	vm_map_t                map,
	kma_flags_t             kma_flags,
	kmem_guard_t            guard,
	vm_map_size_t           map_size,
	vm_offset_t             mask,
	vm_map_kernel_flags_t  *vmk_flags,
	bool                    assert_dir __unused)
{
	kmem_range_id_t range_id;
	bool            from_right;
	uint16_t        type_hash = guard.kmg_type_hash;

	if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
		return;
	}

	/*
	 * A non-zero type-hash must be passed by krealloc_type
	 */
#if (DEBUG || DEVELOPMENT)
	if (assert_dir && !(kma_flags & KMA_DATA)) {
		assert(type_hash != 0);
	}
#endif

	if (kma_flags & KMA_DATA) {
		range_id = KMEM_RANGE_ID_DATA;
		/*
		 * As an optimization in KMA_DATA to avoid fragmentation,
		 * allocate static carveouts at the end of the DATA range.
		 */
		from_right = (bool)(kma_flags & KMA_PERMANENT);
	} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
		range_id = KMEM_RANGE_ID_SPRAYQTN;
		from_right = (bool)(kma_flags & KMA_PERMANENT);
	} else if (type_hash) {
		range_id = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
		from_right = type_hash & KMEM_DIRECTION_MASK;
	} else {
		/*
		 * Range id needs to correspond to one of the PTR ranges
		 */
		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
		range_id = kmem_adjust_range_id(type_hash);
		from_right = type_hash & KMEM_DIRECTION_MASK;
	}

	vmk_flags->vmkf_range_id = range_id;
	vmk_flags->vmkf_last_free = from_right;
}

#pragma mark allocation

static kmem_return_t
kmem_alloc_guard_internal(
	vm_map_t                map,
	vm_size_t               size,
	vm_offset_t             mask,
	kma_flags_t             flags,
	kmem_guard_t            guard,
	kern_return_t         (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
{
	vm_object_t             object;
	vm_offset_t             delta = 0;
	vm_map_entry_t          entry = NULL;
	vm_map_offset_t         map_addr, fill_start;
	vm_map_size_t           map_size, fill_size;
	vm_page_t               guard_left = VM_PAGE_NULL;
	vm_page_t               guard_right = VM_PAGE_NULL;
	vm_page_t               wired_page_list = VM_PAGE_NULL;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
	bool                    skip_guards;
	kmem_return_t           kmr = { };

	assert(kernel_map && map->pmap == kernel_pmap);

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
	    size, 0, 0, 0);
#endif


	if (size == 0 ||
	    (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
	    (size < __kmem_guard_size(ANYF(flags)))) {
		__kmem_invalid_size_panic(map, size, flags);
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit,
	 * but scaled by installed memory above this
	 *
	 * Note: kmem_alloc_contig_guard() is immune to this check.
	 */
	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
	    alloc_pages == NULL &&
	    size > MAX(1ULL << 31, sane_size / 64))) {
		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
		goto out_error;
	}
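
	/*
	 * Worked example: on a 256GB machine (sane_size == 0x4000000000),
	 * the cap is MAX(2GB, 256GB / 64) == 4GB; on a 64GB machine the
	 * scaled term is only 1GB, so the 2GB floor applies.
	 */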

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.
	 *
	 * However, some maps, and some objects are known
	 * to manage their memory explicitly, and do not need
	 * those to be materialized, which saves memory.
	 *
	 * By placing guard pages on either end of a stack,
	 * they can help detect cases where a thread walks
	 * off either end of its stack.
	 *
	 * They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages. fill_size represents the actual size to populate.
	 * Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */
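	/*
	 * Layout sketch for KMA_GUARD_FIRST | KMA_GUARD_LAST (one page each):
	 *
	 *   map_addr                                      map_addr + map_size
	 *   | guard | fill_start ... fill_start+fill_size |  guard  |
	 *
	 * so fill_start == PAGE_SIZE and fill_size == map_size - 2*PAGE_SIZE.
	 */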

	map_size = round_page(size);
	fill_start = 0;
	fill_size = map_size - __kmem_guard_size(ANYF(flags));

#if KASAN_CLASSIC
	if (flags & KMA_KASAN_GUARD) {
		assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
		delta = ptoa(2);
		map_size += delta;
	}
#else
	(void)delta;
#endif /* KASAN_CLASSIC */

	skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
	    map->never_faults;

	if (flags & KMA_GUARD_FIRST) {
		vmk_flags.vmkf_guard_before = true;
		fill_start += PAGE_SIZE;
	}
	if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
		guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
		if (__improbable(guard_left == VM_PAGE_NULL)) {
			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
			goto out_error;
		}
	}
	if ((flags & KMA_GUARD_LAST) && !skip_guards) {
		guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
		if (__improbable(guard_right == VM_PAGE_NULL)) {
			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
			goto out_error;
		}
	}

	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
		if (alloc_pages) {
			kmr.kmr_return = alloc_pages(fill_size, flags,
			    &wired_page_list);
		} else {
			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
			    &wired_page_list);
		}
		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
			goto out_error;
		}
	}

	/*
	 * Allocate a new object (if necessary).  We must do this before
	 * locking the map, or risk deadlock with the default pager.
	 */
	if (flags & KMA_KOBJECT) {
		object = kernel_object_default;
		vm_object_reference(object);
	} else if (flags & KMA_COMPRESSOR) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
		vm_object_lock(object);
		vm_object_set_size(object, map_size, size);
		/* stabilize the object to prevent shadowing */
		object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
		vm_object_unlock(object);
	}

	if (flags & KMA_LAST_FREE) {
		vmk_flags.vmkf_last_free = true;
	}
	if (flags & KMA_PERMANENT) {
		vmk_flags.vmf_permanent = true;
	}
	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
	    false);

	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
	    vmk_flags, &entry);
	if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
		vm_object_deallocate(object);
		goto out_error;
	}

	map_addr = entry->vme_start;
	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(entry, guard.kmg_tag);
	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		VME_OFFSET_SET(entry, map_addr);
	}

#if KASAN
	if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
		entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
	}
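	/*
	 * Example (assuming 4K pages): for size == 0x1840 the entry spans
	 * two pages, so the slack recorded here is (-0x1840 & 0xFFF) ==
	 * 0x7C0, plus the two KASAN guard pages in `delta` when present;
	 * this is what __kmem_entry_orig_size() subtracts back out.
	 */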
#endif /* KASAN */

	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
		entry->wired_count = 1;
		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
	}

	if (guard_left || guard_right || wired_page_list) {
		vm_object_offset_t offset = 0ull;

		vm_object_lock(object);
		vm_map_unlock(map);

		if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
			offset = map_addr;
		}

		if (guard_left) {
			vm_page_insert(guard_left, object, offset);
			guard_left->vmp_busy = FALSE;
			guard_left = VM_PAGE_NULL;
		}

		if (guard_right) {
			vm_page_insert(guard_right, object,
			    offset + fill_start + fill_size);
			guard_right->vmp_busy = FALSE;
			guard_right = VM_PAGE_NULL;
		}

		if (wired_page_list) {
			kernel_memory_populate_object_and_unlock(object,
			    map_addr + fill_start, offset + fill_start, fill_size,
			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
			    __kmem_mapping_type(ANYF(flags)));
		} else {
			vm_object_unlock(object);
		}
	} else {
		vm_map_unlock(map);
	}

	/*
	 * now that the pages are wired, we no longer have to fear coalesce
	 */
	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		vm_map_simplify(map, map_addr);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    atop(fill_size), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);

#if KASAN
	if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
		/*
		 * We need to allow the range for pageable memory,
		 * or faulting will not be allowed.
		 */
		kasan_notify_address(map_addr, map_size);
	}
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMA_KASAN_GUARD) {
		kmr.kmr_address += PAGE_SIZE;
		kasan_alloc_large(kmr.kmr_address, size);
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
		kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size);
		vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size);
#if KASAN_TBI
		kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size);
#endif /* KASAN_TBI */
	}
#endif /* CONFIG_KERNEL_TAGGING */
	return kmr;

out_error:
	if (flags & KMA_NOFAIL) {
		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
	}
	if (guard_left) {
		guard_left->vmp_snext = wired_page_list;
		wired_page_list = guard_left;
	}
	if (guard_right) {
		guard_right->vmp_snext = wired_page_list;
		wired_page_list = guard_right;
	}
	if (wired_page_list) {
		vm_page_free_list(wired_page_list, FALSE);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */

	return kmr;
}

kmem_return_t
kmem_alloc_guard(
	vm_map_t        map,
	vm_size_t       size,
	vm_offset_t     mask,
	kma_flags_t     flags,
	kmem_guard_t    guard)
{
	return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
}

kmem_return_t
kmem_alloc_contig_guard(
	vm_map_t        map,
	vm_size_t       size,
	vm_offset_t     mask,
	ppnum_t         max_pnum,
	ppnum_t         pnum_mask,
	kma_flags_t     flags,
	kmem_guard_t    guard)
{
	__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
	};

	return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
}

kmem_return_t
kmem_suballoc(
	vm_map_t                parent,
	mach_vm_offset_t       *addr,
	vm_size_t               size,
	vm_map_create_options_t vmc_options,
	int                     vm_flags,
	kms_flags_t             flags,
	vm_tag_t                tag)
{
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vm_map_offset_t map_addr = 0;
	kmem_return_t kmr = { };
	vm_map_t map;

	assert(page_aligned(size));
	assert(parent->pmap == kernel_pmap);

	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);

	if (parent == kernel_map) {
		assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
	}

	if (vmk_flags.vmf_fixed) {
		map_addr = trunc_page(*addr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);

	/*
	 * 1. vm_map_enter() will consume one ref on success.
	 *
	 * 2. make the entry atomic as kernel submaps should never be split.
	 *
	 * 3. instruct vm_map_enter() that it is a fresh submap
	 *    that needs to be taught its bounds as it inserted.
	 */
	vm_map_reference(map);

	vmk_flags.vmkf_submap = true;
	if ((flags & KMS_DATA) == 0) {
		/* FIXME: IOKit submaps get fragmented and can't be atomic */
		vmk_flags.vmkf_submap_atomic = true;
	}
	vmk_flags.vmkf_submap_adjust = true;
	if (flags & KMS_LAST_FREE) {
		vmk_flags.vmkf_last_free = true;
	}
	if (flags & KMS_PERMANENT) {
		vmk_flags.vmf_permanent = true;
	}
	if (flags & KMS_DATA) {
		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
	}

	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
	    vmk_flags, (vm_object_t)map, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kmr.kmr_return != KERN_SUCCESS) {
		if (flags & KMS_NOFAIL) {
			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
			    parent, size, kmr.kmr_return);
		}
		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
		vm_map_deallocate(map);
		vm_map_deallocate(map); /* also removes ref to pmap */
		return kmr;
	}

	/*
	 * For kmem_suballocs that register a claim and are assigned a range, ensure
	 * that the exact same range is returned.
	 */
	if (*addr != 0 && parent == kernel_map &&
	    startup_phase > STARTUP_SUB_KMEM) {
		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
	} else {
		*addr = map_addr;
	}

	kmr.kmr_submap = map;
	return kmr;
}
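
/*
 * Illustrative call (a sketch, not taken from this file): carving a
 * pageable data submap out of the kernel map might look like
 *
 *	mach_vm_offset_t addr = 0;
 *	kmem_return_t kmr = kmem_suballoc(kernel_map, &addr, size,
 *	    VM_MAP_CREATE_PAGEABLE, VM_FLAGS_ANYWHERE,
 *	    KMS_DATA | KMS_NOFAIL, VM_KERN_MEMORY_KEXT);
 *
 * on success, kmr.kmr_submap is the new map covering [addr, addr + size).
 */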

/*
 * kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */
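/*
 * Hypothetical usage sketch (kmem_alloc itself is declared elsewhere):
 *
 *	vm_offset_t addr;
 *	kern_return_t kr = kmem_alloc(kernel_map, &addr, size,
 *	    KMA_DATA | KMA_ZERO, VM_KERN_MEMORY_DIAG);
 *	if (kr == KERN_SUCCESS) {
 *		... use [addr, addr + size) ...
 *		kmem_free(kernel_map, addr, size);
 *	}
 */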

__exported kern_return_t
kmem_alloc_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}


/*
 * kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

__exported kern_return_t
kmem_alloc_kobject_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_kobject_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}

/*
 * kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

__exported kern_return_t
kmem_alloc_pageable_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_pageable_external(
	vm_map_t        map,
	vm_offset_t    *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_vm_allocate_kernel_sanitize(
	vm_map_t                map,
	mach_vm_offset_ut       addr_u,
	mach_vm_size_ut         size_u,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_offset_t        *map_addr,
	vm_map_size_t          *map_size)
{
	kern_return_t result;
	vm_map_offset_t map_end;

	if (vmk_flags.vmf_fixed) {
		result = vm_sanitize_addr_size(addr_u, size_u,
		    VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
		    map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
		    map_addr, &map_end, map_size);
		if (__improbable(result != KERN_SUCCESS)) {
			return result;
		}
	} else {
		*map_addr = 0;
		result = vm_sanitize_size(0, size_u,
		    VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
		    map_size);
		if (__improbable(result != KERN_SUCCESS)) {
			return result;
		}
	}

	return KERN_SUCCESS;
}

kern_return_t
mach_vm_allocate_kernel(
	vm_map_t                map,
	mach_vm_offset_ut      *addr_u,
	mach_vm_size_ut         size_u,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_map_offset_t map_addr;
	vm_map_size_t   map_size;
	kern_return_t   result;

	if (map == VM_MAP_NULL) {
		ktriage_record(thread_tid(current_thread()),
		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
		    KDBG_TRIAGE_RESERVED,
		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
		    KERN_INVALID_ARGUMENT /* arg */);
		return KERN_INVALID_ARGUMENT;
	}

	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
	    VM_FLAGS_USER_ALLOCATE)) {
		return KERN_INVALID_ARGUMENT;
	}

	result = mach_vm_allocate_kernel_sanitize(map,
	    *addr_u,
	    size_u,
	    vmk_flags,
	    &map_addr,
	    &map_size);
	if (__improbable(result != KERN_SUCCESS)) {
		result = vm_sanitize_get_kr(result);
		if (result == KERN_SUCCESS) {
			*addr_u = vm_sanitize_wrap_addr(0);
		} else {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
			    KERN_INVALID_ARGUMENT /* arg */);
		}
		return result;
	}

	vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);

	result = vm_map_enter(
		map,
		&map_addr,
		map_size,
		(vm_map_offset_t)0,
		vmk_flags,
		VM_OBJECT_NULL,
		(vm_object_offset_t)0,
		FALSE,
		VM_PROT_DEFAULT,
		VM_PROT_ALL,
		VM_INHERIT_DEFAULT);

	if (result == KERN_SUCCESS) {
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(map_addr, map_size);
		}
#endif
		*addr_u = vm_sanitize_wrap_addr(map_addr);
	} else {
		ktriage_record(thread_tid(current_thread()),
		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
		    KDBG_TRIAGE_RESERVED,
		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
		    result /* arg */);
	}
	return result;
}

#pragma mark population

static void
kernel_memory_populate_pmap_enter(
	vm_object_t             object,
	vm_address_t            addr,
	vm_object_offset_t      offset,
	vm_page_t               mem,
	vm_prot_t               prot,
	int                     pe_flags,
	pmap_mapping_type_t     mapping_type)
{
	kern_return_t   pe_result;
	int             pe_options;

	if (VMP_ERROR_GET(mem)) {
		panic("VM page %p should not have an error", mem);
	}

	pe_options = PMAP_OPTIONS_NOWAIT;
	if (object->internal) {
		pe_options |= PMAP_OPTIONS_INTERNAL;
	}
	if (mem->vmp_reusable || object->all_reusable) {
		pe_options |= PMAP_OPTIONS_REUSABLE;
	}

	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

	if (pe_result == KERN_RESOURCE_SHORTAGE) {
		vm_object_unlock(object);

		pe_options &= ~PMAP_OPTIONS_NOWAIT;

		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

		vm_object_lock(object);
	}

	assert(pe_result == KERN_SUCCESS);
}

void
kernel_memory_populate_object_and_unlock(
	vm_object_t             object, /* must be locked */
	vm_address_t            addr,
	vm_offset_t             offset,
	vm_size_t               size,
	vm_page_t               page_list,
	kma_flags_t             flags,
	vm_tag_t                tag,
	vm_prot_t               prot,
	pmap_mapping_type_t     mapping_type)
{
	vm_page_t       mem;
	int             pe_flags;
	bool            gobbled_list = page_list && page_list->vmp_gobbled;

	assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);

	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		assert3u(offset, ==, addr);
	} else {
		/*
		 * kernel_memory_populate_pmap_enter() might drop the object
		 * lock, and the caller might not own a reference anymore
		 * and rely on holding the vm object lock for liveness.
		 */
		vm_object_reference_locked(object);
	}

	if (flags & KMA_KSTACK) {
		pe_flags = VM_MEM_STACK;
	} else {
		pe_flags = 0;
	}

	for (vm_object_offset_t pg_offset = 0;
	    pg_offset < size;
	    pg_offset += PAGE_SIZE_64) {
		if (page_list == NULL) {
			panic("%s: page_list too short", __func__);
		}

		mem = page_list;
		page_list = mem->vmp_snext;
		mem->vmp_snext = NULL;

		assert(mem->vmp_wire_count == 0);
		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
		assert(!mem->vmp_fictitious && !mem->vmp_private);

		if (flags & KMA_COMPRESSOR) {
			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
			/*
			 * Background processes doing I/O accounting can call
			 * into NVME driver to do some work which results in
			 * an allocation here and so we want to make sure
			 * that the pages used by compressor, regardless of
			 * process context, are never on the special Q.
			 */
			mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;

			vm_page_insert(mem, object, offset + pg_offset);
		} else {
			mem->vmp_q_state = VM_PAGE_IS_WIRED;
			mem->vmp_wire_count = 1;

			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
		}

		mem->vmp_gobbled = false;
		mem->vmp_busy = false;
		mem->vmp_pmapped = true;
		mem->vmp_wpmapped = true;

		/*
		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
		 * for the kernel and compressor objects.
		 */
		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
		    mem, prot, pe_flags, mapping_type);

		if (flags & KMA_NOENCRYPT) {
			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
		}
	}

	if (page_list) {
		panic("%s: page_list too long", __func__);
	}

	vm_object_unlock(object);
	if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
		vm_object_deallocate(object);
	}

	/*
	 * Update the accounting:
	 * - the compressor "wired" pages don't really count as wired
	 * - kmem_alloc_contig_guard() gives gobbled pages,
	 *   which already count as wired but need to be ungobbled.
	 */
	if (gobbled_list) {
		vm_page_lockspin_queues();
		if (flags & KMA_COMPRESSOR) {
			vm_page_wire_count -= atop(size);
		}
		vm_page_gobble_count -= atop(size);
		vm_page_unlock_queues();
	} else if ((flags & KMA_COMPRESSOR) == 0) {
		vm_page_lockspin_queues();
		vm_page_wire_count += atop(size);
		vm_page_unlock_queues();
	}

	if (flags & KMA_KOBJECT) {
		/* vm_page_insert_wired() handles regular objects already */
		vm_tag_update_size(tag, size, NULL);
	}

#if KASAN
	if (flags & KMA_COMPRESSOR) {
		kasan_notify_address_nopoison(addr, size);
	} else {
		kasan_notify_address(addr, size);
	}
#endif /* KASAN */
}


kern_return_t
kernel_memory_populate(
	vm_offset_t     addr,
	vm_size_t       size,
	kma_flags_t     flags,
	vm_tag_t        tag)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_page_t       page_list = NULL;
	vm_size_t       page_count = atop_64(size);
	vm_object_t     object = __kmem_object(ANYF(flags));

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
	    size, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */

	kr = vm_page_alloc_list(page_count, flags, &page_list);
	if (kr == KERN_SUCCESS) {
		vm_object_lock(object);
		kernel_memory_populate_object_and_unlock(object, addr,
		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
		    __kmem_mapping_type(ANYF(flags)));
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    page_count, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	return kr;
}

void
kernel_memory_depopulate(
	vm_offset_t     addr,
	vm_size_t       size,
	kma_flags_t     flags,
	vm_tag_t        tag)
{
	vm_object_t        object = __kmem_object(ANYF(flags));
	vm_object_offset_t offset = addr;
	vm_page_t          mem;
	vm_page_t          local_freeq = NULL;
	unsigned int       pages_unwired = 0;

	vm_object_lock(object);

	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);

	for (vm_object_offset_t pg_offset = 0;
	    pg_offset < size;
	    pg_offset += PAGE_SIZE_64) {
		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		if (flags & KMA_COMPRESSOR) {
			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
		} else {
			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
			pages_unwired++;
		}

		mem->vmp_busy = TRUE;

		assert(mem->vmp_tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->vmp_busy);

		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);

		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
		mem->vmp_snext = local_freeq;
		local_freeq = mem;
	}

	vm_object_unlock(object);

	vm_page_free_list(local_freeq, TRUE);

	if (!(flags & KMA_COMPRESSOR)) {
		vm_page_lockspin_queues();
		vm_page_wire_count -= pages_unwired;
		vm_page_unlock_queues();
	}

	if (flags & KMA_KOBJECT) {
		/* vm_page_remove() handles regular objects already */
		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
	}
}
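
/*
 * Sketch of the intended populate/depopulate pairing (illustrative only):
 * reserve VA without backing pages, then wire and unwire ranges on demand.
 *
 *	kmem_return_t kmr = kmem_alloc_guard(kernel_map, size, 0,
 *	    KMA_KOBJECT | KMA_VAONLY, KMEM_GUARD_NONE);
 *	kernel_memory_populate(kmr.kmr_address, PAGE_SIZE,
 *	    KMA_KOBJECT, VM_KERN_MEMORY_OSFMK);
 *	...
 *	kernel_memory_depopulate(kmr.kmr_address, PAGE_SIZE,
 *	    KMA_KOBJECT, VM_KERN_MEMORY_OSFMK);
 */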
1615
1616 #pragma mark reallocation
1617
1618 __abortlike
1619 static void
__kmem_realloc_invalid_object_size_panic(vm_map_t map,vm_address_t address,vm_size_t size,vm_map_entry_t entry)1620 __kmem_realloc_invalid_object_size_panic(
1621 vm_map_t map,
1622 vm_address_t address,
1623 vm_size_t size,
1624 vm_map_entry_t entry)
1625 {
1626 vm_object_t object = VME_OBJECT(entry);
1627 vm_size_t objsize = __kmem_entry_orig_size(entry);
1628
1629 panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1630 "object %p has unexpected size %ld",
1631 map, (void *)address, (size_t)size, entry, object, objsize);
1632 }
1633
1634 __abortlike
1635 static void
__kmem_realloc_invalid_pager_panic(vm_map_t map,vm_address_t address,vm_size_t size,vm_map_entry_t entry)1636 __kmem_realloc_invalid_pager_panic(
1637 vm_map_t map,
1638 vm_address_t address,
1639 vm_size_t size,
1640 vm_map_entry_t entry)
1641 {
1642 vm_object_t object = VME_OBJECT(entry);
1643 memory_object_t pager = object->pager;
1644 bool pager_created = object->pager_created;
1645 bool pager_initialized = object->pager_initialized;
1646 bool pager_ready = object->pager_ready;
1647
1648 panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1649 "object %p has unexpected pager %p (%d,%d,%d)",
1650 map, (void *)address, (size_t)size, entry, object,
1651 pager, pager_created, pager_initialized, pager_ready);
1652 }
1653
1654 static kmem_return_t
kmem_realloc_shrink_guard(vm_map_t map,vm_offset_t req_oldaddr,vm_size_t req_oldsize,vm_size_t req_newsize,kmr_flags_t flags,kmem_guard_t guard,vm_map_entry_t entry)1655 kmem_realloc_shrink_guard(
1656 vm_map_t map,
1657 vm_offset_t req_oldaddr,
1658 vm_size_t req_oldsize,
1659 vm_size_t req_newsize,
1660 kmr_flags_t flags,
1661 kmem_guard_t guard,
1662 vm_map_entry_t entry)
1663 {
1664 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1665 vm_object_t object;
1666 vm_offset_t delta = 0;
1667 kmem_return_t kmr;
1668 bool was_atomic;
1669 vm_size_t oldsize = round_page(req_oldsize);
1670 vm_size_t newsize = round_page(req_newsize);
1671 vm_address_t oldaddr = req_oldaddr;
1672
1673 #if KASAN_CLASSIC
1674 if (flags & KMR_KASAN_GUARD) {
1675 assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
1676 flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1677 oldaddr -= PAGE_SIZE;
1678 delta = ptoa(2);
1679 oldsize += delta;
1680 newsize += delta;
1681 }
1682 #endif /* KASAN_CLASSIC */
1683
1684 if (flags & KMR_TAG) {
1685 oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
1686 }
1687
1688 vm_map_lock_assert_exclusive(map);
1689
1690 if ((flags & KMR_KOBJECT) == 0) {
1691 object = VME_OBJECT(entry);
1692 vm_object_reference(object);
1693 }
1694
1695 /*
1696 * Shrinking an atomic entry starts with splitting it,
1697 * and removing the second half.
1698 */
1699 was_atomic = entry->vme_atomic;
1700 entry->vme_atomic = false;
1701 vm_map_clip_end(map, entry, entry->vme_start + newsize);
1702 entry->vme_atomic = was_atomic;
1703
1704 #if KASAN
1705 if (entry->vme_kernel_object && was_atomic) {
1706 entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
1707 }
1708 #if KASAN_CLASSIC
1709 if (flags & KMR_KASAN_GUARD) {
1710 kasan_poison_range(oldaddr + newsize, oldsize - newsize,
1711 ASAN_VALID);
1712 }
1713 #endif
1714 #if KASAN_TBI
1715 if (flags & KMR_TAG) {
1716 kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize);
1717 }
1718 #endif /* KASAN_TBI */
1719 #endif /* KASAN */
1720 (void)vm_map_remove_and_unlock(map,
1721 oldaddr + newsize, oldaddr + oldsize,
1722 vmr_flags, KMEM_GUARD_NONE);
1723
1724
1725 /*
1726 * Lastly, if there are guard pages, deal with them.
1727 *
1728 * The kernel object just needs to depopulate,
1729 * regular objects require freeing the last page
1730 * and replacing it with a guard.
1731 */
1732 if (flags & KMR_KOBJECT) {
1733 if (flags & KMR_GUARD_LAST) {
1734 kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
1735 PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
1736 }
1737 } else {
1738 vm_page_t guard_right = VM_PAGE_NULL;
1739 vm_offset_t remove_start = newsize;
1740
1741 if (flags & KMR_GUARD_LAST) {
1742 if (!map->never_faults) {
1743 guard_right = vm_page_grab_guard(true);
1744 }
1745 remove_start -= PAGE_SIZE;
1746 }
1747
1748 vm_object_lock(object);
1749
1750 if (object->vo_size != oldsize) {
1751 __kmem_realloc_invalid_object_size_panic(map,
1752 req_oldaddr, req_oldsize + delta, entry);
1753 }
1754 vm_object_set_size(object, newsize, req_newsize);
1755
1756 vm_object_page_remove(object, remove_start, oldsize);
1757
1758 if (guard_right) {
1759 vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1760 guard_right->vmp_busy = false;
1761 }
1762 vm_object_unlock(object);
1763 vm_object_deallocate(object);
1764 }
1765
1766 kmr.kmr_address = req_oldaddr;
1767 kmr.kmr_return = 0;
1768 #if KASAN_CLASSIC
1769 if (flags & KMA_KASAN_GUARD) {
1770 kasan_alloc_large(kmr.kmr_address, req_newsize);
1771 }
1772 #endif /* KASAN_CLASSIC */
1773 #if KASAN_TBI
1774 if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1775 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
1776 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
1777 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
1778 }
1779 #endif /* KASAN_TBI */
1780
1781 return kmr;
1782 }
1783
1784 kmem_return_t
kmem_realloc_guard(vm_map_t map,vm_offset_t req_oldaddr,vm_size_t req_oldsize,vm_size_t req_newsize,kmr_flags_t flags,kmem_guard_t guard)1785 kmem_realloc_guard(
1786 vm_map_t map,
1787 vm_offset_t req_oldaddr,
1788 vm_size_t req_oldsize,
1789 vm_size_t req_newsize,
1790 kmr_flags_t flags,
1791 kmem_guard_t guard)
1792 {
	vm_object_t object;
	vm_size_t oldsize;
	vm_size_t newsize;
	vm_offset_t delta = 0;
	vm_map_offset_t oldaddr;
	vm_map_offset_t newaddr;
	vm_object_offset_t newoffs;
	vm_map_entry_t oldentry;
	vm_map_entry_t newentry;
	vm_page_t page_list = NULL;
	bool needs_wakeup = false;
	kmem_return_t kmr = { };
	unsigned int last_timestamp;
	vm_map_kernel_flags_t vmk_flags = {
		.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
	};

	assert(KMEM_REALLOC_FLAGS_VALID(flags));
	if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
		__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
		    req_oldsize, flags);
	}

	if (req_oldaddr == 0ul) {
		return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
	}

	if (req_newsize == 0ul) {
		kmem_free_guard(map, req_oldaddr, req_oldsize,
		    (kmf_flags_t)flags, guard);
		return kmr;
	}

	if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}
	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}

	oldsize = round_page(req_oldsize);
	newsize = round_page(req_newsize);
	oldaddr = req_oldaddr;
#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
		oldaddr -= PAGE_SIZE;
		delta = ptoa(2);
		oldsize += delta;
		newsize += delta;
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (flags & KMR_TAG) {
		vm_memtag_verify_tag(req_oldaddr);
		oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

#if !KASAN
	/*
	 * If not on a KASAN variant and no difference in requested size,
	 * just return.
	 *
	 * Otherwise we want to validate the size and re-tag for KASAN_TBI.
	 */
	if (oldsize == newsize) {
		kmr.kmr_address = req_oldaddr;
		return kmr;
	}
#endif /* !KASAN */

	/*
	 * If we're growing the allocation,
	 * then reserve the pages we'll need,
	 * and find a spot for its new place.
	 */
	if (oldsize < newsize) {
#if DEBUG || DEVELOPMENT
		VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
		    DBG_VM_KERN_REQUEST, DBG_FUNC_START,
		    newsize - oldsize, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
		    (kma_flags_t)flags, &page_list);
		if (kmr.kmr_return == KERN_SUCCESS) {
			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
			    newsize, 0, &vmk_flags, true);
			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
			    vmk_flags, &newentry);
		}
		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
			if (flags & KMR_REALLOCF) {
				kmem_free_guard(map, req_oldaddr, req_oldsize,
				    KMF_NONE, guard);
			}
			if (page_list) {
				vm_page_free_list(page_list, FALSE);
			}
#if DEBUG || DEVELOPMENT
			VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
			    DBG_VM_KERN_REQUEST, DBG_FUNC_END,
			    0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
			return kmr;
		}

		/* map is locked */
	} else {
		vm_map_lock(map);
	}

	/*
	 * Locate the entry:
	 * - wait for it to quiesce,
	 * - validate its guard,
	 * - learn its correct tag.
	 */
again:
	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
		__kmem_entry_not_found_panic(map, req_oldaddr);
	}
	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
		oldentry->needs_wakeup = true;
		vm_map_entry_wait(map, THREAD_UNINT);
		goto again;
	}
	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
	}
	/*
	 * TODO: We should validate, for non-atomic entries, that the range
	 * we are acting on is what we expect here.
	 */
#if KASAN
	if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
		__kmem_realloc_invalid_object_size_panic(map,
		    req_oldaddr, req_oldsize + delta, oldentry);
	}

	if (oldsize == newsize) {
		kmr.kmr_address = req_oldaddr;
		if (oldentry->vme_kernel_object) {
			oldentry->vme_object_or_delta = delta +
			    (-req_newsize & PAGE_MASK);
		} else {
			object = VME_OBJECT(oldentry);
			vm_object_lock(object);
			vm_object_set_size(object, newsize, req_newsize);
			vm_object_unlock(object);
		}
		vm_map_unlock(map);

#if KASAN_CLASSIC
		if (flags & KMR_KASAN_GUARD) {
			kasan_alloc_large(kmr.kmr_address, req_newsize);
		}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
			kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
			vm_memtag_set_tag(kmr.kmr_address, req_newsize);
			kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
		}
#endif /* KASAN_TBI */
		return kmr;
	}
#endif /* KASAN */

	guard.kmg_tag = VME_ALIAS(oldentry);

	if (newsize < oldsize) {
		return kmem_realloc_shrink_guard(map, req_oldaddr,
		    req_oldsize, req_newsize, flags, guard, oldentry);
	}

	/*
	 * We are growing the entry.
	 *
	 * For regular objects we use the object `vo_size` updates
	 * as a guarantee that no two kmem_realloc() calls can happen
	 * concurrently (by doing it before the map is unlocked).
	 *
	 * For the kernel object, prevent the entry from being
	 * reallocated or changed by marking it "in_transition".
	 */

	object = VME_OBJECT(oldentry);
	vm_object_lock(object);
	vm_object_reference_locked(object);

	newaddr = newentry->vme_start;
	newoffs = oldsize;

	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(newentry, guard.kmg_tag);
	if (flags & KMR_KOBJECT) {
		oldentry->in_transition = true;
		VME_OFFSET_SET(newentry, newaddr);
		newentry->wired_count = 1;
		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
		newoffs = newaddr + oldsize;
#if KASAN
		newentry->vme_object_or_delta = delta +
		    (-req_newsize & PAGE_MASK);
#endif /* KASAN */
	} else {
		if (object->pager_created || object->pager) {
			/*
			 * We can't "realloc/grow" the pager, so pageable
			 * allocations should not go through this path.
			 */
			__kmem_realloc_invalid_pager_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		if (object->vo_size != oldsize) {
			__kmem_realloc_invalid_object_size_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		vm_object_set_size(object, newsize, req_newsize);
	}

	last_timestamp = map->timestamp;
	vm_map_unlock(map);

	/*
	 * Now proceed with the population of pages.
	 *
	 * Kernel objects can use the kmem population helpers.
	 *
	 * Regular objects will insert pages manually,
	 * then wire the memory into the new range.
	 */

	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));

	if (flags & KMR_KOBJECT) {
		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));

		pmap_protect(kernel_pmap,
		    oldaddr, oldaddr + oldsize - guard_right_size,
		    VM_PROT_NONE);

		for (vm_object_offset_t offset = 0;
		    offset < oldsize - guard_right_size;
		    offset += PAGE_SIZE_64) {
			vm_page_t mem;

			mem = vm_page_lookup(object, oldaddr + offset);
			if (mem == VM_PAGE_NULL) {
				continue;
			}

			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));

			mem->vmp_busy = true;
			vm_page_remove(mem, true);
			vm_page_insert_wired(mem, object, newaddr + offset,
			    guard.kmg_tag);
			mem->vmp_busy = false;

			kernel_memory_populate_pmap_enter(object, newaddr,
			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
		}

		kernel_memory_populate_object_and_unlock(object,
		    newaddr + oldsize - guard_right_size,
		    newoffs - guard_right_size,
		    newsize - oldsize,
		    page_list, (kma_flags_t)flags,
		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
	} else {
		vm_page_t guard_right = VM_PAGE_NULL;

		/*
		 * Note: we are borrowing the new entry reference
		 * on the object for the duration of this code,
		 * which works because we keep the object locked
		 * throughout.
		 */
		if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
			assert(guard_right->vmp_fictitious);
			guard_right->vmp_busy = true;
			vm_page_remove(guard_right, true);
		}

		if (flags & KMR_FREEOLD) {
			/*
			 * Freeing the old mapping will make
			 * the old pages become pageable until
			 * the new mapping makes them wired again.
			 * Let's take an extra "wire_count" to
			 * prevent any accidental "page out".
			 * We'll have to undo that after wiring
			 * the new mapping.
			 */
			vm_object_reference_locked(object); /* keep object alive */
			for (vm_object_offset_t offset = 0;
			    offset < oldsize - guard_right_size;
			    offset += PAGE_SIZE_64) {
				vm_page_t mem;

				mem = vm_page_lookup(object, offset);
				assert(mem != VM_PAGE_NULL);
				assertf(!VM_PAGE_PAGEABLE(mem),
				    "mem %p qstate %d",
				    mem, mem->vmp_q_state);
				if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
					/* guard pages are not wired */
				} else {
					assertf(VM_PAGE_WIRED(mem),
					    "mem %p qstate %d wirecount %d",
					    mem,
					    mem->vmp_q_state,
					    mem->vmp_wire_count);
					assertf(mem->vmp_wire_count >= 1,
					    "mem %p wirecount %d",
					    mem, mem->vmp_wire_count);
					mem->vmp_wire_count++;
				}
			}
		}

		for (vm_object_offset_t offset = oldsize - guard_right_size;
		    offset < newsize - guard_right_size;
		    offset += PAGE_SIZE_64) {
			vm_page_t mem = page_list;

			page_list = mem->vmp_snext;
			mem->vmp_snext = VM_PAGE_NULL;
			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
			assert(!VM_PAGE_PAGEABLE(mem));

			vm_page_insert(mem, object, offset);
			mem->vmp_busy = false;
		}

		if (guard_right) {
			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
			guard_right->vmp_busy = false;
		}

		vm_object_unlock(object);
	}

	/*
	 * Mark the entry as idle again,
	 * and honor KMR_FREEOLD if needed.
	 */

	vm_map_lock(map);
	if (last_timestamp + 1 != map->timestamp &&
	    !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
		__kmem_entry_not_found_panic(map, req_oldaddr);
	}

	if (flags & KMR_KOBJECT) {
		assert(oldentry->in_transition);
		oldentry->in_transition = false;
		if (oldentry->needs_wakeup) {
			needs_wakeup = true;
			oldentry->needs_wakeup = false;
		}
	}

	if (flags & KMR_FREEOLD) {
		vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;

#if KASAN_CLASSIC
		if (flags & KMR_KASAN_GUARD) {
			kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
		}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		if (flags & KMR_TAG) {
			kasan_tbi_mark_free_space(req_oldaddr, oldsize);
		}
#endif /* KASAN_TBI */
		if (flags & KMR_GUARD_LAST) {
			vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
		}
		(void)vm_map_remove_and_unlock(map,
		    oldaddr, oldaddr + oldsize,
		    vmr_flags, guard);
	} else {
		vm_map_unlock(map);
	}

	if ((flags & KMR_KOBJECT) == 0) {
		kern_return_t kr;
		/*
		 * This must happen _after_ we do the KMR_FREEOLD,
		 * because wiring the pages will call into the pmap,
		 * and if the pages are typed XNU_KERNEL_RESTRICTED,
		 * this would cause a second mapping of the page and panic.
		 */
		kr = vm_map_wire_kernel(map,
		    vm_sanitize_wrap_addr(newaddr),
		    vm_sanitize_wrap_addr(newaddr + newsize),
		    vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
		    guard.kmg_tag, FALSE);
		assert(kr == KERN_SUCCESS);

		if (flags & KMR_FREEOLD) {
			/*
			 * Undo the extra "wiring" we made above
			 * and release the extra reference we took
			 * on the object.
			 */
			vm_object_lock(object);
			for (vm_object_offset_t offset = 0;
			    offset < oldsize - guard_right_size;
			    offset += PAGE_SIZE_64) {
				vm_page_t mem;

				mem = vm_page_lookup(object, offset);
				assert(mem != VM_PAGE_NULL);
				assertf(!VM_PAGE_PAGEABLE(mem),
				    "mem %p qstate %d",
				    mem, mem->vmp_q_state);
				if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
					/* guard pages are not wired */
				} else {
					assertf(VM_PAGE_WIRED(mem),
					    "mem %p qstate %d wirecount %d",
					    mem,
					    mem->vmp_q_state,
					    mem->vmp_wire_count);
					assertf(mem->vmp_wire_count >= 2,
					    "mem %p wirecount %d",
					    mem, mem->vmp_wire_count);
					mem->vmp_wire_count--;
					assert(VM_PAGE_WIRED(mem));
					assert(mem->vmp_wire_count >= 1);
				}
			}
			vm_object_unlock(object);
			vm_object_deallocate(object); /* release extra ref */
		}
	}

	if (needs_wakeup) {
		vm_map_entry_wakeup(map);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    atop(newsize - oldsize), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	kmr.kmr_address = newaddr;

#if KASAN
	kasan_notify_address(kmr.kmr_address, newsize);
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		kmr.kmr_address += PAGE_SIZE;
		kasan_alloc_large(kmr.kmr_address, req_newsize);
	}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
	if (flags & KMR_TAG) {
		kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
		vm_memtag_set_tag(kmr.kmr_address, req_newsize);
		kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
	}
#endif /* KASAN_TBI */

	return kmr;
}

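/*
 * Usage sketch for kmem_realloc_guard() (illustrative only: the variable
 * names and the flag/guard combination below are assumptions for the
 * example, not a call site taken from this file):
 *
 *	kmem_guard_t guard = {
 *		.kmg_atomic = true,
 *		.kmg_tag    = VM_KERN_MEMORY_KALLOC,
 *	};
 *	kmem_return_t kmr;
 *
 *	kmr = kmem_realloc_guard(kernel_map, old_addr, old_size, new_size,
 *	    KMR_KOBJECT | KMR_FREEOLD, guard);
 *	if (kmr.kmr_return == KERN_SUCCESS) {
 *		new_addr = kmr.kmr_address;
 *	}
 *
 * As the early returns above show, req_oldaddr == 0 degenerates to
 * kmem_alloc_guard() and req_newsize == 0 to kmem_free_guard().
 */
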
#pragma mark map/remap/wire

kern_return_t
mach_vm_map_kernel(
	vm_map_t                target_map,
	mach_vm_offset_ut      *address,
	mach_vm_size_ut         initial_size,
	mach_vm_offset_ut       mask,
	vm_map_kernel_flags_t   vmk_flags,
	ipc_port_t              port,
	memory_object_offset_ut offset,
	boolean_t               copy,
	vm_prot_ut              cur_protection,
	vm_prot_ut              max_protection,
	vm_inherit_ut           inheritance)
{
	/* range_id is set by vm_map_enter_mem_object */
	return vm_map_enter_mem_object(target_map,
	           address,
	           initial_size,
	           mask,
	           vmk_flags,
	           port,
	           offset,
	           copy,
	           cur_protection,
	           max_protection,
	           inheritance,
	           NULL,
	           0);
}

kern_return_t
mach_vm_remap_new_kernel(
	vm_map_t                target_map,
	mach_vm_offset_ut      *address,
	mach_vm_size_ut         size,
	mach_vm_offset_ut       mask,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_t                src_map,
	mach_vm_offset_ut       memory_address,
	boolean_t               copy,
	vm_prot_ut             *cur_protection,   /* IN/OUT */
	vm_prot_ut             *max_protection,   /* IN/OUT */
	vm_inherit_ut           inheritance)
{
	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
	    VM_FLAGS_USER_REMAP)) {
		return KERN_INVALID_ARGUMENT;
	}

	vmk_flags.vmf_return_data_addr = true;

	/* range_id is set by vm_map_remap */
	return vm_map_remap(target_map,
	           address,
	           size,
	           mask,
	           vmk_flags,
	           src_map,
	           memory_address,
	           copy,
	           cur_protection,
	           max_protection,
	           inheritance);
}

#pragma mark free

#if KASAN

__abortlike
static void
__kmem_free_invalid_object_size_panic(
	vm_map_t                map,
	vm_address_t            address,
	vm_size_t               size,
	vm_map_entry_t          entry)
{
	vm_object_t object = VME_OBJECT(entry);
	vm_size_t objsize = __kmem_entry_orig_size(entry);

	panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
	    "object %p has unexpected size %ld",
	    map, (void *)address, (size_t)size, entry, object, objsize);
}

#endif /* KASAN */

vm_size_t
kmem_free_guard(
	vm_map_t                map,
	vm_offset_t             req_addr,
	vm_size_t               req_size,
	kmf_flags_t             flags,
	kmem_guard_t            guard)
{
	vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
	vm_address_t addr = req_addr;
	vm_offset_t delta = 0;
	vm_size_t size;
#if KASAN
	vm_map_entry_t entry;
#endif /* KASAN */

	assert(map->pmap == kernel_pmap);

#if KASAN_CLASSIC
	if (flags & KMF_KASAN_GUARD) {
		addr -= PAGE_SIZE;
		delta = ptoa(2);
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (flags & KMF_TAG) {
		vm_memtag_verify_tag(req_addr);
		addr = vm_memtag_canonicalize_address(req_addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	if (flags & KMF_GUESS_SIZE) {
		vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
		size = PAGE_SIZE;
	} else if (req_size == 0) {
		__kmem_invalid_size_panic(map, req_size, flags);
	} else {
		size = round_page(req_size) + delta;
	}

	vm_map_lock(map);

#if KASAN
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		__kmem_entry_not_found_panic(map, req_addr);
	}
	if (flags & KMF_GUESS_SIZE) {
		vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
		req_size = __kmem_entry_orig_size(entry);
		size = round_page(req_size + delta);
	} else if (guard.kmg_atomic && entry->vme_kernel_object &&
	    __kmem_entry_orig_size(entry) != req_size) {
		/*
		 * We can't make a strict check for regular
		 * VM objects because it could be:
		 *
		 * - the kmem_free_guard() of a kmem_realloc_guard() without
		 *   KMR_FREEOLD, and in that case the object size won't match.
		 *
		 * - a submap, in which case there is no "orig size".
		 */
		__kmem_free_invalid_object_size_panic(map,
		    req_addr, req_size + delta, entry);
	}
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMF_KASAN_GUARD) {
		kasan_poison_range(addr, size, ASAN_VALID);
	}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
	if (flags & KMF_TAG) {
		kasan_tbi_mark_free_space(req_addr, size);
	}
#endif /* KASAN_TBI */

	/*
	 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
	 * unwires the kernel mapping. The page won't be mapped any longer so
	 * there is no extra step that is required for memory tagging to "clear"
	 * it -- the page will be later laundered when reused.
	 */
	return vm_map_remove_and_unlock(map, addr, addr + size,
	           vmr_flags, guard).kmr_size - delta;
}

__exported void
kmem_free_external(
	vm_map_t                map,
	vm_offset_t             addr,
	vm_size_t               size);
void
kmem_free_external(
	vm_map_t                map,
	vm_offset_t             addr,
	vm_size_t               size)
{
	if (size) {
		kmem_free(map, trunc_page(addr), size);
#if MACH_ASSERT
	} else {
		printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
		    map, (void *)addr, __builtin_return_address(0));
#endif
	}
}

#pragma mark kmem metadata

/*
 * Guard objects for kmem pointer allocation:
 *
 * Guard objects introduce size slabs to kmem pointer allocations that are
 * allocated in chunks of n * sizeclass. When an allocation of a specific
 * sizeclass is requested, a random slot from [0, n) is returned.
 * Allocations are returned from that chunk until m slots are left. The
 * remaining m slots are referred to as guard objects. They don't get
 * allocated and the chunk is now considered full. When an allocation is
 * freed back to the chunk, a slot opens up again (m + 1 free slots) for
 * the next allocation of that sizeclass.
 *
 * Guard objects are intended to make exploitation of use-after-frees
 * harder, as allocations that are freed can no longer be reliably
 * reallocated. They also make exploitation of OOBs harder, as overflowing
 * out of an allocation can no longer be safe even with sufficient
 * spraying.
 */
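
/*
 * Worked example (numbers assume the 4k KMEM_MIN_SIZE configuration
 * below, where a chunk is at least 128k): for the 16k sizeclass, a chunk
 * is roundup(8 * 16k, 128k) = 128k, i.e. n = 8 slots. With
 * m = KMEM_NUM_GUARDS = 2, allocations are served from random free slots
 * until only 2 remain; those guard slots stay unallocated and the chunk
 * is considered full. Freeing one element back gives the chunk 3 free
 * slots and makes it eligible for allocations again.
 */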

#define KMEM_META_PRIMARY       UINT8_MAX
#define KMEM_META_START         (UINT8_MAX - 1)
#define KMEM_META_FREE          (UINT8_MAX - 2)
#if __ARM_16K_PG__
#define KMEM_MIN_SIZE           PAGE_SIZE
#define KMEM_CHUNK_SIZE_MIN     (KMEM_MIN_SIZE * 16)
#else /* __ARM_16K_PG__ */
/*
 * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
 * devices use 4k page size when their RAM is <= 1GB and 16k otherwise.
 * Therefore populate sizeclasses from 4k for those devices.
 */
#define KMEM_MIN_SIZE           (4 * 1024)
#define KMEM_CHUNK_SIZE_MIN     (KMEM_MIN_SIZE * 32)
#endif /* __ARM_16K_PG__ */
#define KMEM_MAX_SIZE           (32ULL << 20)
#define KMEM_START_IDX          (kmem_log2down(KMEM_MIN_SIZE))
#define KMEM_LAST_IDX           (kmem_log2down(KMEM_MAX_SIZE))
#define KMEM_NUM_SIZECLASS      (KMEM_LAST_IDX - KMEM_START_IDX + 1)
#define KMEM_FRONTS             (KMEM_RANGE_ID_NUM_PTR * 2)
#define KMEM_NUM_GUARDS         2

struct kmem_page_meta {
	union {
		/*
		 * On primary allocated chunk with KMEM_META_PRIMARY marker
		 */
		uint32_t km_bitmap;
		/*
		 * On start and end of free chunk with KMEM_META_FREE marker
		 */
		uint32_t km_free_chunks;
	};
	/*
	 * KMEM_META_PRIMARY: Start meta of allocated chunk
	 * KMEM_META_FREE   : Start and end meta of free chunk
	 * KMEM_META_START  : Meta region start and end
	 */
	uint8_t km_page_marker;
	uint8_t km_sizeclass;
	union {
		/*
		 * On primary allocated chunk with KMEM_META_PRIMARY marker
		 */
		uint16_t km_chunk_len;
		/*
		 * On secondary allocated chunks
		 */
		uint16_t km_page_idx;
	};
	LIST_ENTRY(kmem_page_meta) km_link;
} kmem_page_meta_t;

typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
struct kmem_sizeclass {
	vm_map_size_t           ks_size;
	uint32_t                ks_num_chunk;
	uint32_t                ks_num_elem;
	crypto_random_ctx_t __zpercpu ks_rng_ctx;
	kmem_list_head_t        ks_allfree_head[KMEM_FRONTS];
	kmem_list_head_t        ks_partial_head[KMEM_FRONTS];
	kmem_list_head_t        ks_full_head[KMEM_FRONTS];
};

static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];

/*
 * Locks to synchronize metadata population
 */
static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
#define kmem_meta_lock()        lck_mtx_lock(&kmem_meta_region_lck)
#define kmem_meta_unlock()      lck_mtx_unlock(&kmem_meta_region_lck)

static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
    kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
    kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
/*
 * Keeps track of metadata high water mark for each front
 */
static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
static SECURITY_READ_ONLY_LATE(vm_map_t)
    kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
static vm_map_size_t kmem_meta_size;

static uint32_t
kmem_get_front(
	kmem_range_id_t         range_id,
	bool                    from_right)
{
	assert((range_id >= KMEM_RANGE_ID_FIRST) &&
	    (range_id <= KMEM_RANGE_ID_NUM_PTR));
	return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
}

static inline uint32_t
kmem_slot_idx_to_bit(
	uint32_t                slot_idx,
	uint32_t                size_idx __unused)
{
	assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
	return 1ull << slot_idx;
}

static uint32_t
kmem_get_idx_from_size(vm_map_size_t size)
{
	assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
	return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
}
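
/*
 * Example (4k KMEM_MIN_SIZE, so KMEM_START_IDX == 12): a 4k request maps
 * to idx 0 (4k slots), sizes in (4k, 8k] to idx 1 (8k slots), (8k, 16k]
 * to idx 2 (16k slots), and so on; kmem_get_size_from_idx() below is the
 * inverse for exact powers of two.
 */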

__abortlike
static void
kmem_invalid_size_idx(uint32_t idx)
{
	panic("Invalid sizeclass idx %u", idx);
}

static vm_map_size_t
kmem_get_size_from_idx(uint32_t idx)
{
	if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
		kmem_invalid_size_idx(idx);
	}
	return 1ul << (idx + KMEM_START_IDX);
}

static inline uint16_t
kmem_get_page_idx(struct kmem_page_meta *meta)
{
	uint8_t page_marker = meta->km_page_marker;

	return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
}

__abortlike
static void
kmem_invalid_chunk_len(struct kmem_page_meta *meta)
{
	panic("Reading chunk length for meta %p where marker != KMEM_META_PRIMARY",
	    meta);
}

static inline uint16_t
kmem_get_chunk_len(struct kmem_page_meta *meta)
{
	if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
		kmem_invalid_chunk_len(meta);
	}

	return meta->km_chunk_len;
}

__abortlike
static void
kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
{
	panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
	    meta);
}

static inline uint32_t
kmem_get_free_chunk_len(struct kmem_page_meta *meta)
{
	if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
		kmem_invalid_free_chunk_len(meta);
	}

	return meta->km_free_chunks;
}

/*
 * Return the metadata corresponding to the specified address
 */
static struct kmem_page_meta *
kmem_addr_to_meta(
	vm_map_offset_t         addr,
	vm_map_range_id_t       range_id,
	vm_map_offset_t        *range_start,
	uint64_t               *meta_idx)
{
	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];

	*range_start = kmem_ranges[range_id].min_address;
	*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
	return &meta_base[*meta_idx];
}
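
/*
 * Example: with 128k chunks, an address 300k into its range lies in
 * chunk 2 (300k / 128k rounds down), i.e. &kmem_meta_base[range_id][2].
 * If that metadata is a secondary entry, kmem_addr_to_meta_start() below
 * walks back km_page_idx entries to the KMEM_META_PRIMARY metadata.
 */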

/*
 * Return the metadata start of the chunk that the address belongs to
 */
static struct kmem_page_meta *
kmem_addr_to_meta_start(
	vm_address_t            addr,
	vm_map_range_id_t       range_id,
	vm_map_offset_t        *chunk_start)
{
	vm_map_offset_t range_start;
	uint64_t meta_idx;
	struct kmem_page_meta *meta;

	meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
	meta_idx -= kmem_get_page_idx(meta);
	meta -= kmem_get_page_idx(meta);
	assert(meta->km_page_marker == KMEM_META_PRIMARY);
	*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
	return meta;
}

__startup_func
static void
kmem_init_meta_front(
	struct kmem_page_meta  *meta,
	kmem_range_id_t         range_id,
	bool                    from_right)
{
	kernel_memory_populate(trunc_page((vm_map_offset_t)meta), PAGE_SIZE,
	    KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
	meta->km_page_marker = KMEM_META_START;
	if (!from_right) {
		meta++;
		kmem_meta_base[range_id] = meta;
	}
	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
}

__startup_func
static void
kmem_metadata_init(void)
{
	for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
		vm_map_offset_t addr = kmem_meta_range[i].min_address;
		struct kmem_page_meta *meta;
		uint64_t meta_idx;

		vm_map_will_allocate_early_map(&kmem_meta_map[i]);
		kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
		    VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, KMS_PERMANENT | KMS_NOFAIL,
		    VM_KERN_MEMORY_OSFMK).kmr_submap;

		kmem_meta_range[i].min_address = addr;
		kmem_meta_range[i].max_address = addr + kmem_meta_size;

		meta = (struct kmem_page_meta *)kmem_meta_range[i].min_address;
		kmem_init_meta_front(meta, i, 0);

		meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
		    &meta_idx);
		kmem_init_meta_front(meta, i, 1);
	}
}

__startup_func
static void
kmem_init_front_head(
	struct kmem_sizeclass  *ks,
	uint32_t                front)
{
	LIST_INIT(&ks->ks_allfree_head[front]);
	LIST_INIT(&ks->ks_partial_head[front]);
	LIST_INIT(&ks->ks_full_head[front]);
}

__startup_func
static void
kmem_sizeclass_init(void)
{
	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		struct kmem_sizeclass *ks = &kmem_size_array[i];
		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;

		ks->ks_size = kmem_get_size_from_idx(i);
		ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
		    KMEM_CHUNK_SIZE_MIN;
		ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
		assert(ks->ks_num_elem <=
		    (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
		for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
			kmem_init_front_head(ks, kmem_get_front(range_id, 0));
			kmem_init_front_head(ks, kmem_get_front(range_id, 1));
		}
	}
}

/*
 * This is done during EARLY_BOOT as it needs the corecrypto module to be
 * set up.
 */
__startup_func
static void
kmem_crypto_init(void)
{
	vm_size_t ctx_size = crypto_random_kmem_ctx_size();

	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		struct kmem_sizeclass *ks = &kmem_size_array[i];

		ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
		zpercpu_foreach(ctx, ks->ks_rng_ctx) {
			crypto_random_kmem_init(ctx);
		}
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);

__abortlike
static void
kmem_validate_slot_panic(
	vm_map_offset_t         addr,
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx,
	uint32_t                size_idx)
{
	if (meta->km_page_marker != KMEM_META_PRIMARY) {
		panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
	}
	if (meta->km_sizeclass != size_idx) {
		panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
		    meta, meta->km_sizeclass, size_idx);
	}
	panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
	    slot_idx, meta, (void *)addr);
}

__abortlike
static void
kmem_invalid_slot_for_addr(
	mach_vm_range_t         slot,
	vm_map_offset_t         start,
	vm_map_offset_t         end)
{
	panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
	    (void *)slot->min_address, (void *)slot->max_address,
	    (void *)start, (void *)end);
}

void
kmem_validate_slot(
	vm_map_offset_t         addr,
	struct kmem_page_meta  *meta,
	uint32_t                size_idx,
	uint32_t                slot_idx)
{
	if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
	    (meta->km_sizeclass != size_idx) ||
	    ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
		kmem_validate_slot_panic(addr, meta, slot_idx, size_idx);
	}
}

static void
kmem_validate_slot_initial(
	mach_vm_range_t         slot,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	struct kmem_page_meta  *meta,
	uint32_t                size_idx,
	uint32_t                slot_idx)
{
	if ((slot->min_address == 0) || (slot->max_address == 0) ||
	    (start < slot->min_address) || (start >= slot->max_address) ||
	    (end > slot->max_address)) {
		kmem_invalid_slot_for_addr(slot, start, end);
	}

	kmem_validate_slot(start, meta, size_idx, slot_idx);
}

uint32_t
kmem_addr_get_slot_idx(
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_map_range_id_t       range_id,
	struct kmem_page_meta **meta,
	uint32_t               *size_idx,
	mach_vm_range_t         slot)
{
	vm_map_offset_t chunk_start;
	vm_map_size_t slot_size;
	uint32_t slot_idx;

	*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
	*size_idx = (*meta)->km_sizeclass;
	slot_size = kmem_get_size_from_idx(*size_idx);
	slot_idx = (start - chunk_start) / slot_size;
	slot->min_address = chunk_start + slot_idx * slot_size;
	slot->max_address = slot->min_address + slot_size;

	kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);

	return slot_idx;
}
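
/*
 * Example: for a chunk of 16k slots starting at address C, an allocation
 * starting at C + 48k is slot_idx 3 and its slot spans [C + 48k, C + 64k);
 * kmem_validate_slot_initial() above checks that the allocation starts
 * inside the slot and does not extend past its top.
 */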

static bool
kmem_populate_needed(vm_offset_t from, vm_offset_t to)
{
#if KASAN
#pragma unused(from, to)
	return true;
#else
	vm_offset_t page_addr = trunc_page(from);

	for (; page_addr < to; page_addr += PAGE_SIZE) {
		/*
		 * This can race with another thread doing a populate on the same metadata
		 * page, where we see an updated pmap but unmapped KASan shadow, causing a
		 * fault in the shadow when we first access the metadata page. Avoid this
		 * by always synchronizing on the kmem_meta_lock with KASan.
		 */
		if (!pmap_find_phys(kernel_pmap, page_addr)) {
			return true;
		}
	}

	return false;
#endif /* !KASAN */
}

static void
kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
{
	vm_offset_t page_addr = trunc_page(from);

	vm_map_unlock(kernel_map);

	for (; page_addr < to; page_addr += PAGE_SIZE) {
		for (;;) {
			kern_return_t ret = KERN_SUCCESS;

			/*
			 * All updates to kmem metadata are done under the kmem_meta_lock
			 */
			kmem_meta_lock();
			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
				ret = kernel_memory_populate(page_addr,
				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
				    VM_KERN_MEMORY_OSFMK);
			}
			kmem_meta_unlock();

			if (ret == KERN_SUCCESS) {
				break;
			}

			/*
			 * We pass KMA_NOPAGEWAIT because waiting for pages
			 * under a global lock leads to bad system deadlocks,
			 * so if the allocation failed, we need to do the
			 * VM_PAGE_WAIT() outside of the lock.
			 */
			VM_PAGE_WAIT();
		}
	}

	vm_map_lock(kernel_map);
}

__abortlike
static void
kmem_invalid_meta_panic(
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx,
	struct kmem_sizeclass   sizeclass)
{
	uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);

	if (slot_idx >= sizeclass.ks_num_elem) {
		panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
		    sizeclass.ks_num_elem, meta);
	}
	if (meta->km_sizeclass != size_idx) {
		panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
		    meta->km_sizeclass, meta);
	}
	panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
}

__abortlike
static void
kmem_slot_has_entry_panic(
	vm_map_entry_t          entry,
	vm_map_offset_t         addr)
{
	panic("Entry (%p) already exists for addr (%p) being returned",
	    entry, (void *)addr);
}

__abortlike
static void
kmem_slot_not_found(
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx)
{
	panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
	    meta->km_bitmap);
}

/*
 * Returns a 16bit random number between 0 and
 * upper_limit (inclusive)
 */
__startup_func
uint16_t
kmem_get_random16(uint16_t upper_limit)
{
	static uint64_t random_entropy;

	assert(upper_limit < UINT16_MAX);
	if (random_entropy == 0) {
		random_entropy = early_random();
	}
	uint32_t result = random_entropy & UINT32_MAX;
	random_entropy >>= 32;
	return (uint16_t)(result % (upper_limit + 1));
}
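
/*
 * Note: each call consumes 32 bits of the cached early_random() seed, so
 * a fresh seed is drawn roughly every other call. This path is only used
 * until the corecrypto-backed per-CPU contexts are ready (see
 * kmem_get_next_slot() below).
 */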

static uint32_t
kmem_get_nth_free_slot(
	struct kmem_page_meta  *meta,
	uint32_t                n,
	uint32_t                bitmap)
{
	uint32_t zeros_seen = 0, ones_seen = 0;

	while (bitmap) {
		uint32_t count = __builtin_ctz(bitmap);

		zeros_seen += count;
		bitmap >>= count;
		if (__probable(~bitmap)) {
			count = __builtin_ctz(~bitmap);
		} else {
			count = 32;
		}
		if (count + ones_seen > n) {
			return zeros_seen + n;
		}
		ones_seen += count;
		bitmap >>= count;
	}

	kmem_slot_not_found(meta, n);
}
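
/*
 * Trace (illustrative): bitmap 0b101100 (free slots 2, 3 and 5), n = 1:
 * the run of two zeros is skipped (zeros_seen = 2), the following run of
 * ones has count 2 > n, so the function returns zeros_seen + n = 3, the
 * index of the second free slot.
 */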

static uint32_t
kmem_get_next_slot(
	struct kmem_page_meta  *meta,
	struct kmem_sizeclass   sizeclass,
	uint32_t                bitmap)
{
	uint32_t num_slots = __builtin_popcount(bitmap);
	uint64_t slot_idx = 0;

	assert(num_slots > 0);
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		/*
		 * Use early random prior to early boot, as the ks_rng_ctx
		 * requires the corecrypto module to be set up before it is
		 * initialized and used.
		 *
		 * num_slots can't be 0 as we take this path when we have more
		 * than one slot left.
		 */
		slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
	} else {
		crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
		    &slot_idx);
	}

	return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
}

/*
 * Returns an unallocated slot from the given metadata
 */
static vm_map_offset_t
kmem_get_addr_from_meta(
	struct kmem_page_meta  *meta,
	vm_map_range_id_t       range_id,
	struct kmem_sizeclass   sizeclass,
	vm_map_entry_t         *entry)
{
	vm_map_offset_t addr;
	vm_map_size_t size = sizeclass.ks_size;
	uint32_t size_idx = kmem_get_idx_from_size(size);
	uint64_t meta_idx = meta - kmem_meta_base[range_id];
	mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
	uint32_t slot_bit;
	uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);

	if ((slot_idx >= sizeclass.ks_num_elem) ||
	    (meta->km_sizeclass != size_idx) ||
	    (meta->km_page_marker != KMEM_META_PRIMARY)) {
		kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
	}

	slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
	meta->km_bitmap &= ~slot_bit;

	addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
	assert(kmem_range_contains_fully(range_id, addr, size));
	if (vm_map_lookup_entry(kernel_map, addr, entry)) {
		kmem_slot_has_entry_panic(*entry, addr);
	}
	if ((*entry != vm_map_to_entry(kernel_map)) &&
	    ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
	    ((*entry)->vme_next->vme_start < (addr + size))) {
		kmem_slot_has_entry_panic(*entry, addr);
	}
	return addr;
}

__abortlike
static void
kmem_range_out_of_va(
	kmem_range_id_t         range_id,
	uint32_t                num_chunks)
{
	panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
}

static void
kmem_init_allocated_chunk(
	struct kmem_page_meta  *meta,
	struct kmem_sizeclass   sizeclass,
	uint32_t                size_idx)
{
	uint32_t meta_num = sizeclass.ks_num_chunk;
	uint32_t num_elem = sizeclass.ks_num_elem;

	meta->km_bitmap = (1ull << num_elem) - 1;
	meta->km_chunk_len = (uint16_t)meta_num;
	assert(LIST_NEXT(meta, km_link) == NULL);
	assert(meta->km_link.le_prev == NULL);
	meta->km_sizeclass = (uint8_t)size_idx;
	meta->km_page_marker = KMEM_META_PRIMARY;
	meta++;
	for (uint32_t i = 1; i < meta_num; i++) {
		meta->km_page_idx = (uint16_t)i;
		meta->km_sizeclass = (uint8_t)size_idx;
		meta->km_page_marker = 0;
		meta->km_bitmap = 0;
		meta++;
	}
}
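
/*
 * Resulting layout: the first metadata entry is the primary
 * (KMEM_META_PRIMARY) holding km_chunk_len and the free-slot bitmap; the
 * remaining meta_num - 1 entries are secondaries whose km_page_idx
 * records their distance back to the primary.
 */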

static uint32_t
kmem_get_additional_meta(
	struct kmem_page_meta  *meta,
	uint32_t                meta_req,
	bool                    from_right,
	struct kmem_page_meta **adj_free_meta)
{
	struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);

	if (meta_prev->km_page_marker == KMEM_META_FREE) {
		uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);

		*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
		meta_req -= chunk_len;
	} else {
		*adj_free_meta = NULL;
	}

	return meta_req;
}

static struct kmem_page_meta *
kmem_get_new_chunk(
	vm_map_range_id_t       range_id,
	bool                    from_right,
	uint32_t                size_idx)
{
	struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
	struct kmem_page_meta *start, *end, *meta_update;
	struct kmem_page_meta *adj_free_meta = NULL;
	uint32_t meta_req = sizeclass.ks_num_chunk;

	for (;;) {
		struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		struct kmem_page_meta *meta;
		vm_offset_t start_addr, end_addr;
		uint32_t meta_num;

		meta = from_right ? metab : metaf;
		meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
		    &adj_free_meta);

		if (metaf + meta_num >= metab) {
			kmem_range_out_of_va(range_id, meta_num);
		}

		start = from_right ? (metab - meta_num) : metaf;
		end = from_right ? metab : (metaf + meta_num);

		start_addr = (vm_offset_t)start;
		end_addr = (vm_offset_t)end;

		/*
		 * If the new high watermark stays on the same page,
		 * no need to populate and drop the lock.
		 */
		if (!page_aligned(from_right ? end_addr : start_addr) &&
		    trunc_page(start_addr) == trunc_page(end_addr - 1)) {
			break;
		}
		if (!kmem_populate_needed(start_addr, end_addr)) {
			break;
		}

		kmem_populate_meta_locked(start_addr, end_addr);

		/*
		 * Since we dropped the lock, reassess that the conditions
		 * still hold:
		 * - the HWM we are changing must not have moved
		 * - the other HWM must not intersect with ours
		 * - in case of coalescing, the adjacent free meta must still
		 *   be free and of the same size.
		 *
		 * If we failed to grow, reevaluate whether freelists have
		 * entries now by returning NULL.
		 */
		metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		if (meta != (from_right ? metab : metaf)) {
			return NULL;
		}
		if (metaf + meta_num >= metab) {
			kmem_range_out_of_va(range_id, meta_num);
		}
		if (adj_free_meta) {
			if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
			    kmem_get_free_chunk_len(adj_free_meta) !=
			    meta_req - meta_num) {
				return NULL;
			}
		}

		break;
	}

	/*
	 * If there is an adjacent free chunk, remove it from the free list
	 */
	if (adj_free_meta) {
		LIST_REMOVE(adj_free_meta, km_link);
		LIST_NEXT(adj_free_meta, km_link) = NULL;
		adj_free_meta->km_link.le_prev = NULL;
	}

	/*
	 * Update hwm
	 */
	meta_update = from_right ? start : end;
	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;

	/*
	 * Initialize metadata
	 */
	start = from_right ? start : (end - meta_req);
	kmem_init_allocated_chunk(start, sizeclass, size_idx);

	return start;
}

static void
kmem_requeue_meta(
	struct kmem_page_meta  *meta,
	struct kmem_list_head  *head)
{
	LIST_REMOVE(meta, km_link);
	LIST_INSERT_HEAD(head, meta, km_link);
}

__abortlike
static void
kmem_invalid_chunk_num(uint32_t chunks)
{
	panic("Invalid number of chunks %u", chunks);
}

/*
 * Return the corresponding sizeclass to stash free chunks in
 */
static uint32_t
kmem_get_size_idx_for_chunks(uint32_t chunks)
{
	for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
		if (chunks >= kmem_size_array[i].ks_num_chunk) {
			return i;
		}
	}
	kmem_invalid_chunk_num(chunks);
}

static void
kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
{
	bzero(meta, count * sizeof(struct kmem_page_meta));
}

static void
kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
{
#if MACH_ASSERT
	size_t size = count * sizeof(struct kmem_page_meta);

	assert(memcmp_zero_ptr_aligned(meta, size) == 0);
#else
#pragma unused(meta, count)
#endif
}

/*!
 * @function kmem_init_free_chunk()
 *
 * @discussion
 * This function prepares a range of chunks to be put on a free list.
 * The first and last metadata might be dirty, but the "inner" ones
 * must be zero filled by the caller prior to calling this function.
 */
static void
kmem_init_free_chunk(
	struct kmem_page_meta  *meta,
	uint32_t                num_chunks,
	uint32_t                front)
{
	struct kmem_sizeclass *sizeclass;
	uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);

	if (num_chunks > 2) {
		kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
	}

	meta[0] = (struct kmem_page_meta){
		.km_free_chunks = num_chunks,
		.km_page_marker = KMEM_META_FREE,
		.km_sizeclass = (uint8_t)size_idx,
	};
	if (num_chunks > 1) {
		meta[num_chunks - 1] = (struct kmem_page_meta){
			.km_free_chunks = num_chunks,
			.km_page_marker = KMEM_META_FREE,
			.km_sizeclass = (uint8_t)size_idx,
		};
	}

	sizeclass = &kmem_size_array[size_idx];
	LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
}

static struct kmem_page_meta *
kmem_get_free_chunk_from_list(
	struct kmem_sizeclass  *org_sizeclass,
	uint32_t                size_idx,
	uint32_t                front)
{
	struct kmem_sizeclass *sizeclass;
	uint32_t num_chunks = org_sizeclass->ks_num_chunk;
	struct kmem_page_meta *meta = NULL;
	uint32_t idx = size_idx;

	while (idx < KMEM_NUM_SIZECLASS) {
		sizeclass = &kmem_size_array[idx];
		meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
		if (meta) {
			break;
		}
		idx++;
	}

	/*
	 * Trim if larger in size
	 */
	if (meta) {
		uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);

		assert(meta->km_page_marker == KMEM_META_FREE);
		LIST_REMOVE(meta, km_link);
		LIST_NEXT(meta, km_link) = NULL;
		meta->km_link.le_prev = NULL;
		if (num_chunks_free > num_chunks) {
			num_chunks_free -= num_chunks;
			kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
		}

		kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
	}

	return meta;
}

kern_return_t
kmem_locate_space(
	vm_map_size_t           size,
	vm_map_range_id_t       range_id,
	bool                    from_right,
	vm_map_offset_t        *start_inout,
	vm_map_entry_t         *entry_out)
{
	vm_map_entry_t entry;
	uint32_t size_idx = kmem_get_idx_from_size(size);
	uint32_t front = kmem_get_front(range_id, from_right);
	struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
	struct kmem_page_meta *meta;

	assert(size <= sizeclass->ks_size);
again:
	if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		/*
		 * Requeue to full if necessary
		 */
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
			kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
		}
	} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
	    front)) != NULL) {
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		/*
		 * Queue to partial
		 */
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
	} else {
		meta = kmem_get_new_chunk(range_id, from_right, size_idx);
		if (meta == NULL) {
			goto again;
		}
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
	}

	if (entry_out) {
		*entry_out = entry;
	}

	return KERN_SUCCESS;
}

/*
 * Determine whether the given metadata was allocated from the right
 */
static bool
kmem_meta_is_from_right(
	kmem_range_id_t         range_id,
	struct kmem_page_meta  *meta)
{
	struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
	__assert_only struct kmem_page_meta *metab =
	    kmem_meta_hwm[kmem_get_front(range_id, 1)];
	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
	struct kmem_page_meta *meta_end;

	meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;

	if ((meta >= meta_base) && (meta < metaf)) {
		return false;
	}

	assert(meta >= metab && meta < meta_end);
	return true;
}

static void
kmem_free_chunk(
	kmem_range_id_t         range_id,
	struct kmem_page_meta  *meta,
	bool                    from_right)
{
	struct kmem_page_meta *meta_coalesce = meta - 1;
	struct kmem_page_meta *meta_start = meta;
	uint32_t num_chunks = kmem_get_chunk_len(meta);
	uint32_t add_chunks;
	struct kmem_page_meta *meta_end = meta + num_chunks;
	struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
	uint32_t front = kmem_get_front(range_id, from_right);

	meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
	meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];

	LIST_REMOVE(meta, km_link);
	kmem_clear_meta_range(meta, num_chunks);

	/*
	 * Coalesce left
	 */
	if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
	    (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
		meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
		add_chunks = kmem_get_free_chunk_len(meta_start);
		num_chunks += add_chunks;
		LIST_REMOVE(meta_start, km_link);
		kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
	}

	/*
	 * Coalesce right
	 */
	if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
	    (meta_end->km_page_marker == KMEM_META_FREE)) {
		add_chunks = kmem_get_free_chunk_len(meta_end);
		LIST_REMOVE(meta_end, km_link);
		kmem_clear_meta_range(meta_end, 1);
		meta_end = meta_end + add_chunks;
		num_chunks += add_chunks;
	}

	kmem_init_free_chunk(meta_start, num_chunks, front);
}

static void
kmem_free_slot(
	kmem_range_id_t         range_id,
	mach_vm_range_t         slot)
{
	struct kmem_page_meta *meta;
	vm_map_offset_t chunk_start;
	uint32_t size_idx, chunk_elem, slot_idx, num_elem;
	struct kmem_sizeclass *sizeclass;
	vm_map_size_t slot_size;

	meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
	size_idx = meta->km_sizeclass;
	slot_size = kmem_get_size_from_idx(size_idx);
	slot_idx = (slot->min_address - chunk_start) / slot_size;
	assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
	meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);

	sizeclass = &kmem_size_array[size_idx];
	chunk_elem = sizeclass->ks_num_elem;
	num_elem = __builtin_popcount(meta->km_bitmap);

	if (num_elem == chunk_elem) {
		/*
		 * The entire chunk is empty: add it to the empty list.
		 */
		bool from_right = kmem_meta_is_from_right(range_id, meta);

		kmem_free_chunk(range_id, meta, from_right);
	} else if (num_elem == KMEM_NUM_GUARDS + 1) {
		/*
		 * We freed into a full chunk: move it back to partial.
		 */
		uint32_t front = kmem_get_front(range_id,
		    kmem_meta_is_from_right(range_id, meta));

		kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
	}
}

void
kmem_free_space(
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_map_range_id_t       range_id,
	mach_vm_range_t         slot)
{
	bool entry_present = false;
	vm_map_entry_t prev_entry;
	vm_map_entry_t next_entry;

	if ((slot->min_address == start) && (slot->max_address == end)) {
		/*
		 * Entire slot is being freed at once
		 */
		return kmem_free_slot(range_id, slot);
	}

	entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
	assert(!entry_present);
	next_entry = prev_entry->vme_next;

	if (((prev_entry == vm_map_to_entry(kernel_map) ||
	    prev_entry->vme_end <= slot->min_address)) &&
	    (next_entry == vm_map_to_entry(kernel_map) ||
	    (next_entry->vme_start >= slot->max_address))) {
		/*
		 * Free entire slot
		 */
		kmem_free_slot(range_id, slot);
	}
}

#pragma mark kmem init

/*
 * The default percentage of memory that can be mlocked is scaled based on
 * the total amount of memory in the system. These percentages are
 * calculated offline and stored in this table. We index this table by
 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the
 * range [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t)).
 *
 * Note that these values were picked for mac.
 * If we ever have very large memory config arm devices, we may want to revisit
 * since the kernel overhead is smaller there due to the larger page size.
 */

/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
#define VM_USER_WIREABLE_MIN_CONFIG 32
#if CONFIG_JETSAM
/* Systems with jetsam can wire a bit more b/c the system can relieve wired
 * pressure.
 */
static vm_map_size_t wire_limit_percents[] =
{ 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
#else
static vm_map_size_t wire_limit_percents[] =
{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
#endif /* CONFIG_JETSAM */
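
/*
 * Worked example (illustrative): on a 16GB non-jetsam config,
 * log2(16GB) = 34, so the table index is 34 - VM_USER_WIREABLE_MIN_CONFIG
 * = 2 and the limit is 76% of 16GB (about 12.2GB), subject to the
 * VM_NOT_USER_WIREABLE_MAX cap applied below.
 */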
3602
/*
 * Sets the default global user wire limit which limits the amount of
 * memory that can be locked via mlock() based on the above algorithm.
 * This can be overridden via a sysctl.
 */
static void
kmem_set_user_wire_limits(void)
{
	uint64_t available_mem_log;
	uint64_t max_wire_percent;
	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
	    sizeof(vm_map_size_t);
	vm_map_size_t limit;
	uint64_t config_memsize = max_mem;
#if defined(XNU_TARGET_OS_OSX)
	config_memsize = max_mem_actual;
#endif /* defined(XNU_TARGET_OS_OSX) */

	available_mem_log = bit_floor(config_memsize);

	if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
		available_mem_log = 0;
	} else {
		available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
	}
	if (available_mem_log >= wire_limit_percents_length) {
		available_mem_log = wire_limit_percents_length - 1;
	}
	max_wire_percent = wire_limit_percents[available_mem_log];

	limit = config_memsize * max_wire_percent / 100;
	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
	}

	vm_global_user_wire_limit = limit;
	/* the default per task limit is the same as the global limit */
	vm_per_task_user_wire_limit = limit;
	vm_add_wire_count_over_global_limit = 0;
	vm_add_wire_count_over_user_limit = 0;
}
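
/*
 * Worked example (illustrative; assumes a mac with max_mem_actual = 64GB
 * and CONFIG_JETSAM disabled):
 *
 *   bit_floor() here yields floor(log2(config_memsize)) = 36
 *   36 - VM_USER_WIREABLE_MIN_CONFIG (32) = 4
 *   wire_limit_percents[4] = 82
 *   limit = 64GB * 82 / 100 ~= 52.5GB
 *
 * leaving ~11.5GB non-wireable, unless that exceeds
 * VM_NOT_USER_WIREABLE_MAX, in which case the limit is raised.
 */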

#define KMEM_MAX_CLAIMS 50
__startup_data
struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
__startup_data
uint32_t kmem_claim_count = 0;

__startup_func
void
kmem_range_startup_init(
	struct kmem_range_startup_spec *sp)
{
	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
	if (sp->kc_calculate_sz) {
		sp->kc_size = (sp->kc_calculate_sz)();
	}
	if (sp->kc_size) {
		kmem_claims[kmem_claim_count] = *sp;
		kmem_claim_count++;
	}
}
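
/*
 * A minimal sketch of what a registered claim looks like (illustrative
 * only; "example_range" and its size are hypothetical). A spec either
 * carries a fixed kc_size, or a kc_calculate_sz callback that is invoked
 * above at registration time:
 */
#if 0
__startup_data
static struct mach_vm_range example_range;

__startup_data
static struct kmem_range_startup_spec example_spec = {
	.kc_name  = "example_range",
	.kc_range = &example_range,
	.kc_size  = (32ULL << 20),      /* fixed 32M claim */
	.kc_flags = KC_NONE,
};
#endif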

static vm_offset_t
kmem_fuzz_start(void)
{
	vm_offset_t kmapoff_kaddr = 0;
	uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);

	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
	    VM_KERN_MEMORY_OSFMK);
	return kmapoff_kaddr + kmapoff_size;
}
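
/*
 * (early_random() & 0x1ff) + 1 draws 9 random bits, yielding a page count
 * in [1, 512]; with 4K pages the slide therefore consumes between 4K and
 * 2M of VA. The "+ 1" also guarantees the minimum one-page slide that the
 * pointer-packing note in kmem_scramble_ranges() below relies on.
 */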

/*
 * Generate a randomly shuffled array of indices from 0 to count - 1
 */
__startup_func
void
kmem_shuffle(
	uint16_t *shuffle_buf,
	uint16_t count)
{
	for (uint16_t i = 0; i < count; i++) {
		uint16_t j = kmem_get_random16(i);
		if (j != i) {
			shuffle_buf[i] = shuffle_buf[j];
		}
		shuffle_buf[j] = i;
	}
}
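
/*
 * This is the "inside-out" variant of the Fisher-Yates shuffle: it builds
 * a shuffled identity permutation directly, so shuffle_buf needs no
 * initialization. A tiny trace for count = 3, with illustrative draws
 * from kmem_get_random16 (which returns a value in [0, i]):
 *
 *   i=0: j=0       -> buf = {0}
 *   i=1: j=0 (say) -> buf = {1, 0}
 *   i=2: j=1 (say) -> buf = {1, 2, 0}
 */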

__startup_func
static void
kmem_shuffle_claims(void)
{
	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
	uint16_t limit = (uint16_t)kmem_claim_count;

	kmem_shuffle(&shuffle_buf[0], limit);
	for (uint16_t i = 0; i < limit; i++) {
		struct kmem_range_startup_spec tmp = kmem_claims[i];
		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
		kmem_claims[shuffle_buf[i]] = tmp;
	}
}

__startup_func
static void
kmem_readjust_ranges(
	uint32_t cur_idx)
{
	assert(cur_idx != 0);
	uint32_t j = cur_idx - 1, random;
	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
	struct mach_vm_range *sp_range = sp.kc_range;

	/*
	 * Find max index where restriction is met
	 */
	for (; j > 0; j--) {
		struct kmem_range_startup_spec spj = kmem_claims[j];
		vm_map_offset_t max_start = spj.kc_range->min_address;
		if (spj.kc_flags & KC_NO_MOVE) {
			panic("kmem_range_init: Can't scramble with multiple constraints");
		}
		if (max_start <= sp_range->min_address) {
			break;
		}
	}

	/*
	 * Pick a random index from 0 to max index and shift claims to the right
	 * to make room for restricted claim
	 */
	random = kmem_get_random16((uint16_t)j);
	assert(random <= j);

	sp_range->min_address = kmem_claims[random].kc_range->min_address;
	sp_range->max_address = sp_range->min_address + sp.kc_size;

	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
		struct kmem_range_startup_spec spj = kmem_claims[j];
		struct mach_vm_range *range = spj.kc_range;
		range->min_address += sp.kc_size;
		range->max_address += sp.kc_size;
		kmem_claims[j + 1] = spj;
	}

	sp.kc_flags = KC_NO_MOVE;
	kmem_claims[random] = sp;
}

__startup_func
static vm_map_size_t
kmem_add_ptr_claims(void)
{
	uint64_t kmem_meta_num, kmem_ptr_chunks;
	vm_map_size_t org_ptr_range_size = ptr_range_size;

	ptr_range_size -= PAGE_SIZE;
	ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
	ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));

	kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
	ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;

	kmem_meta_num = kmem_ptr_chunks + 2;
	kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));

	assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
	/*
	 * Add claims for kmem's ranges
	 */
	for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
		struct kmem_range_startup_spec kmem_spec = {
			.kc_name = "kmem_ptr_range",
			.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
			.kc_size = ptr_range_size,
			.kc_flags = KC_NO_ENTRY,
		};
		kmem_claims[kmem_claim_count++] = kmem_spec;

		struct kmem_range_startup_spec kmem_meta_spec = {
			.kc_name = "kmem_ptr_range_meta",
			.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
			.kc_size = kmem_meta_size,
			.kc_flags = KC_NONE,
		};
		kmem_claims[kmem_claim_count++] = kmem_meta_spec;
	}
	return (org_ptr_range_size - ptr_range_size - kmem_meta_size) *
	       kmem_ptr_ranges;
}
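
/*
 * The scaling above splits each range's original budget between usable VA
 * and the metadata that describes it: every KMEM_CHUNK_SIZE_MIN of range
 * needs one struct kmem_page_meta, so the usable portion is scaled by
 * CHUNK / (CHUNK + sizeof(meta)) and then rounded down to a whole number
 * of chunks. Illustrative numbers, assuming a hypothetical 1G budget,
 * 256K chunks and a 64-byte meta:
 *
 *   ptr_range_size ~= (1G - 4K) * 256K / (256K + 64)  (just under 1G)
 *   kmem_ptr_chunks = ptr_range_size / 256K           (rounded down)
 *   kmem_meta_size  = round_page((chunks + 2) * 64)
 *
 * Whatever is left of the original budget is returned to the caller so it
 * can be folded back into the data range.
 */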

__startup_func
static void
kmem_add_extra_claims(void)
{
	vm_map_size_t largest_free_size = 0, total_claims = 0;

	vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
	largest_free_size = trunc_page(largest_free_size);

	/*
	 * kasan and configs w/o *TRR need to have just one ptr range due to
	 * resource constraints.
	 */
#if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
	kmem_ptr_ranges = 1;
#endif
	/*
	 * Determine size of data and pointer kmem_ranges
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		total_claims += kmem_claims[i].kc_size;
	}
	assert((total_claims & PAGE_MASK) == 0);
	largest_free_size -= total_claims;

	/*
	 * Use half the total available VA for all pointer allocations (this
	 * includes the kmem_sprayqtn range). Given that we have 4 total
	 * ranges, divide the available VA by 8.
	 */
	ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
	sprayqtn_range_size = ptr_range_size;

	if (sprayqtn_range_size > (sane_size / 2)) {
		sprayqtn_range_size = sane_size / 2;
	}

	ptr_range_size = round_page(ptr_range_size);
	sprayqtn_range_size = round_page(sprayqtn_range_size);


	data_range_size = largest_free_size
	    - (ptr_range_size * kmem_ptr_ranges)
	    - sprayqtn_range_size;

	/*
	 * Add claims for kmem's ranges
	 */
	data_range_size += kmem_add_ptr_claims();
	assert(data_range_size + sprayqtn_range_size +
	    ((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <=
	    largest_free_size);

	struct kmem_range_startup_spec kmem_spec_sprayqtn = {
		.kc_name = "kmem_sprayqtn_range",
		.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
		.kc_size = sprayqtn_range_size,
		.kc_flags = KC_NO_ENTRY,
	};
	kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;

	struct kmem_range_startup_spec kmem_spec_data = {
		.kc_name = "kmem_data_range",
		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
		.kc_size = data_range_size,
		.kc_flags = KC_NO_ENTRY,
	};
	kmem_claims[kmem_claim_count++] = kmem_spec_data;
}
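
/*
 * Sizing recap (illustrative, assuming kmem_ptr_ranges == 3 and 512G of
 * free kernel VA): (3 + 1) * 2 = 8, so each pointer range and the
 * sprayqtn range gets 64G. Pointer allocations then hold half of the VA
 * in total, and the data range receives the remaining ~256G plus whatever
 * kmem_add_ptr_claims() hands back. The sprayqtn range is additionally
 * capped at half of physical memory (sane_size / 2).
 */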

__startup_func
static void
kmem_scramble_ranges(void)
{
	vm_map_offset_t start = 0;

	/*
	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so
	 * that the vm can find the requested ranges.
	 */
	kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
	    VM_MAP_PAGE_SIZE(kernel_map));
	kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;

	/*
	 * Allocate the g_kext_map prior to randomizing the remaining submaps:
	 * this map is 2G in size and starts at the end of kernel_text on x86,
	 * so it could otherwise overflow into the heap.
	 */
	kext_alloc_init();

	/*
	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
	 * stack addresses. (With a 4K page and 9 bits of randomness, this
	 * eats about 2M of VA from the map)
	 *
	 * Note that we always need to slide by at least one page because the VM
	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
	 * do not admit this address to be part of any zone submap.
	 */
	start = kmem_fuzz_start();

	/*
	 * Add claims for ptr and data kmem_ranges
	 */
	kmem_add_extra_claims();

	/*
	 * Shuffle registered claims
	 */
	assert(kmem_claim_count < UINT16_MAX);
	kmem_shuffle_claims();

	/*
	 * Apply restrictions and determine range for each claim
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		vm_map_offset_t end = 0;
		struct kmem_range_startup_spec sp = kmem_claims[i];
		struct mach_vm_range *sp_range = sp.kc_range;

		if (vm_map_locate_space_anywhere(kernel_map, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_locate_space failing for claim %s",
			    sp.kc_name);
		}

		end = start + sp.kc_size;
		/*
		 * Re-adjust ranges if restriction not met
		 */
		if (sp_range->min_address && start > sp_range->min_address) {
			kmem_readjust_ranges(i);
		} else {
			sp_range->min_address = start;
			sp_range->max_address = end;
		}
		start = end;
	}

	/*
	 * We have settled on the ranges, now create temporary entries for the
	 * claims
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];
		vm_map_entry_t entry = NULL;
		if (sp.kc_flags & KC_NO_ENTRY) {
			continue;
		}
		if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_find_space failing for claim %s",
			    sp.kc_name);
		}
		vm_object_reference(kernel_object_default);
		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
		VME_OFFSET_SET(entry, entry->vme_start);
		vm_map_unlock(kernel_map);
	}
	/*
	 * Now that we are done assigning all the ranges, reset
	 * kmem_ranges[KMEM_RANGE_ID_NONE]
	 */
	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};

#if DEBUG || DEVELOPMENT
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];

		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
		    (void *)sp.kc_range->min_address,
		    (void *)sp.kc_range->max_address,
		    mach_vm_size_pretty(sp.kc_size),
		    mach_vm_size_unit(sp.kc_size));
	}
#endif /* DEBUG || DEVELOPMENT */
}

__startup_func
static void
kmem_range_init(void)
{
	vm_size_t range_adjustment;

	kmem_scramble_ranges();

	range_adjustment = sprayqtn_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;

	range_adjustment = data_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;

	pmap_init();
	kmem_metadata_init();
	kmem_sizeclass_init();

#if DEBUG || DEVELOPMENT
	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
		printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
		    (void *)kmem_large_ranges[i].min_address,
		    (void *)kmem_large_ranges[i].max_address,
		    mach_vm_size_pretty(range_size),
		    mach_vm_size_unit(range_size));
	}
#endif
}
STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
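
/*
 * The ">> 3" adjustment above trims the bottom eighth of each range for
 * the corresponding kmem_large_ranges[] entry: with a hypothetical 64G
 * sprayqtn range, range_adjustment is 8G and the large-allocation
 * sub-range covers only the upper 56G, while smaller allocations may
 * still use the full range.
 */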

#if DEBUG || DEVELOPMENT
__startup_func
static void
kmem_log_init(void)
{
	/*
	 * The log can only be created after the kmem subsystem is
	 * initialized, as btlog creation uses kmem
	 */
	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
}
STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);

kmem_gobj_stats
kmem_get_gobj_stats(void)
{
	kmem_gobj_stats stats = {};

	vm_map_lock(kernel_map);
	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
		struct mach_vm_range range = kmem_ranges[range_id];
		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		struct kmem_page_meta *meta_end;
		uint64_t meta_idx = meta - kmem_meta_base[range_id];
		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
		vm_map_offset_t addr;
		vm_map_entry_t entry;

		/*
		 * Left front
		 */
		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));

		/*
		 * Right front
		 */
		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
		    &meta_idx);
		meta_idx = meta_end - meta;
		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);

		/*
		 * Compute VA allocated in entire range
		 */
		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
			entry = entry->vme_next;
		}
		while (entry != vm_map_to_entry(kernel_map) &&
		    entry->vme_start < range.max_address) {
			used += (entry->vme_end - entry->vme_start);
			entry = entry->vme_next;
		}

		pte_sz = round_page(atop(va - used) * 8);

		stats.total_used += used;
		stats.total_va += va;
		stats.pte_sz += pte_sz;
		stats.meta_sz += meta_sz;
	}
	vm_map_unlock(kernel_map);

	return stats;
}
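
/*
 * Note on the pte_sz estimate above: atop(va - used) counts the pages of
 * tracked-but-unallocated VA in the range, and each is charged 8 bytes,
 * i.e. one 64-bit leaf PTE; higher-level page-table overhead is not
 * counted.
 */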

#endif /* DEBUG || DEVELOPMENT */

/*
 * kmem_init:
 *
 * Initialize the kernel's virtual memory map, taking
 * into account all memory allocated up to this time.
 */
__startup_func
void
kmem_init(
	vm_offset_t start,
	vm_offset_t end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(kernel_map));

	vm_map_will_allocate_early_map(&kernel_map);
#if defined(__arm64__)
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
	    VM_MAX_KERNEL_ADDRESS,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 * Reserve virtual memory allocated up to this time.
	 */
	{
		unsigned int region_select = 0;
		vm_map_offset_t region_start;
		vm_map_size_t region_size;
		vm_map_offset_t map_addr;
		kern_return_t kr;

		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
			map_addr = region_start;
			kr = vm_map_enter(kernel_map, &map_addr,
			    vm_map_round_page(region_size,
			    VM_MAP_PAGE_MASK(kernel_map)),
			    (vm_map_offset_t) 0,
			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
			    VM_OBJECT_NULL,
			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
			    VM_INHERIT_DEFAULT);

			if (kr != KERN_SUCCESS) {
				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
				    (uint64_t) region_size, kr);
			}

			region_select++;
		}
	}
#else
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 * Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
		    &map_addr,
		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
		    (vm_map_offset_t) 0,
		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0, FALSE,
		    VM_PROT_NONE, VM_PROT_NONE,
		    VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
			    (uint64_t) start, (uint64_t) end,
			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			    kr);
		}
	}
#endif

	kmem_set_user_wire_limits();
}


#pragma mark map copyio
/*
 * Note: semantic types aren't used as `copyio` already validates.
 */

kern_return_t
copyinmap(
	vm_map_t map,
	vm_map_offset_t fromaddr,
	void *todata,
	vm_size_t length)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	} else if (current_map() == map) {
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
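
/*
 * A minimal usage sketch (illustrative; "utask_map" and "uaddr" are
 * hypothetical stand-ins for a target task's map and a user address):
 */
#if 0
uint64_t value;

if (copyinmap(utask_map, uaddr, &value, sizeof(value)) != KERN_SUCCESS) {
	/* uaddr was not mapped readable in utask_map */
}
#endif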

kern_return_t
copyoutmap(
	vm_map_t map,
	void *fromdata,
	vm_map_address_t toaddr,
	vm_size_t length)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
	} else if (current_map() == map) {
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

kern_return_t
copyoutmap_atomic32(
	vm_map_t map,
	uint32_t value,
	vm_map_address_t toaddr)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint32_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

kern_return_t
copyoutmap_atomic64(
	vm_map_t map,
	uint64_t value,
	vm_map_address_t toaddr)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint64_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}


#pragma mark pointer obfuscation / packing

/*
 * The following two functions are to be used when exposing kernel
 * addresses to userspace via any of the various debug or info
 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
 * are exported to KEXTs.
 *
 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

vm_offset_t
vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
{
	assert(salt != 0);

	if (addr == 0) {
		return 0ul;
	}

	if (VM_KERNEL_IS_SLID(addr)) {
		return VM_KERNEL_UNSLIDE(addr);
	}

	addr = VM_KERNEL_STRIP_UPTR(addr);

	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
	SHA256_CTX sha_ctx;

	SHA256_Init(&sha_ctx);
	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
	SHA256_Final(sha_digest, &sha_ctx);

	return sha_digest[0];
}
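
/*
 * The digest above is computed over (salt, addr) and truncated to the
 * first vm_offset_t worth of bytes, so the mapping is stable for a given
 * boot (fixed salt) but not practically reversible. Slid addresses are
 * returned unslid instead of hashed, presumably so they can still be
 * symbolicated.
 */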

__exported vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr);
vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr)
{
	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
}

void
vm_kernel_addrhide(
	vm_offset_t addr,
	vm_offset_t *hide_addr)
{
	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
}

void
vm_kernel_addrperm_external(
	vm_offset_t addr,
	vm_offset_t *perm_addr)
{
	if (VM_KERNEL_IS_SLID(addr)) {
		*perm_addr = VM_KERNEL_UNSLIDE(addr);
	} else if (VM_KERNEL_ADDRESS(addr)) {
		*perm_addr = addr + vm_kernel_addrperm_ext;
	} else {
		*perm_addr = addr;
	}
}

void
vm_kernel_unslide_or_perm_external(
	vm_offset_t addr,
	vm_offset_t *up_addr)
{
	vm_kernel_addrperm_external(addr, up_addr);
}

void
vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
{
	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
		panic("pointer %p can't be packed: low %d bits aren't 0",
		    (void *)ptr, params.vmpp_shift);
	} else if (ptr <= params.vmpp_base) {
		panic("pointer %p can't be packed: below base %p",
		    (void *)ptr, (void *)params.vmpp_base);
	} else {
		panic("pointer %p can't be packed: maximum encodable pointer is %p",
		    (void *)ptr, (void *)vm_packing_max_packable(params));
	}
}

void
vm_packing_verify_range(
	const char *subsystem,
	vm_offset_t min_address,
	vm_offset_t max_address,
	vm_packing_params_t params)
{
	if (min_address > max_address) {
		panic("%s: %s range invalid min:%p > max:%p",
		    __func__, subsystem, (void *)min_address, (void *)max_address);
	}

	if (!params.vmpp_base_relative) {
		return;
	}

	if (min_address <= params.vmpp_base) {
		panic("%s: %s range invalid min:%p <= base:%p",
		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
	}

	if (max_address > vm_packing_max_packable(params)) {
		panic("%s: %s range invalid max:%p > max packable:%p",
		    __func__, subsystem, (void *)max_address,
		    (void *)vm_packing_max_packable(params));
	}
}
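
/*
 * Worked example of the constraints checked above (illustrative
 * parameters, not a real configuration): for base-relative packing with
 *
 *   params = { .vmpp_base = B, .vmpp_bits = 32,
 *              .vmpp_shift = 6, .vmpp_base_relative = true }
 *
 * a packed value v decodes to B + (v << 6), so every packable pointer
 * must be 64-byte aligned, strictly above B, and no higher than
 * vm_packing_max_packable(), i.e. roughly B + ((2^32 - 1) << 6).
 */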

#pragma mark tests
#if MACH_ASSERT
#include <sys/errno.h>

static void
kmem_test_for_entry(
	vm_map_t map,
	vm_offset_t addr,
	void (^block)(vm_map_entry_t))
{
	vm_map_entry_t entry;

	vm_map_lock(map);
	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
	vm_map_unlock(map);
}

#define kmem_test_assert_map(map, pg, entries) ({ \
	assert3u((map)->size, ==, ptoa(pg)); \
	assert3u((map)->hdr.nentries, ==, entries); \
})

static bool
can_write_at(vm_offset_t offs, uint32_t page)
{
	static const int zero;

	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
}
#define assert_writeable(offs, page) \
	assertf(can_write_at(offs, page), \
	    "can write at %p + ptoa(%d)", (void *)offs, page)

#define assert_faults(offs, page) \
	assertf(!can_write_at(offs, page), \
	    "can write at %p + ptoa(%d)", (void *)offs, page)

#define peek(offs, page) \
	(*(uint32_t *)((offs) + ptoa(page)))

#define poke(offs, page, v) \
	(*(uint32_t *)((offs) + ptoa(page)) = (v))

__attribute__((noinline))
static void
kmem_alloc_basic_test(vm_map_t map)
{
	kmem_guard_t guard = {
		.kmg_tag = VM_KERN_MEMORY_DIAG,
	};
	vm_offset_t addr;

	/*
	 * Test wired basics:
	 * - KMA_KOBJECT
	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
	 * - allocation alignment
	 */
	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
	kmem_test_assert_map(map, 10, 1);

	kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
		assert(e->vme_kernel_object);
		assert(!e->vme_atomic);
		assert3u(e->vme_start, <=, addr);
		assert3u(addr + ptoa(10), <=, e->vme_end);
	});

	assert_faults(addr, 0);
	for (int i = 1; i < 9; i++) {
		assert_writeable(addr, i);
	}
	assert_faults(addr, 9);

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);

	/*
	 * Test pageable basics.
	 */
	addr = kmem_alloc_guard(map, ptoa(10), 0,
	    KMA_PAGEABLE, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 0, PG)", map);
	kmem_test_assert_map(map, 10, 1);

	for (int i = 0; i < 9; i++) {
		assert_faults(addr, i);
		poke(addr, i, 42);
		assert_writeable(addr, i);
	}

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);
}

__attribute__((noinline))
static void
kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
{
	kmem_guard_t guard = {
		.kmg_atomic = !(kind & KMR_DATA),
		.kmg_tag = VM_KERN_MEMORY_DIAG,
		.kmg_context = 0xefface,
	};
	vm_offset_t addr, newaddr;
	const int N = 10;

	/*
	 * This isn't something kmem_realloc_guard() _needs_ to do,
	 * we could conceive an implementation where it grows in place
	 * if there's space after it.
	 *
	 * However, this is what the implementation does today.
	 */
	bool realloc_growth_changes_address = true;
	bool GL = (kind & KMR_GUARD_LAST);

	/*
	 * Initial N page allocation
	 */
	addr = kmem_alloc_guard(map, ptoa(N), 0,
	    (kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO,
	    guard).kmr_address;
	assert3u(addr, !=, 0);
	kmem_test_assert_map(map, N, 1);
	for (int pg = 0; pg < N - GL; pg++) {
		poke(addr, pg, 42 + pg);
	}
	for (int pg = N - GL; pg < N; pg++) {
		assert_faults(addr, pg);
	}


	/*
	 * Grow to N + 3 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(newaddr, !=, 0);
	if (realloc_growth_changes_address) {
		assert3u(addr, !=, newaddr);
	}
	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
		kmem_test_assert_map(map, N + 3, 1);
	} else {
		kmem_test_assert_map(map, 2 * N + 3, 2);
	}
	for (int pg = 0; pg < N - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 42 + pg);
	}
	if ((kind & KMR_FREEOLD) == 0) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert3u(peek(addr, pg), ==, 42 + pg);
		}
		/* check for true sharing of the backing pages */
		poke(addr + 16, 0, 1234);
		assert3u(peek(newaddr + 16, 0), ==, 1234);
		kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
		kmem_test_assert_map(map, N + 3, 1);
	}
	if (addr != newaddr) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert_faults(addr, pg);
		}
	}
	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 0);
	}
	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
		assert_faults(newaddr, pg);
	}
	addr = newaddr;


	/*
	 * Shrink to N - 2 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(map->size, ==, ptoa(N - 2));
	assert3u(newaddr, ==, addr);
	kmem_test_assert_map(map, N - 2, 1);

	for (int pg = 0; pg < N - 2 - GL; pg++) {
		assert3u(peek(addr, pg), ==, 42 + pg);
	}
	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
		assert_faults(addr, pg);
	}

	kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
	kmem_test_assert_map(map, 0, 0);
}

static int
kmem_basic_test(__unused int64_t in, int64_t *out)
{
	mach_vm_offset_t addr;
	vm_map_t map;

	printf("%s: test running\n", __func__);

	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
	    VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
	    KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;

	printf("%s: kmem_alloc ...\n", __func__);
	kmem_alloc_basic_test(map);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	/* using KMR_DATA signals to test the non atomic realloc path */
	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA);
	printf("%s: PASS\n", __func__);

	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
	vm_map_deallocate(map);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);

static void
kmem_test_get_size_idx_for_chunks(uint32_t chunks)
{
	__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);

	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
}

__attribute__((noinline))
static void
kmem_test_get_size_idx_for_all_chunks(void)
{
	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		uint32_t chunks = kmem_size_array[i].ks_num_chunk;

		if (chunks != 1) {
			kmem_test_get_size_idx_for_chunks(chunks - 1);
		}
		kmem_test_get_size_idx_for_chunks(chunks);
		kmem_test_get_size_idx_for_chunks(chunks + 1);
	}
}

static int
kmem_guard_obj_test(__unused int64_t in, int64_t *out)
{
	printf("%s: test running\n", __func__);

	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
	kmem_test_get_size_idx_for_all_chunks();
	printf("%s: PASS\n", __func__);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
#endif /* MACH_ASSERT */