1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map_internal.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <vm/vm_init.h>
77 #include <vm/vm_fault.h>
78 #include <kern/misc_protos.h>
79 #include <vm/cpm.h>
80 #include <kern/ledger.h>
81 #include <kern/bits.h>
82 #include <kern/startup.h>
83
84 #include <string.h>
85
86 #include <libkern/OSDebug.h>
87 #include <libkern/crypto/sha2.h>
88 #include <libkern/section_keywords.h>
89 #include <sys/kdebug.h>
90
91 #include <san/kasan.h>
92 #include <kern/kext_alloc.h>
93 #include <kern/backtrace.h>
94 #include <os/hash.h>
95 #include <kern/zalloc_internal.h>
96
97 /*
98 * Variables exported by this module.
99 */
100
101 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
102 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT] = {};
103 TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges", 2);
104 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
105 __startup_data
106 vm_map_size_t data_range_size, ptr_range_size;
107 SECURITY_READ_ONLY_LATE(struct mach_vm_range)
108 kmem_large_ranges[KMEM_RANGE_COUNT] = {};
109 #endif
110
111 #pragma mark helpers
112
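/*
 * The ANYF() overloads below collapse the kma_flags_t, kmr_flags_t and
 * kmf_flags_t flag types into the common kmem_flags_t representation, so
 * that the helpers that follow can be written once for all three entry
 * points (alloc, realloc, free).
 */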
113 __attribute__((overloadable))
114 __header_always_inline kmem_flags_t
115 ANYF(kma_flags_t flags)
116 {
117 return (kmem_flags_t)flags;
118 }
119
120 __attribute__((overloadable))
121 __header_always_inline kmem_flags_t
122 ANYF(kmr_flags_t flags)
123 {
124 return (kmem_flags_t)flags;
125 }
126
127 __attribute__((overloadable))
128 __header_always_inline kmem_flags_t
129 ANYF(kmf_flags_t flags)
130 {
131 return (kmem_flags_t)flags;
132 }
133
134 __abortlike
135 static void
136 __kmem_invalid_size_panic(
137 vm_map_t map,
138 vm_size_t size,
139 uint32_t flags)
140 {
141 panic("kmem(map=%p, flags=0x%x): invalid size %zd",
142 map, flags, (size_t)size);
143 }
144
145 __abortlike
146 static void
147 __kmem_invalid_arguments_panic(
148 const char *what,
149 vm_map_t map,
150 vm_address_t address,
151 vm_size_t size,
152 uint32_t flags)
153 {
154 panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
155 "invalid arguments passed",
156 what, map, (void *)address, (size_t)size, flags);
157 }
158
159 __abortlike
160 static void
161 __kmem_failed_panic(
162 vm_map_t map,
163 vm_size_t size,
164 uint32_t flags,
165 kern_return_t kr,
166 const char *what)
167 {
168 panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
169 what, map, (size_t)size, flags, kr);
170 }
171
172 __abortlike
173 static void
174 __kmem_entry_not_found_panic(
175 vm_map_t map,
176 vm_offset_t addr)
177 {
178 panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
179 }
180
181 __abortlike
182 static void
183 __kmem_invalid_object_panic(uint32_t flags)
184 {
185 if (flags == 0) {
186 panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
187 }
188 panic("more than one of KMEM_KOBJECT or KMEM_COMPRESSOR specified");
189 }
190
191 static inline vm_object_t
192 __kmem_object(kmem_flags_t flags)
193 {
194 flags &= (KMEM_KOBJECT | KMEM_COMPRESSOR);
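/*
 * `flags & (flags - 1)` clears the lowest set bit, so a non-zero result
 * means more than one of KMEM_KOBJECT / KMEM_COMPRESSOR was passed
 * (e.g. 0b11 & 0b10 == 0b10), while flags == 0 means neither was:
 * exactly one of the two is required.
 */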
195 if (flags == 0 || (flags & (flags - 1))) {
196 __kmem_invalid_object_panic(flags);
197 }
198
199 return (flags & KMEM_KOBJECT) ? kernel_object : compressor_object;
200 }
201
202 static inline vm_size_t
203 __kmem_guard_left(kmem_flags_t flags)
204 {
205 return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
206 }
207
208 static inline vm_size_t
209 __kmem_guard_right(kmem_flags_t flags)
210 {
211 return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
212 }
213
214 static inline vm_size_t
215 __kmem_guard_size(kmem_flags_t flags)
216 {
217 return __kmem_guard_left(flags) + __kmem_guard_right(flags);
218 }
219
220
221 #pragma mark kmem range methods
222
223 #if __arm64__
224 // <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
225 #define mach_vm_range_load(r, r_min, r_max) \
226 asm("ldp %[rmin], %[rmax], [%[range]]" \
227 : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
228 : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
229 #else
230 #define mach_vm_range_load(r, rmin, rmax) \
231 ({ rmin = (r)->min_address; rmax = (r)->max_address; })
232 #endif
233
234 __abortlike
235 static void
236 __mach_vm_range_overflow(
237 mach_vm_offset_t addr,
238 mach_vm_offset_t size)
239 {
240 panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
241 addr, addr, size);
242 }
243
244 __abortlike
245 static void
246 __mach_vm_range_invalid(
247 mach_vm_offset_t min_address,
248 mach_vm_offset_t max_address)
249 {
250 panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
251 min_address, max_address);
252 }
253
254 __header_always_inline mach_vm_size_t
255 mach_vm_range_size(const struct mach_vm_range *r)
256 {
257 mach_vm_offset_t rmin, rmax;
258
259 mach_vm_range_load(r, rmin, rmax);
260 return rmax - rmin;
261 }
262
263 __attribute__((overloadable))
264 __header_always_inline bool
265 mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
266 {
267 mach_vm_offset_t rmin, rmax;
268
269 #if CONFIG_KERNEL_TBI
270 if (VM_KERNEL_ADDRESS(addr)) {
271 addr = VM_KERNEL_TBI_FILL(addr);
272 }
273 #endif /* CONFIG_KERNEL_TBI */
274
275 /*
276 * The `&` is not a typo: we really expect the check to pass,
277 * so encourage the compiler to eagerly load and test without branches
278 */
279 mach_vm_range_load(r, rmin, rmax);
280 return (addr >= rmin) & (addr < rmax);
281 }
282
283 __attribute__((overloadable))
284 __header_always_inline bool
285 mach_vm_range_contains(
286 const struct mach_vm_range *r,
287 mach_vm_offset_t addr,
288 mach_vm_offset_t size)
289 {
290 mach_vm_offset_t rmin, rmax;
291
292 #if CONFIG_KERNEL_TBI
293 if (VM_KERNEL_ADDRESS(addr)) {
294 addr = VM_KERNEL_TBI_FILL(addr);
295 }
296 #endif /* CONFIG_KERNEL_TBI */
297
298 /*
299 * The `&` is not a typo: we really expect the check to pass,
300 * so encourage the compiler to eagerly load and test without branches
301 */
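/*
 * Checking `addr + size >= rmin` in addition to `addr + size <= rmax`
 * also rejects a sum that wraps around: for kernel ranges, whose minimum
 * sits well above zero, a wrapped sum falls below rmin.
 */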
302 mach_vm_range_load(r, rmin, rmax);
303 return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
304 }
305
306 __attribute__((overloadable))
307 __header_always_inline bool
308 mach_vm_range_intersects(
309 const struct mach_vm_range *r1,
310 const struct mach_vm_range *r2)
311 {
312 mach_vm_offset_t r1_min, r1_max;
313 mach_vm_offset_t r2_min, r2_max;
314
315 mach_vm_range_load(r1, r1_min, r1_max);
316 r2_min = r2->min_address;
317 r2_max = r2->max_address;
318
319 if (r1_min > r1_max) {
320 __mach_vm_range_invalid(r1_min, r1_max);
321 }
322
323 if (r2_min > r2_max) {
324 __mach_vm_range_invalid(r2_min, r2_max);
325 }
326
327 return r1_max > r2_min && r1_min < r2_max;
328 }
329
330 __attribute__((overloadable))
331 __header_always_inline bool
332 mach_vm_range_intersects(
333 const struct mach_vm_range *r1,
334 mach_vm_offset_t addr,
335 mach_vm_offset_t size)
336 {
337 struct mach_vm_range r2;
338
339 #if CONFIG_KERNEL_TBI
340 addr = VM_KERNEL_STRIP_UPTR(addr);
341 #endif /* CONFIG_KERNEL_TBI */
342 r2.min_address = addr;
343 if (os_add_overflow(addr, size, &r2.max_address)) {
344 __mach_vm_range_overflow(addr, size);
345 }
346
347 return mach_vm_range_intersects(r1, &r2);
348 }
349
350 bool
351 kmem_range_id_contains(
352 kmem_range_id_t range_id,
353 vm_map_offset_t addr,
354 vm_map_size_t size)
355 {
356 return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
357 }
358
359 vm_map_size_t
360 kmem_range_id_size(kmem_range_id_t range_id)
361 {
362 return mach_vm_range_size(&kmem_ranges[range_id]);
363 }
364
365 kmem_range_id_t
366 kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
367 {
368 kmem_range_id_t range_id = 0;
369 for (; range_id < KMEM_RANGE_COUNT; range_id++) {
370 if (kmem_range_id_contains(range_id, addr, size)) {
371 break;
372 }
373 }
374 return range_id;
375 }
376
377
378 #pragma mark entry parameters
379
380
381 __abortlike
382 static void
383 __kmem_entry_validate_panic(
384 vm_map_t map,
385 vm_map_entry_t entry,
386 vm_offset_t addr,
387 vm_size_t size,
388 uint32_t flags,
389 kmem_guard_t guard)
390 {
391 const char *what = "???";
392
393 if (entry->vme_atomic != guard.kmg_atomic) {
394 what = "atomicity";
395 } else if (entry->is_sub_map != guard.kmg_submap) {
396 what = "objectness";
397 } else if (addr != entry->vme_start) {
398 what = "left bound";
399 } else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
400 what = "right bound";
401 #if __LP64__
402 } else if (guard.kmg_context != entry->vme_context) {
403 what = "guard";
404 #endif
405 }
406
407 panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
408 "entry:%p %s mismatch guard(0x%08x)",
409 map, (void *)addr, size, flags, entry,
410 what, guard.kmg_context);
411 }
412
413 static bool
414 __kmem_entry_validate_guard(
415 vm_map_entry_t entry,
416 vm_offset_t addr,
417 vm_size_t size,
418 kmem_flags_t flags,
419 kmem_guard_t guard)
420 {
421 if (entry->vme_atomic != guard.kmg_atomic) {
422 return false;
423 }
424
425 if (!guard.kmg_atomic) {
426 return true;
427 }
428
429 if (entry->is_sub_map != guard.kmg_submap) {
430 return false;
431 }
432
433 if (addr != entry->vme_start) {
434 return false;
435 }
436
437 if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
438 return false;
439 }
440
441 #if __LP64__
442 if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
443 return false;
444 }
445 #endif
446
447 return true;
448 }
449
450 void
451 kmem_entry_validate_guard(
452 vm_map_t map,
453 vm_map_entry_t entry,
454 vm_offset_t addr,
455 vm_size_t size,
456 kmem_guard_t guard)
457 {
458 if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
459 __kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
460 }
461 }
462
463 __abortlike
464 static void
465 __kmem_entry_validate_object_panic(
466 vm_map_t map,
467 vm_map_entry_t entry,
468 kmem_flags_t flags)
469 {
470 const char *what;
471 const char *verb;
472
473 if (entry->is_sub_map) {
474 panic("kmem(map=%p) entry %p is a submap", map, entry);
475 }
476
477 if (flags & KMEM_KOBJECT) {
478 what = "kernel";
479 verb = "isn't";
480 } else if (flags & KMEM_COMPRESSOR) {
481 what = "compressor";
482 verb = "isn't";
483 } else if (entry->vme_kernel_object) {
484 what = "kernel";
485 verb = "is unexpectedly";
486 } else {
487 what = "compressor";
488 verb = "is unexpectedly";
489 }
490
491 panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
492 map, flags, entry, verb, what);
493 }
494
495 static bool
496 __kmem_entry_validate_object(
497 vm_map_entry_t entry,
498 kmem_flags_t flags)
499 {
500 if (entry->is_sub_map) {
501 return false;
502 }
503 if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
504 return false;
505 }
506
507 return (bool)(flags & KMEM_COMPRESSOR) ==
508 (VME_OBJECT(entry) == compressor_object);
509 }
510
511 vm_size_t
512 kmem_size_guard(
513 vm_map_t map,
514 vm_offset_t addr,
515 kmem_guard_t guard)
516 {
517 kmem_flags_t flags = KMEM_GUESS_SIZE;
518 vm_map_entry_t entry;
519 vm_size_t size;
520
521 vm_map_lock_read(map);
522
523 if (!vm_map_lookup_entry(map, addr, &entry)) {
524 __kmem_entry_not_found_panic(map, addr);
525 }
526
527 if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
528 __kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
529 }
530
531 size = (vm_size_t)(entry->vme_end - entry->vme_start);
532
533 vm_map_unlock_read(map);
534
535 return size;
536 }
537
538 #if ZSECURITY_CONFIG(KALLOC_TYPE)
539 static inline uint16_t
540 kmem_hash_backtrace(
541 void *fp)
542 {
543 uint64_t bt_count;
544 uintptr_t bt[8] = {};
545
546 struct backtrace_control ctl = {
547 .btc_frame_addr = (uintptr_t)fp,
548 };
549
550 bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
551 return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
552 }
553 #endif
554
555 static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
556 "Insufficient bits to represent ptr ranges");
557
558 kmem_range_id_t
559 kmem_adjust_range_id(
560 uint32_t hash)
561 {
562 return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
563 (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
564 }
565
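/*
 * Decide which kmem range a new allocation lands in and whether its VA
 * is taken from the front or the back of that range (vmkf_last_free):
 *  - KMA_DATA allocations always go to the DATA range,
 *  - typed allocations reuse the type hash handed in through the guard,
 *  - everything else falls back to a hash of the caller's backtrace
 *    (when ZSECURITY_CONFIG(KALLOC_TYPE) is enabled).
 */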
566 static void
567 kmem_apply_security_policy(
568 vm_map_t map,
569 kma_flags_t kma_flags,
570 kmem_guard_t guard,
571 vm_map_kernel_flags_t *vmk_flags,
572 bool assert_dir __unused)
573 {
574 kmem_range_id_t range_id;
575 bool direction;
576 uint16_t type_hash = guard.kmg_type_hash;
577
578 if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
579 return;
580 }
581
582 /*
583 * When ZSECURITY_CONFIG(KALLOC_TYPE) is enabled, a non-zero type-hash
584 * must be passed by krealloc_type
585 */
586 #if (DEBUG || DEVELOPMENT) && ZSECURITY_CONFIG(KALLOC_TYPE)
587 if (assert_dir && !(kma_flags & KMA_DATA)) {
588 assert(type_hash != 0);
589 }
590 #endif
591
592 if (kma_flags & KMA_DATA) {
593 range_id = KMEM_RANGE_ID_DATA;
594 /*
595 * As an optimization in KMA_DATA to avoid fragmentation,
596 * allocate static carveouts at the end of the DATA range.
597 */
598 direction = (bool)(kma_flags & KMA_PERMANENT);
599 } else if (type_hash) {
600 range_id = type_hash & KMEM_RANGE_MASK;
601 direction = type_hash & KMEM_DIRECTION_MASK;
602 } else {
603 #if ZSECURITY_CONFIG(KALLOC_TYPE)
604 type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
605 #endif
606 /*
607 * Range id needs to correspond to one of the PTR ranges
608 */
609 range_id = kmem_adjust_range_id(type_hash);
610 direction = type_hash & KMEM_DIRECTION_MASK;
611 }
612
613 vmk_flags->vmkf_range_id = range_id;
614 vmk_flags->vmkf_last_free = direction;
615 }
616
617 #pragma mark allocation
618
619 kern_return_t
620 kmem_alloc_contig(
621 vm_map_t map,
622 vm_offset_t *addrp,
623 vm_size_t size,
624 vm_offset_t mask,
625 ppnum_t max_pnum,
626 ppnum_t pnum_mask,
627 kma_flags_t flags,
628 vm_tag_t tag)
629 {
630 vm_object_t object;
631 vm_object_offset_t offset;
632 vm_map_offset_t map_addr;
633 vm_map_offset_t map_mask;
634 vm_map_size_t map_size, i;
635 vm_map_entry_t entry;
636 vm_page_t m, pages;
637 kern_return_t kr;
638 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
639
640 assert(VM_KERN_MEMORY_NONE != tag);
641 assert(map);
642 assert3u(flags & ~KMEM_ALLOC_CONTIG_FLAGS, ==, 0);
643
644 map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
645 map_mask = (vm_map_offset_t)mask;
646
647 /* Check for zero allocation size (either directly or via overflow) */
648 if (map_size == 0) {
649 *addrp = 0;
650 return KERN_INVALID_ARGUMENT;
651 }
652
653 /*
654 * Allocate a new object (if necessary) and the reference we
655 * will be donating to the map entry. We must do this before
656 * locking the map, or risk deadlock with the default pager.
657 */
658 if ((flags & KMA_KOBJECT) != 0) {
659 object = kernel_object;
660 vm_object_reference(object);
661 } else {
662 object = vm_object_allocate(map_size);
663 /* stabilize the object to prevent shadowing */
664 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
665 object->true_share = TRUE;
666 }
667 if (flags & KMA_PERMANENT) {
668 vmk_flags.vmkf_permanent = true;
669 }
670 kmem_apply_security_policy(map, flags, KMEM_GUARD_NONE, &vmk_flags, false);
671
672 kr = vm_map_find_space(map, 0, map_size, map_mask,
673 vmk_flags, &entry);
674 if (KERN_SUCCESS != kr) {
675 vm_object_deallocate(object);
676 return kr;
677 }
678
679 map_addr = entry->vme_start;
680 if (object == kernel_object) {
681 offset = map_addr;
682 } else {
683 offset = 0;
684 }
685 VME_OBJECT_SET(entry, object, false, 0);
686 VME_OFFSET_SET(entry, offset);
687 VME_ALIAS_SET(entry, tag);
688
689 /* Take an extra object ref in case the map entry gets deleted */
690 vm_object_reference(object);
691 vm_map_unlock(map);
692
693 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
694
695 if (kr != KERN_SUCCESS) {
696 vm_map_remove(map,
697 vm_map_trunc_page(map_addr,
698 VM_MAP_PAGE_MASK(map)),
699 vm_map_round_page(map_addr + map_size,
700 VM_MAP_PAGE_MASK(map)));
701 vm_object_deallocate(object);
702 *addrp = 0;
703 return kr;
704 }
705
706 if (flags & KMA_ZERO) {
707 for (m = pages; m; m = NEXT_PAGE(m)) {
708 vm_page_zero_fill(m);
709 }
710 }
711
712
713 vm_object_lock(object);
714 for (i = 0; i < map_size; i += PAGE_SIZE) {
715 m = pages;
716 pages = NEXT_PAGE(m);
717 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
718 m->vmp_busy = FALSE;
719 vm_page_insert(m, object, offset + i);
720 }
721 vm_object_unlock(object);
722
723 kr = vm_map_wire_kernel(map,
724 vm_map_trunc_page(map_addr,
725 VM_MAP_PAGE_MASK(map)),
726 vm_map_round_page(map_addr + map_size,
727 VM_MAP_PAGE_MASK(map)),
728 VM_PROT_DEFAULT, tag,
729 FALSE);
730
731 if (kr != KERN_SUCCESS) {
732 if (object == kernel_object) {
733 vm_object_lock(object);
734 vm_object_page_remove(object, offset, offset + map_size);
735 vm_object_unlock(object);
736 }
737 vm_map_remove(map,
738 vm_map_trunc_page(map_addr,
739 VM_MAP_PAGE_MASK(map)),
740 vm_map_round_page(map_addr + map_size,
741 VM_MAP_PAGE_MASK(map)));
742 vm_object_deallocate(object);
743 return kr;
744 }
745 vm_object_deallocate(object);
746
747 if (object == kernel_object) {
748 vm_map_simplify(map, map_addr);
749 vm_tag_update_size(tag, map_size);
750 }
751 *addrp = (vm_offset_t) map_addr;
752 assert((vm_map_offset_t) *addrp == map_addr);
753
754 return KERN_SUCCESS;
755 }
756
757 kmem_return_t
758 kmem_alloc_guard(
759 vm_map_t map,
760 vm_size_t size,
761 vm_offset_t mask,
762 kma_flags_t flags,
763 kmem_guard_t guard)
764 {
765 vm_object_t object;
766 vm_map_entry_t entry = NULL;
767 vm_map_offset_t map_addr, fill_start;
768 vm_map_size_t map_size, fill_size;
769 vm_page_t guard_left = VM_PAGE_NULL;
770 vm_page_t guard_right = VM_PAGE_NULL;
771 vm_page_t wired_page_list = VM_PAGE_NULL;
772 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
773 bool need_guards;
774 kmem_return_t kmr = { };
775
776 assert(kernel_map && map->pmap == kernel_pmap);
777
778 #if DEBUG || DEVELOPMENT
779 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
780 size, 0, 0, 0);
781 #endif
782
783 if (size == 0 ||
784 (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
785 (size < __kmem_guard_size(ANYF(flags)))) {
786 __kmem_invalid_size_panic(map, size, flags);
787 }
788
789 /*
790 * limit the size of a single extent of wired memory
791 * to try and limit the damage to the system if
792 * too many pages get wired down
793 * limit raised to 2GB with 128GB max physical limit,
794 * but scaled by installed memory above this
795 */
796 if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
797 size > MAX(1ULL << 31, sane_size / 64))) {
798 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
799 goto out_error;
800 }
801
802 /*
803 * Guard pages:
804 *
805 * Guard pages are implemented as fictitious pages.
806 *
807 * However, some maps, and some objects are known
808 * to manage their memory explicitly, and do not need
809 * those to be materialized, which saves memory.
810 *
811 * By placing guard pages on either end of a stack,
812 * they can help detect cases where a thread walks
813 * off either end of its stack.
814 *
815 * They are allocated and set up here and attempts
816 * to access those pages are trapped in vm_fault_page().
817 *
818 * The map_size we were passed may include extra space for
819 * guard pages. fill_size represents the actual size to populate.
820 * Similarly, fill_start indicates where the actual pages
821 * will begin in the range.
822 */
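/*
 * Illustrative example: a 4-page request with both KMA_GUARD_FIRST and
 * KMA_GUARD_LAST ends up with map_size = 4 pages, fill_start = PAGE_SIZE
 * and fill_size = 2 pages; the guard pages occupy offsets 0 and
 * 3 * PAGE_SIZE, and only the 2 middle pages are backed by real memory.
 */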
823
824 map_size = round_page(size);
825 fill_start = 0;
826 fill_size = map_size - __kmem_guard_size(ANYF(flags));
827
828 need_guards = flags & (KMA_KOBJECT | KMA_COMPRESSOR) ||
829 !map->never_faults;
830
831 if (flags & KMA_GUARD_FIRST) {
832 vmk_flags.vmkf_guard_before = true;
833 fill_start += PAGE_SIZE;
834 }
835 if ((flags & KMA_GUARD_FIRST) && need_guards) {
836 guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
837 if (__improbable(guard_left == VM_PAGE_NULL)) {
838 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
839 goto out_error;
840 }
841 }
842 if ((flags & KMA_GUARD_LAST) && need_guards) {
843 guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
844 if (__improbable(guard_right == VM_PAGE_NULL)) {
845 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
846 goto out_error;
847 }
848 }
849
850 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
851 kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
852 &wired_page_list);
853 if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
854 goto out_error;
855 }
856 }
857
858 /*
859 * Allocate a new object (if necessary). We must do this before
860 * locking the map, or risk deadlock with the default pager.
861 */
862 if (flags & KMA_KOBJECT) {
863 object = kernel_object;
864 vm_object_reference(object);
865 } else if (flags & KMA_COMPRESSOR) {
866 object = compressor_object;
867 vm_object_reference(object);
868 } else {
869 object = vm_object_allocate(map_size);
870 /* stabilize the object to prevent shadowing */
871 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
872 object->true_share = TRUE;
873 }
874
875 if (flags & KMA_LAST_FREE) {
876 vmk_flags.vmkf_last_free = true;
877 }
878 if (flags & KMA_PERMANENT) {
879 vmk_flags.vmkf_permanent = true;
880 }
881 kmem_apply_security_policy(map, flags, guard, &vmk_flags, false);
882
883 kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
884 vmk_flags, &entry);
885 if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
886 vm_object_deallocate(object);
887 goto out_error;
888 }
889
890 map_addr = entry->vme_start;
891 VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
892 VME_ALIAS_SET(entry, guard.kmg_tag);
893 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
894 VME_OFFSET_SET(entry, map_addr);
895 } else {
896 vm_object_reference(object);
897 }
898
899 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
900 entry->wired_count = 1;
901 }
902
903 if (guard_left || guard_right || wired_page_list) {
904 vm_object_offset_t offset = 0ull;
905
906 vm_object_lock(object);
907 vm_map_unlock(map);
908
909 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
910 offset = map_addr;
911 }
912
913 if (guard_left) {
914 vm_page_insert(guard_left, object, offset);
915 guard_left->vmp_busy = FALSE;
916 guard_left = VM_PAGE_NULL;
917 }
918
919 if (guard_right) {
920 vm_page_insert(guard_right, object,
921 offset + fill_start + fill_size);
922 guard_right->vmp_busy = FALSE;
923 guard_right = VM_PAGE_NULL;
924 }
925
926 if (wired_page_list) {
927 kernel_memory_populate_object_and_unlock(object,
928 map_addr + fill_start, offset + fill_start, fill_size,
929 wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT);
930 } else {
931 vm_object_unlock(object);
932 }
933 } else {
934 vm_map_unlock(map);
935 }
936
937 #if KASAN
938 if (flags & KMA_PAGEABLE) {
939 /*
940 * We need to allow the range for pageable memory,
941 * or faulting will not be allowed.
942 */
943 kasan_notify_address(map_addr, map_size);
944 }
945 #endif
946 /*
947 * now that the pages are wired, we no longer have to fear coalescing
948 */
949 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
950 vm_map_simplify(map, map_addr);
951 } else {
952 vm_object_deallocate(object);
953 }
954
955 #if DEBUG || DEVELOPMENT
956 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
957 atop(fill_size), 0, 0, 0);
958 #endif
959 kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
960 return kmr;
961
962 out_error:
963 if (flags & KMA_NOFAIL) {
964 __kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
965 }
966 if (guard_left) {
967 guard_left->vmp_snext = wired_page_list;
968 wired_page_list = guard_left;
969 }
970 if (guard_right) {
971 guard_right->vmp_snext = wired_page_list;
972 wired_page_list = guard_right;
973 }
974 if (wired_page_list) {
975 vm_page_free_list(wired_page_list, FALSE);
976 }
977
978 #if DEBUG || DEVELOPMENT
979 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
980 0, 0, 0, 0);
981 #endif
982
983 return kmr;
984 }
985
986 kmem_return_t
987 kmem_suballoc(
988 vm_map_t parent,
989 mach_vm_offset_t *addr,
990 vm_size_t size,
991 vm_map_create_options_t vmc_options,
992 int vm_flags,
993 kms_flags_t flags,
994 vm_tag_t tag)
995 {
996 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
997 vm_map_offset_t map_addr = 0;
998 kmem_return_t kmr = { };
999 vm_map_t map;
1000
1001 assert(page_aligned(size));
1002 assert(parent->pmap == kernel_pmap);
1003
1004 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1005 if (parent == kernel_map) {
1006 assert((vm_flags & VM_FLAGS_FIXED_RANGE_SUBALLOC) ||
1007 (flags & KMS_DATA));
1008 }
1009 #endif /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1010
1011 if ((vm_flags & VM_FLAGS_ANYWHERE) == 0) {
1012 map_addr = trunc_page(*addr);
1013 }
1014
1015 pmap_reference(vm_map_pmap(parent));
1016 map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1017
1018 /*
1019 * 1. vm_map_enter() will consume one ref on success.
1020 *
1021 * 2. make the entry atomic as kernel submaps should never be split.
1022 *
1023 * 3. instruct vm_map_enter() that it is a fresh submap
1024 * that needs to be taught its bounds as it inserted.
1025 */
1026 vm_map_reference(map);
1027 vmk_flags.vmkf_submap = true;
1028 if ((flags & KMS_DATA) == 0) {
1029 /* FIXME: IOKit submaps get fragmented and can't be atomic */
1030 vmk_flags.vmkf_submap_atomic = true;
1031 }
1032 vmk_flags.vmkf_submap_adjust = true;
1033 if (flags & KMS_LAST_FREE) {
1034 vmk_flags.vmkf_last_free = true;
1035 }
1036 if (flags & KMS_PERMANENT) {
1037 vmk_flags.vmkf_permanent = true;
1038 }
1039 if (flags & KMS_DATA) {
1040 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1041 }
1042
1043 kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
1044 vm_flags, vmk_flags, tag, (vm_object_t)map, 0, FALSE,
1045 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1046
1047 if (kmr.kmr_return != KERN_SUCCESS) {
1048 if (flags & KMS_NOFAIL) {
1049 panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1050 parent, size, kmr.kmr_return);
1051 }
1052 assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1053 vm_map_deallocate(map);
1054 vm_map_deallocate(map); /* also removes ref to pmap */
1055 return kmr;
1056 }
1057
1058 /*
1059 * For kmem_suballocs that register a claim and are assigned a range, ensure
1060 * that the exact same range is returned.
1061 */
1062 if (*addr != 0 && parent == kernel_map &&
1063 startup_phase > STARTUP_SUB_KMEM) {
1064 assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1065 } else {
1066 *addr = map_addr;
1067 }
1068
1069 kmr.kmr_submap = map;
1070 return kmr;
1071 }
1072
1073 /*
1074 * kmem_alloc:
1075 *
1076 * Allocate wired-down memory in the kernel's address map
1077 * or a submap. The memory is not zero-filled.
1078 */
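/*
 * Illustrative usage sketch (a hypothetical caller, not part of this
 * file): a client that needs a temporary wired, zero-filled data buffer
 * might do
 *
 *	vm_offset_t   addr;
 *	kern_return_t kr;
 *
 *	kr = kmem_alloc(kernel_map, &addr, PAGE_SIZE, KMA_ZERO | KMA_DATA,
 *	    VM_KERN_MEMORY_OSFMK);
 *	if (kr == KERN_SUCCESS) {
 *		... use the buffer ...
 *		kmem_free(kernel_map, addr, PAGE_SIZE);
 *	}
 */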
1079
1080 __exported kern_return_t
1081 kmem_alloc_external(
1082 vm_map_t map,
1083 vm_offset_t *addrp,
1084 vm_size_t size);
1085 kern_return_t
1086 kmem_alloc_external(
1087 vm_map_t map,
1088 vm_offset_t *addrp,
1089 vm_size_t size)
1090 {
1091 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1092 return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1093 }
1094 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1095 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1096 }
1097
1098
1099 /*
1100 * kmem_alloc_kobject:
1101 *
1102 * Allocate wired-down memory in the kernel's address map
1103 * or a submap. The memory is not zero-filled.
1104 *
1105 * The memory is allocated in the kernel_object.
1106 * It may not be copied with vm_map_copy, and
1107 * it may not be reallocated with kmem_realloc.
1108 */
1109
1110 __exported kern_return_t
1111 kmem_alloc_kobject_external(
1112 vm_map_t map,
1113 vm_offset_t *addrp,
1114 vm_size_t size);
1115 kern_return_t
1116 kmem_alloc_kobject_external(
1117 vm_map_t map,
1118 vm_offset_t *addrp,
1119 vm_size_t size)
1120 {
1121 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1122 return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1123 }
1124 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1125 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1126 }
1127
1128 /*
1129 * kmem_alloc_pageable:
1130 *
1131 * Allocate pageable memory in the kernel's address map.
1132 */
1133
1134 __exported kern_return_t
1135 kmem_alloc_pageable_external(
1136 vm_map_t map,
1137 vm_offset_t *addrp,
1138 vm_size_t size);
1139 kern_return_t
1140 kmem_alloc_pageable_external(
1141 vm_map_t map,
1142 vm_offset_t *addrp,
1143 vm_size_t size)
1144 {
1145 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1146 return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
1147 }
1148 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1149 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1150 }
1151
1152
1153 #pragma mark population
1154
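/*
 * Enter a single page into the kernel pmap.  The mapping is first
 * attempted with PMAP_OPTIONS_NOWAIT while the object lock is held; if
 * the pmap would need to allocate and returns KERN_RESOURCE_SHORTAGE,
 * the object is unlocked, the mapping is retried in blocking mode, and
 * the object is relocked before returning.
 */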
1155 static void
1156 kernel_memory_populate_pmap_enter(
1157 vm_object_t object,
1158 vm_address_t addr,
1159 vm_object_offset_t offset,
1160 vm_page_t mem,
1161 vm_prot_t prot,
1162 int pe_flags)
1163 {
1164 kern_return_t pe_result;
1165 int pe_options;
1166
1167 PMAP_ENTER_CHECK(kernel_pmap, mem);
1168
1169 pe_options = PMAP_OPTIONS_NOWAIT;
1170 if (object->internal) {
1171 pe_options |= PMAP_OPTIONS_INTERNAL;
1172 }
1173 if (mem->vmp_reusable || object->all_reusable) {
1174 pe_options |= PMAP_OPTIONS_REUSABLE;
1175 }
1176
1177 pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1178 VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1179 pe_flags, /* wired */ TRUE, pe_options, NULL);
1180
1181 if (pe_result == KERN_RESOURCE_SHORTAGE) {
1182 vm_object_unlock(object);
1183
1184 pe_options &= ~PMAP_OPTIONS_NOWAIT;
1185
1186 pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1187 VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1188 pe_flags, /* wired */ TRUE, pe_options, NULL);
1189
1190 vm_object_lock(object);
1191 }
1192
1193 assert(pe_result == KERN_SUCCESS);
1194 }
1195
1196 void
1197 kernel_memory_populate_object_and_unlock(
1198 vm_object_t object, /* must be locked */
1199 vm_address_t addr,
1200 vm_offset_t offset,
1201 vm_size_t size,
1202 vm_page_t page_list,
1203 kma_flags_t flags,
1204 vm_tag_t tag,
1205 vm_prot_t prot)
1206 {
1207 vm_page_t mem;
1208 int pe_flags;
1209
1210 assert3u((bool)(flags & KMA_KOBJECT), ==, object == kernel_object);
1211 assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
1212 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
1213 assert3u(offset, ==, addr);
1214 }
1215
1216 if (flags & KMA_KSTACK) {
1217 pe_flags = VM_MEM_STACK;
1218 } else {
1219 pe_flags = 0;
1220 }
1221
1222 for (vm_object_offset_t pg_offset = 0;
1223 pg_offset < size;
1224 pg_offset += PAGE_SIZE_64) {
1225 if (page_list == NULL) {
1226 panic("%s: page_list too short", __func__);
1227 }
1228
1229 mem = page_list;
1230 page_list = mem->vmp_snext;
1231 mem->vmp_snext = NULL;
1232
1233 assert(mem->vmp_wire_count == 0);
1234 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
1235
1236 if (flags & KMA_COMPRESSOR) {
1237 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
1238 /*
1239 * Background processes doing I/O accounting can call
1240 * into NVME driver to do some work which results in
1241 * an allocation here and so we want to make sure
1242 * that the pages used by compressor, regardless of
1243 * process context, are never on the special Q.
1244 */
1245 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
1246
1247 vm_page_insert(mem, object, offset + pg_offset);
1248 } else {
1249 mem->vmp_q_state = VM_PAGE_IS_WIRED;
1250 mem->vmp_wire_count = 1;
1251
1252 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1253 }
1254
1255 mem->vmp_busy = false;
1256 mem->vmp_pmapped = true;
1257 mem->vmp_wpmapped = true;
1258
1259 /*
1260 * Manual PMAP_ENTER_OPTIONS() with shortcuts
1261 * for the kernel and compressor objects.
1262 */
1263
1264 kernel_memory_populate_pmap_enter(object, addr, pg_offset,
1265 mem, prot, pe_flags);
1266
1267 if (flags & KMA_NOENCRYPT) {
1268 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1269 }
1270 }
1271
1272 if (page_list) {
1273 panic("%s: page_list too long", __func__);
1274 }
1275
1276 vm_object_unlock(object);
1277
1278 if (!(flags & KMA_COMPRESSOR)) {
1279 vm_page_lockspin_queues();
1280 vm_page_wire_count += atop(size);
1281 vm_page_unlock_queues();
1282 }
1283
1284 if (flags & KMA_KOBJECT) {
1285 /* vm_page_insert_wired() handles regular objects already */
1286 vm_tag_update_size(tag, size);
1287 }
1288
1289 #if KASAN
1290 if (flags & KMA_COMPRESSOR) {
1291 kasan_notify_address_nopoison(addr, size);
1292 } else {
1293 kasan_notify_address(addr, size);
1294 }
1295 #endif
1296 }
1297
1298
1299 kern_return_t
1300 kernel_memory_populate(
1301 vm_offset_t addr,
1302 vm_size_t size,
1303 kma_flags_t flags,
1304 vm_tag_t tag)
1305 {
1306 kern_return_t kr = KERN_SUCCESS;
1307 vm_page_t page_list = NULL;
1308 vm_size_t page_count = atop_64(size);
1309 vm_object_t object = __kmem_object(ANYF(flags));
1310
1311 #if DEBUG || DEVELOPMENT
1312 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
1313 size, 0, 0, 0);
1314 #endif
1315
1316 kr = vm_page_alloc_list(page_count, flags, &page_list);
1317 if (kr == KERN_SUCCESS) {
1318 vm_object_lock(object);
1319 kernel_memory_populate_object_and_unlock(object, addr,
1320 addr, size, page_list, flags, tag, VM_PROT_DEFAULT);
1321 }
1322
1323 #if DEBUG || DEVELOPMENT
1324 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
1325 page_count, 0, 0, 0);
1326 #endif
1327 return kr;
1328 }
1329
1330 void
1331 kernel_memory_depopulate(
1332 vm_offset_t addr,
1333 vm_size_t size,
1334 kma_flags_t flags,
1335 vm_tag_t tag)
1336 {
1337 vm_object_t object = __kmem_object(ANYF(flags));
1338 vm_object_offset_t offset = addr;
1339 vm_page_t mem;
1340 vm_page_t local_freeq = NULL;
1341 unsigned int pages_unwired = 0;
1342
1343 vm_object_lock(object);
1344
1345 pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1346
1347 for (vm_object_offset_t pg_offset = 0;
1348 pg_offset < size;
1349 pg_offset += PAGE_SIZE_64) {
1350 mem = vm_page_lookup(object, offset + pg_offset);
1351
1352 assert(mem);
1353
1354 if (flags & KMA_COMPRESSOR) {
1355 assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
1356 } else {
1357 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
1358 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1359 pages_unwired++;
1360 }
1361
1362 mem->vmp_busy = TRUE;
1363
1364 assert(mem->vmp_tabled);
1365 vm_page_remove(mem, TRUE);
1366 assert(mem->vmp_busy);
1367
1368 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
1369
1370 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
1371 mem->vmp_snext = local_freeq;
1372 local_freeq = mem;
1373 }
1374
1375 vm_object_unlock(object);
1376
1377 vm_page_free_list(local_freeq, TRUE);
1378
1379 if (!(flags & KMA_COMPRESSOR)) {
1380 vm_page_lockspin_queues();
1381 vm_page_wire_count -= pages_unwired;
1382 vm_page_unlock_queues();
1383 }
1384
1385 if (flags & KMA_KOBJECT) {
1386 /* vm_page_remove() handles regular objects already */
1387 vm_tag_update_size(tag, -ptoa_64(pages_unwired));
1388 }
1389 }
1390
1391 #pragma mark reallocation
1392
1393 __abortlike
1394 static void
1395 __kmem_realloc_invalid_object_size_panic(
1396 vm_map_t map,
1397 vm_address_t address,
1398 vm_size_t size,
1399 vm_map_entry_t entry,
1400 vm_object_t object)
1401 {
1402 panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1403 "object %p has unexpected size %lld",
1404 map, (void *)address, (size_t)size, entry, object, object->vo_size);
1405 }
1406
1407 static kmem_return_t
1408 kmem_realloc_shrink_guard(
1409 vm_map_t map,
1410 vm_offset_t oldaddr,
1411 vm_size_t oldsize,
1412 vm_size_t newsize,
1413 kmr_flags_t flags,
1414 kmem_guard_t guard,
1415 vm_map_entry_t entry)
1416 {
1417 vm_object_t object;
1418 kmem_return_t kmr = { .kmr_address = oldaddr };
1419 bool was_atomic;
1420
1421 vm_map_lock_assert_exclusive(map);
1422
1423 if ((flags & KMR_KOBJECT) == 0) {
1424 object = VME_OBJECT(entry);
1425 vm_object_reference(object);
1426 }
1427
1428 /*
1429 * Shrinking an atomic entry starts with splitting it,
1430 * and removing the second half.
1431 */
1432 was_atomic = entry->vme_atomic;
1433 entry->vme_atomic = false;
1434 vm_map_clip_end(map, entry, entry->vme_start + newsize);
1435 entry->vme_atomic = was_atomic;
1436
1437 (void)vm_map_remove_and_unlock(map,
1438 oldaddr + newsize, oldaddr + oldsize,
1439 VM_MAP_REMOVE_KUNWIRE, KMEM_GUARD_NONE);
1440
1441
1442 /*
1443 * Lastly, if there are guard pages, deal with them.
1444 *
1445 * The kernel object just needs to depopulate,
1446 * regular objects require freeing the last page
1447 * and replacing it with a guard.
1448 */
1449 if (flags & KMR_KOBJECT) {
1450 if (flags & KMR_GUARD_LAST) {
1451 kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
1452 PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
1453 }
1454 } else {
1455 vm_page_t guard_right = VM_PAGE_NULL;
1456 vm_offset_t remove_start = newsize;
1457
1458 if (flags & KMR_GUARD_LAST) {
1459 guard_right = vm_page_grab_guard(true);
1460 remove_start -= PAGE_SIZE;
1461 }
1462
1463 vm_object_lock(object);
1464
1465 if (object->vo_size != oldsize) {
1466 __kmem_realloc_invalid_object_size_panic(map,
1467 oldaddr, oldsize, entry, object);
1468 }
1469 object->vo_size = newsize;
1470
1471 vm_object_page_remove(object, remove_start, oldsize);
1472
1473 if (flags & KMR_GUARD_LAST) {
1474 vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1475 guard_right->vmp_busy = false;
1476 }
1477 vm_object_unlock(object);
1478 vm_object_deallocate(object);
1479 }
1480
1481 return kmr;
1482 }
1483
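/*
 * kmem_realloc_guard:
 *
 * A zero old address behaves like kmem_alloc_guard() and a zero new size
 * behaves like kmem_free_guard().  Shrinking is handled in place by
 * clipping the entry and trimming its tail; growing first reserves the
 * new pages and a new VA range, then either migrates the pages within
 * the kernel object or grows the backing object and wires it at its new
 * address, and finally frees the old range when KMR_FREEOLD is set.
 */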
1484 kmem_return_t
1485 kmem_realloc_guard(
1486 vm_map_t map,
1487 vm_offset_t oldaddr,
1488 vm_size_t oldsize,
1489 vm_size_t newsize,
1490 kmr_flags_t flags,
1491 kmem_guard_t guard)
1492 {
1493 vm_object_t object;
1494 vm_map_offset_t newaddr;
1495 vm_object_offset_t newoffs;
1496 vm_map_entry_t oldentry;
1497 vm_map_entry_t newentry;
1498 vm_page_t page_list = NULL;
1499 bool needs_wakeup = false;
1500 kmem_return_t kmr = { };
1501 unsigned int last_timestamp;
1502 vm_map_kernel_flags_t vmk_flags = {
1503 .vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1504 };
1505
1506 assert(KMEM_REALLOC_FLAGS_VALID(flags));
1507 if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
1508 __kmem_invalid_arguments_panic("realloc", map, oldaddr,
1509 oldsize, flags);
1510 }
1511
1512 if (oldaddr == 0ul) {
1513 return kmem_alloc_guard(map, newsize, 0, (kma_flags_t)flags, guard);
1514 }
1515
1516 if (newsize == 0ul) {
1517 kmem_free_guard(map, oldaddr, oldsize, KMF_NONE, guard);
1518 return kmr;
1519 }
1520
1521 if (newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
1522 __kmem_invalid_size_panic(map, newsize, flags);
1523 }
1524 if (newsize < __kmem_guard_size(ANYF(flags))) {
1525 __kmem_invalid_size_panic(map, newsize, flags);
1526 }
1527
1528 oldsize = round_page(oldsize);
1529 newsize = round_page(newsize);
1530
1531 if (oldsize == newsize) {
1532 kmr.kmr_address = oldaddr;
1533 return kmr;
1534 }
1535
1536 /*
1537 * If we're growing the allocation,
1538 * then reserve the pages we'll need,
1539 * and find a spot for its new place.
1540 */
1541 if (oldsize < newsize) {
1542 #if DEBUG || DEVELOPMENT
1543 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1544 VM_KERN_REQUEST, DBG_FUNC_START,
1545 newsize - oldsize, 0, 0, 0);
1546 #endif
1547 kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
1548 (kma_flags_t)flags, &page_list);
1549 if (kmr.kmr_return == KERN_SUCCESS) {
1550 kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
1551 &vmk_flags, true);
1552 kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
1553 vmk_flags, &newentry);
1554 }
1555 if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1556 if (flags & KMR_REALLOCF) {
1557 kmem_free_guard(map, oldaddr, oldsize,
1558 KMF_NONE, guard);
1559 }
1560 if (page_list) {
1561 vm_page_free_list(page_list, FALSE);
1562 }
1563 #if DEBUG || DEVELOPMENT
1564 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1565 VM_KERN_REQUEST, DBG_FUNC_END,
1566 0, 0, 0, 0);
1567 #endif
1568 return kmr;
1569 }
1570
1571 /* map is locked */
1572 } else {
1573 vm_map_lock(map);
1574 }
1575
1576
1577 /*
1578 * Locate the entry:
1579 * - wait for it to quiesce.
1580 * - validate its guard,
1581 * - learn its correct tag,
1582 */
1583 again:
1584 if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1585 __kmem_entry_not_found_panic(map, oldaddr);
1586 }
1587 if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
1588 oldentry->needs_wakeup = true;
1589 vm_map_entry_wait(map, THREAD_UNINT);
1590 goto again;
1591 }
1592 kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
1593 if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
1594 __kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1595 }
1596 /*
1597 * TODO: We should validate for non atomic entries that the range
1598 * we are acting on is what we expect here.
1599 */
1600
1601 guard.kmg_tag = VME_ALIAS(oldentry);
1602
1603 if (newsize < oldsize) {
1604 return kmem_realloc_shrink_guard(map, oldaddr, oldsize, newsize,
1605 flags, guard, oldentry);
1606 }
1607
1608 /*
1609 * We are growing the entry
1610 *
1611 * For regular objects we use the object `vo_size` updates
1612 * as a guarantee that no 2 kmem_realloc() can happen
1613 * concurrently (by doing it before the map is unlocked).
1614 *
1615 * For the kernel object, prevent the entry from being
1616 * reallocated or changed by marking it "in_transition".
1617 */
1618
1619 object = VME_OBJECT(oldentry);
1620 vm_object_lock(object);
1621 vm_object_reference_locked(object);
1622
1623 newaddr = newentry->vme_start;
1624 newoffs = oldsize;
1625
1626 VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
1627 VME_ALIAS_SET(newentry, guard.kmg_tag);
1628 if (flags & KMR_KOBJECT) {
1629 oldentry->in_transition = true;
1630 VME_OFFSET_SET(newentry, newaddr);
1631 newentry->wired_count = 1;
1632 newoffs = newaddr + oldsize;
1633 } else {
1634 if (object->vo_size != oldsize) {
1635 __kmem_realloc_invalid_object_size_panic(map,
1636 oldaddr, oldsize, oldentry, object);
1637 }
1638 object->vo_size = newsize;
1639 }
1640
1641 last_timestamp = map->timestamp;
1642 vm_map_unlock(map);
1643
1644
1645 /*
1646 * Now proceed with the population of pages.
1647 *
1648 * Kernel objects can use the kmem population helpers.
1649 *
1650 * Regular objects will insert pages manually,
1651 * then wire the memory into the new range.
1652 */
1653
1654 vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
1655
1656 if (flags & KMR_KOBJECT) {
1657 assert(flags & KMR_FREEOLD);
1658
1659 pmap_protect(kernel_pmap,
1660 oldaddr, oldaddr + oldsize - guard_right_size,
1661 VM_PROT_NONE);
1662
1663 for (vm_object_offset_t offset = 0;
1664 offset < oldsize - guard_right_size;
1665 offset += PAGE_SIZE_64) {
1666 vm_page_t mem;
1667
1668 mem = vm_page_lookup(object, oldaddr + offset);
1669 if (mem == VM_PAGE_NULL) {
1670 continue;
1671 }
1672
1673 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1674
1675 mem->vmp_busy = true;
1676 vm_page_remove(mem, true);
1677 vm_page_insert_wired(mem, object, newaddr + offset,
1678 guard.kmg_tag);
1679 mem->vmp_busy = false;
1680
1681 kernel_memory_populate_pmap_enter(object, newaddr,
1682 offset, mem, VM_PROT_DEFAULT, 0);
1683 }
1684
1685 kernel_memory_populate_object_and_unlock(object,
1686 newaddr + oldsize - guard_right_size,
1687 newoffs - guard_right_size,
1688 newsize - oldsize,
1689 page_list, (kma_flags_t)flags,
1690 guard.kmg_tag, VM_PROT_DEFAULT);
1691 } else {
1692 vm_page_t guard_right = VM_PAGE_NULL;
1693 kern_return_t kr;
1694
1695 /*
1696 * Note: we are borrowing the new entry reference
1697 * on the object for the duration of this code,
1698 * which works because we keep the object locked
1699 * throughout.
1700 */
1701 if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
1702 guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
1703 assert(guard_right->vmp_fictitious);
1704 guard_right->vmp_busy = true;
1705 vm_page_remove(guard_right, true);
1706 }
1707
1708 for (vm_object_offset_t offset = oldsize - guard_right_size;
1709 offset < newsize - guard_right_size;
1710 offset += PAGE_SIZE_64) {
1711 vm_page_t mem = page_list;
1712
1713 page_list = mem->vmp_snext;
1714 mem->vmp_snext = VM_PAGE_NULL;
1715
1716 vm_page_insert(mem, object, offset);
1717 mem->vmp_busy = false;
1718 }
1719
1720 if (guard_right) {
1721 vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1722 guard_right->vmp_busy = false;
1723 }
1724
1725 vm_object_unlock(object);
1726
1727 kr = vm_map_wire_kernel(map, newaddr, newaddr + newsize,
1728 VM_PROT_DEFAULT, guard.kmg_tag, FALSE);
1729 assert(kr == KERN_SUCCESS);
1730 }
1731
1732 #if KASAN
1733 kasan_notify_address(newaddr, newsize);
1734 #endif
1735
1736
1737 /*
1738 * Mark the entry as idle again,
1739 * and honor KMR_FREEOLD if needed.
1740 */
1741
1742 vm_map_lock(map);
1743 if (last_timestamp + 1 != map->timestamp &&
1744 !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1745 __kmem_entry_not_found_panic(map, oldaddr);
1746 }
1747
1748 if (flags & KMR_KOBJECT) {
1749 assert(oldentry->in_transition);
1750 oldentry->in_transition = false;
1751 if (oldentry->needs_wakeup) {
1752 needs_wakeup = true;
1753 oldentry->needs_wakeup = false;
1754 }
1755 }
1756
1757 if (flags & KMR_FREEOLD) {
1758 (void)vm_map_remove_and_unlock(map,
1759 oldaddr, oldaddr + oldsize,
1760 VM_MAP_REMOVE_KUNWIRE, guard);
1761 } else {
1762 vm_map_unlock(map);
1763 }
1764
1765 if (needs_wakeup) {
1766 vm_map_entry_wakeup(map);
1767 }
1768
1769
1770 #if DEBUG || DEVELOPMENT
1771 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
1772 atop(newsize - oldsize), 0, 0, 0);
1773 #endif
1774 kmr.kmr_address = newaddr;
1775 return kmr;
1776 }
1777
1778
1779 #pragma mark free
1780
1781 vm_size_t
1782 kmem_free_guard(
1783 vm_map_t map,
1784 vm_offset_t addr,
1785 vm_size_t size,
1786 kmf_flags_t flags,
1787 kmem_guard_t guard)
1788 {
1789 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1790
1791 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1792 assert(map->pmap == kernel_pmap);
1793
1794 if (flags & KMF_GUESS_SIZE) {
1795 vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
1796 size = PAGE_SIZE;
1797 } else if (size == 0) {
1798 __kmem_invalid_size_panic(map, size, flags);
1799 } else {
1800 size = round_page(size);
1801 }
1802
1803 return vm_map_remove_guard(map, addr, addr + size,
1804 vmr_flags, guard).kmr_size;
1805 }
1806
1807 __exported void
1808 kmem_free_external(
1809 vm_map_t map,
1810 vm_offset_t addr,
1811 vm_size_t size);
1812 void
1813 kmem_free_external(
1814 vm_map_t map,
1815 vm_offset_t addr,
1816 vm_size_t size)
1817 {
1818 if (size) {
1819 kmem_free(map, trunc_page(addr), size);
1820 #if MACH_ASSERT
1821 } else {
1822 printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
1823 map, (void *)addr, __builtin_return_address(0));
1824 #endif
1825 }
1826 }
1827
1828
1829 #pragma mark kmem init
1830
1831 /*
1832 * The default percentage of memory that can be mlocked is scaled based on the total
1833 * amount of memory in the system. These percentages are calculated
1834 * offline and stored in this table. We index this table by
1835 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1836 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1837 *
1838 * Note that these values were picked for mac.
1839 * If we ever have very large memory config arm devices, we may want to revisit
1840 * since the kernel overhead is smaller there due to the larger page size.
1841 */
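/*
 * Worked example (illustrative): on a 16GB configuration,
 * log2(16GB) = 34, so the table index is 34 - 32 = 2, which selects
 * 80% with CONFIG_JETSAM and 76% without it.
 */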
1842
1843 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1844 #define VM_USER_WIREABLE_MIN_CONFIG 32
1845 #if CONFIG_JETSAM
1846 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
1847 * pressure.
1848 */
1849 static vm_map_size_t wire_limit_percents[] =
1850 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
1851 #else
1852 static vm_map_size_t wire_limit_percents[] =
1853 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1854 #endif /* CONFIG_JETSAM */
1855
1856 /*
1857 * Sets the default global user wire limit which limits the amount of
1858 * memory that can be locked via mlock() based on the above algorithm.
1859 * This can be overridden via a sysctl.
1860 */
1861 static void
1862 kmem_set_user_wire_limits(void)
1863 {
1864 uint64_t available_mem_log;
1865 uint64_t max_wire_percent;
1866 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1867 sizeof(vm_map_size_t);
1868 vm_map_size_t limit;
1869 uint64_t config_memsize = max_mem;
1870 #if defined(XNU_TARGET_OS_OSX)
1871 config_memsize = max_mem_actual;
1872 #endif /* defined(XNU_TARGET_OS_OSX) */
1873
1874 available_mem_log = bit_floor(config_memsize);
1875
1876 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1877 available_mem_log = 0;
1878 } else {
1879 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1880 }
1881 if (available_mem_log >= wire_limit_percents_length) {
1882 available_mem_log = wire_limit_percents_length - 1;
1883 }
1884 max_wire_percent = wire_limit_percents[available_mem_log];
1885
1886 limit = config_memsize * max_wire_percent / 100;
1887 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1888 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1889 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1890 }
1891
1892 vm_global_user_wire_limit = limit;
1893 /* the default per task limit is the same as the global limit */
1894 vm_per_task_user_wire_limit = limit;
1895 vm_add_wire_count_over_global_limit = 0;
1896 vm_add_wire_count_over_user_limit = 0;
1897 }
1898
1899 #define KMEM_MAX_CLAIMS 50
1900 __startup_data
1901 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
1902 __startup_data
1903 uint32_t kmem_claim_count = 0;
1904
1905 __startup_func
1906 void
1907 kmem_range_startup_init(
1908 struct kmem_range_startup_spec *sp)
1909 {
1910 assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
1911 if (sp->kc_calculate_sz) {
1912 sp->kc_size = (sp->kc_calculate_sz)();
1913 }
1914 if (sp->kc_size) {
1915 kmem_claims[kmem_claim_count] = *sp;
1916 kmem_claim_count++;
1917 }
1918 }
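/*
 * Illustrative sketch (hypothetical subsystem; the actual registration
 * macros live in the kmem headers): a claim for a dedicated VA range is a
 * spec handed to kmem_range_startup_init() during startup, e.g.
 *
 *     static struct mach_vm_range my_range;
 *     static struct kmem_range_startup_spec my_spec = {
 *         .kc_name  = "my_subsystem_range",
 *         .kc_range = &my_range,
 *         .kc_size  = 32ULL << 20,     // hypothetical 32MB claim
 *     };
 *
 * Specs that set kc_calculate_sz defer sizing to the callback; claims that
 * resolve to a zero size are dropped.
 */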
1919
1920 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1921 static vm_offset_t
1922 kmem_fuzz_start(void)
1923 {
1924 vm_offset_t kmapoff_kaddr = 0;
1925 uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
1926 vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
1927
1928 kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
1929 KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
1930 VM_KERN_MEMORY_OSFMK);
1931 return kmapoff_kaddr + kmapoff_size;
1932 }
1933
1934 /*
1935 * Returns a 16-bit random number between 0 and
1936 * upper_limit (inclusive).
1937 */
1938 __startup_func
1939 uint16_t
1940 kmem_get_random16(
1941 uint16_t upper_limit)
1942 {
1943 static uint64_t random_entropy;
1944 assert(upper_limit < UINT16_MAX);
1945 if (random_entropy == 0) {
1946 random_entropy = early_random();
1947 }
1948 uint32_t result = random_entropy & UINT32_MAX;
1949 random_entropy >>= 32;
1950 return (uint16_t)(result % (upper_limit + 1));
1951 }
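/*
 * Annotation: each call consumes the low 32 bits of the cached
 * early_random() value and shifts the cache down, so a fresh 64-bit value
 * is drawn once the cache reaches zero. The result is reduced modulo
 * (upper_limit + 1), which is why upper_limit must stay below UINT16_MAX.
 */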
1952
1953 /*
1954 * Generate a randomly shuffled array of indices from 0 to count - 1
1955 */
1956 __startup_func
1957 void
1958 kmem_shuffle(
1959 uint16_t *shuffle_buf,
1960 uint16_t count)
1961 {
1962 for (uint16_t i = 0; i < count; i++) {
1963 uint16_t j = kmem_get_random16(i);
1964 if (j != i) {
1965 shuffle_buf[i] = shuffle_buf[j];
1966 }
1967 shuffle_buf[j] = i;
1968 }
1969 }
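/*
 * Annotation: this is the "inside-out" variant of the Fisher-Yates
 * shuffle; element i is written to a random slot j <= i and the previous
 * occupant of j moves to i, producing a uniform permutation of 0..count-1
 * without a separate initialization pass. Illustrative trace for count = 3
 * with random draws j = 0, 1, 0: [0] -> [0,1] -> [2,1,0].
 */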
1970
1971 __startup_func
1972 static void
1973 kmem_shuffle_claims(void)
1974 {
1975 uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
1976 uint16_t limit = (uint16_t)kmem_claim_count;
1977
1978 kmem_shuffle(&shuffle_buf[0], limit);
1979 for (uint16_t i = 0; i < limit; i++) {
1980 struct kmem_range_startup_spec tmp = kmem_claims[i];
1981 kmem_claims[i] = kmem_claims[shuffle_buf[i]];
1982 kmem_claims[shuffle_buf[i]] = tmp;
1983 }
1984 }
1985 __startup_func
1986 static void
1987 kmem_readjust_ranges(
1988 uint32_t cur_idx)
1989 {
1990 assert(cur_idx != 0);
1991 uint32_t j = cur_idx - 1, random;
1992 struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
1993 struct mach_vm_range *sp_range = sp.kc_range;
1994
1995 /*
1996 * Find max index where restriction is met
1997 */
1998 for (; j > 0; j--) {
1999 struct kmem_range_startup_spec spj = kmem_claims[j];
2000 vm_map_offset_t max_start = spj.kc_range->min_address;
2001 if (spj.kc_flags & KC_NO_MOVE) {
2002 panic("kmem_range_init: Can't scramble with multiple constraints");
2003 }
2004 if (max_start <= sp_range->min_address) {
2005 break;
2006 }
2007 }
2008
2009 /*
2010 * Pick a random index from 0 to max index and shift claims to the right
2011 * to make room for restricted claim
2012 */
2013 random = kmem_get_random16((uint16_t)j);
2014 assert(random <= j);
2015
2016 sp_range->min_address = kmem_claims[random].kc_range->min_address;
2017 sp_range->max_address = sp_range->min_address + sp.kc_size;
2018
2019 for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
2020 struct kmem_range_startup_spec spj = kmem_claims[j];
2021 struct mach_vm_range *range = spj.kc_range;
2022 range->min_address += sp.kc_size;
2023 range->max_address += sp.kc_size;
2024 kmem_claims[j + 1] = spj;
2025 }
2026
2027 sp.kc_flags = KC_NO_MOVE;
2028 kmem_claims[random] = sp;
2029 }
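/*
 * Annotation: a claim reaches this function when its tentative start
 * landed above the bound recorded in kc_range->min_address. The claim is
 * re-inserted at a randomly chosen earlier slot whose start still
 * satisfies that bound, every claim in between slides up by kc_size (both
 * in the array and in VA), and the moved claim is pinned with KC_NO_MOVE
 * so it cannot be displaced a second time.
 */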
2030
2031 __startup_func
2032 static void
2033 kmem_add_extra_claims(void)
2034 {
2035 vm_map_size_t largest_free_size = 0, total_claims = 0;
2036
2037 vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
2038 largest_free_size = trunc_page(largest_free_size);
2039
2040 /*
2041 * Determine size of data and pointer kmem_ranges
2042 */
2043 for (uint32_t i = 0; i < kmem_claim_count; i++) {
2044 total_claims += kmem_claims[i].kc_size;
2045 }
2046 assert((total_claims & PAGE_MASK) == 0);
2047 largest_free_size -= total_claims;
2048
2049 /*
2050 * kasan and configs w/o *TRR need to have just one ptr range due to
2051 * resource constraints.
2052 */
2053 #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
2054 kmem_ptr_ranges = 1;
2055 #endif
2056
2057 ptr_range_size = round_page(largest_free_size /
2058 (kmem_ptr_ranges * 3));
2059 data_range_size = largest_free_size -
2060 (ptr_range_size * kmem_ptr_ranges);
2061
2062
2063 /*
2064 * Add claims for data and pointer
2065 */
2066 struct kmem_range_startup_spec kmem_spec_data = {
2067 .kc_name = "kmem_data_range",
2068 .kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
2069 .kc_size = data_range_size,
2070 .kc_flags = KC_NO_ENTRY,
2071 };
2072 kmem_claims[kmem_claim_count++] = kmem_spec_data;
2073
2074 for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
2075 struct kmem_range_startup_spec kmem_spec_ptr = {
2076 .kc_name = "kmem_ptr_range",
2077 .kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
2078 .kc_size = ptr_range_size,
2079 .kc_flags = KC_NO_ENTRY,
2080 };
2081 kmem_claims[kmem_claim_count++] = kmem_spec_ptr;
2082 }
2083 }
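/*
 * Illustrative sizing example (hypothetical numbers): with two pointer
 * ranges and 96GB of VA left after the registered claims, each pointer
 * range gets round_page(96GB / 6) = 16GB and the data range receives the
 * remaining 64GB. All of these claims use KC_NO_ENTRY, so the ranges are
 * reserved without creating backing map entries.
 */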
2084
2085 __startup_func
2086 static void
2087 kmem_scramble_ranges(void)
2088 {
2089 vm_map_offset_t start = 0;
2090
2091 /*
2092 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
2093 * the vm can find the requested ranges.
2094 */
2095 kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
2096 VM_MAP_PAGE_SIZE(kernel_map));
2097 kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
2098
2099 /*
2100 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
2101 * this map is 2G in size and starts at the end of kernel_text on x86; it
2102 * could otherwise overflow into the heap.
2103 */
2104 kext_alloc_init();
2105
2106 /*
2107 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
2108 * stack addresses. (With a 4K page and 9 bits of randomness, this
2109 * eats about 2M of VA from the map)
2110 *
2111 * Note that we always need to slide by at least one page because the VM
2112 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
2113 * do not admit this address to be part of any zone submap.
2114 */
2115 start = kmem_fuzz_start();
2116
2117 /*
2118 * Add claims for ptr and data kmem_ranges
2119 */
2120 kmem_add_extra_claims();
2121
2122 /*
2123 * Shuffle registered claims
2124 */
2125 assert(kmem_claim_count < UINT16_MAX);
2126 kmem_shuffle_claims();
2127
2128 /*
2129 * Apply restrictions and determine range for each claim
2130 */
2131 for (uint32_t i = 0; i < kmem_claim_count; i++) {
2132 vm_map_offset_t end = 0;
2133 struct kmem_range_startup_spec sp = kmem_claims[i];
2134 struct mach_vm_range *sp_range = sp.kc_range;
2135 if (vm_map_locate_space(kernel_map, sp.kc_size, 0,
2136 VM_MAP_KERNEL_FLAGS_NONE, &start, NULL) != KERN_SUCCESS) {
2137 panic("kmem_range_init: vm_map_locate_space failing for claim %s",
2138 sp.kc_name);
2139 }
2140
2141 end = start + sp.kc_size;
2142 /*
2143 * Re-adjust ranges if restriction not met
2144 */
2145 if (sp_range->min_address && start > sp_range->min_address) {
2146 kmem_readjust_ranges(i);
2147 } else {
2148 sp_range->min_address = start;
2149 sp_range->max_address = end;
2150 }
2151 start = end;
2152 }
2153
2154 /*
2155 * We have settled on the ranges, now create temporary entries for the
2156 * claims
2157 */
2158 for (uint32_t i = 0; i < kmem_claim_count; i++) {
2159 struct kmem_range_startup_spec sp = kmem_claims[i];
2160 vm_map_entry_t entry = NULL;
2161 if (sp.kc_flags & KC_NO_ENTRY) {
2162 continue;
2163 }
2164 if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
2165 VM_MAP_KERNEL_FLAGS_NONE, &entry) != KERN_SUCCESS) {
2166 panic("kmem_range_init: vm_map_find_space failing for claim %s",
2167 sp.kc_name);
2168 }
2169 vm_object_reference(kernel_object);
2170 VME_OBJECT_SET(entry, kernel_object, false, 0);
2171 VME_OFFSET_SET(entry, entry->vme_start);
2172 vm_map_unlock(kernel_map);
2173 }
2174 /*
2175 * Now that we are done assigning all the ranges, reset
2176 * kmem_ranges[KMEM_RANGE_ID_NONE]
2177 */
2178 kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
2179
2180 #if DEBUG || DEVELOPMENT
2181 for (uint32_t i = 0; i < kmem_claim_count; i++) {
2182 struct kmem_range_startup_spec sp = kmem_claims[i];
2183
2184 printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
2185 (void *)sp.kc_range->min_address,
2186 (void *)sp.kc_range->max_address,
2187 mach_vm_size_pretty(sp.kc_size),
2188 mach_vm_size_unit(sp.kc_size));
2189 }
2190 #endif /* DEBUG || DEVELOPMENT */
2191 }
2192
2193 __startup_func
2194 static void
2195 kmem_range_init(void)
2196 {
2197 kmem_scramble_ranges();
2198
2199 /* Initialize kmem_large_ranges. Skip 1/16th of the range size on either
2200 * side for ptr ranges, and 1/8th from the left only for data, since data
2201 * is allocated from a single front.
2202 */
2203 vm_size_t range_adjustment = ptr_range_size >> 4;
2204 for (kmem_range_id_t i = 0; i < kmem_ptr_ranges; i++) {
2205 kmem_large_ranges[KMEM_RANGE_ID_PTR_0 + i].min_address =
2206 kmem_ranges[KMEM_RANGE_ID_PTR_0 + i].min_address + range_adjustment;
2207 kmem_large_ranges[KMEM_RANGE_ID_PTR_0 + i].max_address =
2208 kmem_ranges[KMEM_RANGE_ID_PTR_0 + i].max_address - range_adjustment;
2209 }
2210 range_adjustment = data_range_size >> 3;
2211 kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
2212 kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
2213 kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
2214 kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
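/*
 * Illustrative example (hypothetical sizes): a 16GB pointer range keeps a
 * 14GB window for large allocations (1GB trimmed from each end), while a
 * 64GB data range keeps 56GB (8GB trimmed from the left only).
 */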
2215
2216 #if DEBUG || DEVELOPMENT
2217 for (kmem_range_id_t i = 0; i < KMEM_RANGE_COUNT; i++) {
2218 vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
2219 printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
2220 (void *)kmem_large_ranges[i].min_address,
2221 (void *)kmem_large_ranges[i].max_address,
2222 mach_vm_size_pretty(range_size),
2223 mach_vm_size_unit(range_size));
2224 }
2225 #endif
2226 }
2227 #else /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
2228 __startup_func
2229 static void
2230 kmem_range_init(void)
2231 {
2232 for (kmem_range_id_t i = 0; i < KMEM_RANGE_COUNT; i++) {
2233 kmem_ranges[i].min_address = kernel_map->min_offset;
2234 kmem_ranges[i].max_address = kernel_map->max_offset;
2235 }
2236 kext_alloc_init();
2237 kmem_fuzz_start();
2238 }
2239 #endif
2240 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
2241
2242 /*
2243 * kmem_init:
2244 *
2245 * Initialize the kernel's virtual memory map, taking
2246 * into account all memory allocated up to this time.
2247 */
2248 __startup_func
2249 void
2250 kmem_init(
2251 vm_offset_t start,
2252 vm_offset_t end)
2253 {
2254 vm_map_offset_t map_start;
2255 vm_map_offset_t map_end;
2256 vm_map_kernel_flags_t vmk_flags;
2257
2258 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2259 vmk_flags.vmkf_permanent = TRUE;
2260 vmk_flags.vmkf_no_pmap_check = TRUE;
2261
2262 map_start = vm_map_trunc_page(start,
2263 VM_MAP_PAGE_MASK(kernel_map));
2264 map_end = vm_map_round_page(end,
2265 VM_MAP_PAGE_MASK(kernel_map));
2266
2267 vm_map_will_allocate_early_map(&kernel_map);
2268 #if defined(__arm64__)
2269 kernel_map = vm_map_create_options(pmap_kernel(),
2270 VM_MIN_KERNEL_AND_KEXT_ADDRESS,
2271 VM_MAX_KERNEL_ADDRESS,
2272 VM_MAP_CREATE_DEFAULT);
2273 /*
2274 * Reserve virtual memory allocated up to this time.
2275 */
2276 {
2277 unsigned int region_select = 0;
2278 vm_map_offset_t region_start;
2279 vm_map_size_t region_size;
2280 vm_map_offset_t map_addr;
2281 kern_return_t kr;
2282
2283 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
2284 map_addr = region_start;
2285 kr = vm_map_enter(kernel_map, &map_addr,
2286 vm_map_round_page(region_size,
2287 VM_MAP_PAGE_MASK(kernel_map)),
2288 (vm_map_offset_t) 0,
2289 VM_FLAGS_FIXED,
2290 vmk_flags,
2291 VM_KERN_MEMORY_NONE,
2292 VM_OBJECT_NULL,
2293 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
2294 VM_INHERIT_DEFAULT);
2295
2296 if (kr != KERN_SUCCESS) {
2297 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
2298 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
2299 (uint64_t) region_size, kr);
2300 }
2301
2302 region_select++;
2303 }
2304 }
2305 #else
2306 kernel_map = vm_map_create_options(pmap_kernel(),
2307 VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
2308 VM_MAP_CREATE_DEFAULT);
2309 /*
2310 * Reserve virtual memory allocated up to this time.
2311 */
2312 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
2313 vm_map_offset_t map_addr;
2314 kern_return_t kr;
2315
2316 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2317 vmk_flags.vmkf_no_pmap_check = TRUE;
2318
2319 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
2320 kr = vm_map_enter(kernel_map,
2321 &map_addr,
2322 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
2323 (vm_map_offset_t) 0,
2324 VM_FLAGS_FIXED,
2325 vmk_flags,
2326 VM_KERN_MEMORY_NONE,
2327 VM_OBJECT_NULL,
2328 (vm_object_offset_t) 0, FALSE,
2329 VM_PROT_NONE, VM_PROT_NONE,
2330 VM_INHERIT_DEFAULT);
2331
2332 if (kr != KERN_SUCCESS) {
2333 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
2334 (uint64_t) start, (uint64_t) end,
2335 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
2336 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
2337 kr);
2338 }
2339 }
2340 #endif
2341
2342 kmem_set_user_wire_limits();
2343 }
2344
2345
2346 #pragma mark map copyio
2347
2348 /*
2349 * Routine: copyinmap
2350 * Purpose:
2351 * Like copyin, except that fromaddr is an address
2352 * in the specified VM map. This implementation
2353 * is incomplete; it handles the current user map
2354 * and the kernel map/submaps.
2355 */
2356 kern_return_t
2357 copyinmap(
2358 vm_map_t map,
2359 vm_map_offset_t fromaddr,
2360 void *todata,
2361 vm_size_t length)
2362 {
2363 kern_return_t kr = KERN_SUCCESS;
2364 vm_map_t oldmap;
2365
2366 if (vm_map_pmap(map) == pmap_kernel()) {
2367 /* assume a correct copy */
2368 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
2369 } else if (current_map() == map) {
2370 if (copyin(fromaddr, todata, length) != 0) {
2371 kr = KERN_INVALID_ADDRESS;
2372 }
2373 } else {
2374 vm_map_reference(map);
2375 oldmap = vm_map_switch(map);
2376 if (copyin(fromaddr, todata, length) != 0) {
2377 kr = KERN_INVALID_ADDRESS;
2378 }
2379 vm_map_switch(oldmap);
2380 vm_map_deallocate(map);
2381 }
2382 return kr;
2383 }
2384
2385 /*
2386 * Routine: copyoutmap
2387 * Purpose:
2388 * Like copyout, except that toaddr is an address
2389 * in the specified VM map.
2390 */
2391 kern_return_t
2392 copyoutmap(
2393 vm_map_t map,
2394 void *fromdata,
2395 vm_map_address_t toaddr,
2396 vm_size_t length)
2397 {
2398 kern_return_t kr = KERN_SUCCESS;
2399 vm_map_t oldmap;
2400
2401 if (vm_map_pmap(map) == pmap_kernel()) {
2402 /* assume a correct copy */
2403 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
2404 } else if (current_map() == map) {
2405 if (copyout(fromdata, toaddr, length) != 0) {
2406 kr = KERN_INVALID_ADDRESS;
2407 }
2408 } else {
2409 vm_map_reference(map);
2410 oldmap = vm_map_switch(map);
2411 if (copyout(fromdata, toaddr, length) != 0) {
2412 kr = KERN_INVALID_ADDRESS;
2413 }
2414 vm_map_switch(oldmap);
2415 vm_map_deallocate(map);
2416 }
2417 return kr;
2418 }
2419
2420 /*
2421 * Routine: copyoutmap_atomic{32, 64}
2422 * Purpose:
2423 * Like copyoutmap, except that the operation is atomic.
2424 * Takes in value rather than *fromdata pointer.
2425 */
2426 kern_return_t
2427 copyoutmap_atomic32(
2428 vm_map_t map,
2429 uint32_t value,
2430 vm_map_address_t toaddr)
2431 {
2432 kern_return_t kr = KERN_SUCCESS;
2433 vm_map_t oldmap;
2434
2435 if (vm_map_pmap(map) == pmap_kernel()) {
2436 /* assume a correct toaddr */
2437 *(uint32_t *)toaddr = value;
2438 } else if (current_map() == map) {
2439 if (copyout_atomic32(value, toaddr) != 0) {
2440 kr = KERN_INVALID_ADDRESS;
2441 }
2442 } else {
2443 vm_map_reference(map);
2444 oldmap = vm_map_switch(map);
2445 if (copyout_atomic32(value, toaddr) != 0) {
2446 kr = KERN_INVALID_ADDRESS;
2447 }
2448 vm_map_switch(oldmap);
2449 vm_map_deallocate(map);
2450 }
2451 return kr;
2452 }
2453
2454 kern_return_t
2455 copyoutmap_atomic64(
2456 vm_map_t map,
2457 uint64_t value,
2458 vm_map_address_t toaddr)
2459 {
2460 kern_return_t kr = KERN_SUCCESS;
2461 vm_map_t oldmap;
2462
2463 if (vm_map_pmap(map) == pmap_kernel()) {
2464 /* assume a correct toaddr */
2465 *(uint64_t *)toaddr = value;
2466 } else if (current_map() == map) {
2467 if (copyout_atomic64(value, toaddr) != 0) {
2468 kr = KERN_INVALID_ADDRESS;
2469 }
2470 } else {
2471 vm_map_reference(map);
2472 oldmap = vm_map_switch(map);
2473 if (copyout_atomic64(value, toaddr) != 0) {
2474 kr = KERN_INVALID_ADDRESS;
2475 }
2476 vm_map_switch(oldmap);
2477 vm_map_deallocate(map);
2478 }
2479 return kr;
2480 }
2481
2482
2483 #pragma mark pointer obfuscation / packing
2484
2485 /*
2486 *
2487 * The following two functions are to be used when exposing kernel
2488 * addresses to userspace via any of the various debug or info
2489 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
2490 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
2491 * are exported to KEXTs.
2492 *
2493 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
2494 */
2495
2496 vm_offset_t
2497 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
2498 {
2499 assert(salt != 0);
2500
2501 if (addr == 0) {
2502 return 0ul;
2503 }
2504
2505 if (VM_KERNEL_IS_SLID(addr)) {
2506 return VM_KERNEL_UNSLIDE(addr);
2507 }
2508
2509 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
2510 SHA256_CTX sha_ctx;
2511
2512 SHA256_Init(&sha_ctx);
2513 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
2514 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
2515 SHA256_Final(sha_digest, &sha_ctx);
2516
2517 return sha_digest[0];
2518 }
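/*
 * Annotation: slid (text/data) addresses are exposed as their unslid
 * offsets, while other kernel addresses are replaced with the first
 * sizeof(vm_offset_t) bytes of SHA256(salt || addr), so equal pointers
 * hash consistently within a boot without revealing the underlying VA.
 */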
2519
2520 __exported vm_offset_t
2521 vm_kernel_addrhash_external(vm_offset_t addr);
2522 vm_offset_t
2523 vm_kernel_addrhash_external(vm_offset_t addr)
2524 {
2525 return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
2526 }
2527
2528 void
2529 vm_kernel_addrhide(
2530 vm_offset_t addr,
2531 vm_offset_t *hide_addr)
2532 {
2533 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
2534 }
2535
2536 /*
2537 * vm_kernel_addrperm_external:
2538 * vm_kernel_unslide_or_perm_external:
2539 *
2540 * Use these macros when exposing an address to userspace that could come from
2541 * either kernel text/data *or* the heap.
2542 */
2543 void
2544 vm_kernel_addrperm_external(
2545 vm_offset_t addr,
2546 vm_offset_t *perm_addr)
2547 {
2548 if (VM_KERNEL_IS_SLID(addr)) {
2549 *perm_addr = VM_KERNEL_UNSLIDE(addr);
2550 } else if (VM_KERNEL_ADDRESS(addr)) {
2551 *perm_addr = addr + vm_kernel_addrperm_ext;
2552 } else {
2553 *perm_addr = addr;
2554 }
2555 }
2556
2557 void
2558 vm_kernel_unslide_or_perm_external(
2559 vm_offset_t addr,
2560 vm_offset_t *up_addr)
2561 {
2562 vm_kernel_addrperm_external(addr, up_addr);
2563 }
2564
2565 void
2566 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
2567 {
2568 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
2569 panic("pointer %p can't be packed: low %d bits aren't 0",
2570 (void *)ptr, params.vmpp_shift);
2571 } else if (ptr <= params.vmpp_base) {
2572 panic("pointer %p can't be packed: below base %p",
2573 (void *)ptr, (void *)params.vmpp_base);
2574 } else {
2575 panic("pointer %p can't be packed: maximum encodable pointer is %p",
2576 (void *)ptr, (void *)vm_packing_max_packable(params));
2577 }
2578 }
2579
2580 void
2581 vm_packing_verify_range(
2582 const char *subsystem,
2583 vm_offset_t min_address,
2584 vm_offset_t max_address,
2585 vm_packing_params_t params)
2586 {
2587 if (min_address > max_address) {
2588 panic("%s: %s range invalid min:%p > max:%p",
2589 __func__, subsystem, (void *)min_address, (void *)max_address);
2590 }
2591
2592 if (!params.vmpp_base_relative) {
2593 return;
2594 }
2595
2596 if (min_address <= params.vmpp_base) {
2597 panic("%s: %s range invalid min:%p <= base:%p",
2598 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
2599 }
2600
2601 if (max_address > vm_packing_max_packable(params)) {
2602 panic("%s: %s range invalid max:%p > max packable:%p",
2603 __func__, subsystem, (void *)max_address,
2604 (void *)vm_packing_max_packable(params));
2605 }
2606 }
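/*
 * Annotation: for base-relative packing schemes this is an init-time check
 * that every address in [min_address, max_address] sits strictly above
 * vmpp_base and at or below vm_packing_max_packable(), i.e. that the whole
 * range survives a pack/unpack round trip.
 */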
2607
2608 #pragma mark tests
2609 #if DEBUG || DEVELOPMENT
2610 #include <sys/errno.h>
2611
2612 static void
2613 kmem_test_for_entry(
2614 vm_map_t map,
2615 vm_offset_t addr,
2616 void (^block)(vm_map_entry_t))
2617 {
2618 vm_map_entry_t entry;
2619
2620 vm_map_lock(map);
2621 block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
2622 vm_map_unlock(map);
2623 }
2624
2625 #define kmem_test_assert_map(map, pg, entries) ({ \
2626 assert3u((map)->size, ==, ptoa(pg)); \
2627 assert3u((map)->hdr.nentries, ==, entries); \
2628 })
2629
2630 static bool
2631 can_write_at(vm_offset_t offs, uint32_t page)
2632 {
2633 static const int zero;
2634
2635 return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
2636 }
2637 #define assert_writeable(offs, page) \
2638 assertf(can_write_at(offs, page), \
2639 "can write at %p + ptoa(%d)", (void *)offs, page)
2640
2641 #define assert_faults(offs, page) \
2642 assertf(!can_write_at(offs, page), \
2643 "can write at %p + ptoa(%d)", (void *)offs, page)
2644
2645 #define peek(offs, page) \
2646 (*(uint32_t *)((offs) + ptoa(page)))
2647
2648 #define poke(offs, page, v) \
2649 (*(uint32_t *)((offs) + ptoa(page)) = (v))
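/*
 * Annotation: can_write_at() probes a single byte 128 bytes into the
 * requested page with verify_write(), so assert_writeable()/assert_faults()
 * can check residency without panicking on a fault, while peek()/poke()
 * touch pages that are expected to be mapped.
 */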
2650
2651 __attribute__((noinline))
2652 static void
2653 kmem_alloc_basic_test(vm_map_t map)
2654 {
2655 kmem_guard_t guard = {
2656 .kmg_tag = VM_KERN_MEMORY_DIAG,
2657 };
2658 vm_offset_t addr;
2659
2660 /*
2661 * Test wired basics:
2662 * - KMA_KOBJECT
2663 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
2664 * - allocation alignment
2665 */
2666 addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
2667 KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
2668 assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
2669 assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
2670 kmem_test_assert_map(map, 10, 1);
2671
2672 kmem_test_for_entry(map, addr, ^(vm_map_entry_t e){
2673 assertf(e, "unable to find address %p in map %p", (void *)addr, map);
2674 assert(e->vme_kernel_object);
2675 assert(!e->vme_atomic);
2676 assert3u(e->vme_start, <=, addr);
2677 assert3u(addr + ptoa(10), <=, e->vme_end);
2678 });
2679
2680 assert_faults(addr, 0);
2681 for (int i = 1; i < 9; i++) {
2682 assert_writeable(addr, i);
2683 }
2684 assert_faults(addr, 9);
2685
2686 kmem_free(map, addr, ptoa(10));
2687 kmem_test_assert_map(map, 0, 0);
2688
2689 /*
2690 * Test pageable basics.
2691 */
2692 addr = kmem_alloc_guard(map, ptoa(10), 0,
2693 KMA_PAGEABLE, guard).kmr_address;
2694 assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
2695 kmem_test_assert_map(map, 10, 1);
2696
2697 for (int i = 0; i < 9; i++) {
2698 assert_faults(addr, i);
2699 poke(addr, i, 42);
2700 assert_writeable(addr, i);
2701 }
2702
2703 kmem_free(map, addr, ptoa(10));
2704 kmem_test_assert_map(map, 0, 0);
2705 }
2706
2707 __attribute__((noinline))
2708 static void
2709 kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
2710 {
2711 kmem_guard_t guard = {
2712 .kmg_atomic = !(kind & KMR_DATA),
2713 .kmg_tag = VM_KERN_MEMORY_DIAG,
2714 .kmg_context = 0xefface,
2715 };
2716 vm_offset_t addr, newaddr;
2717 const int N = 10;
2718
2719 /*
2720 * This isn't something kmem_realloc_guard() _needs_ to do,
2721 * we could conceive an implementation where it grows in place
2722 * if there's space after it.
2723 *
2724 * However, this is what the implementation does today.
2725 */
2726 bool realloc_growth_changes_address = true;
2727 bool GL = (kind & KMR_GUARD_LAST);
2728
2729 /*
2730 * Initial N page allocation
2731 */
2732 addr = kmem_alloc_guard(map, ptoa(N), 0,
2733 (kind & (KMA_KOBJECT | KMA_GUARD_LAST)) | KMA_ZERO,
2734 guard).kmr_address;
2735 assert3u(addr, !=, 0);
2736 kmem_test_assert_map(map, N, 1);
2737 for (int pg = 0; pg < N - GL; pg++) {
2738 poke(addr, pg, 42 + pg);
2739 }
2740 for (int pg = N - GL; pg < N; pg++) {
2741 assert_faults(addr, pg);
2742 }
2743
2744
2745 /*
2746 * Grow to N + 3 pages
2747 */
2748 newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
2749 kind | KMR_ZERO, guard).kmr_address;
2750 assert3u(newaddr, !=, 0);
2751 if (realloc_growth_changes_address) {
2752 assert3u(addr, !=, newaddr);
2753 }
2754 if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
2755 kmem_test_assert_map(map, N + 3, 1);
2756 } else {
2757 kmem_test_assert_map(map, 2 * N + 3, 2);
2758 }
2759 for (int pg = 0; pg < N - GL; pg++) {
2760 assert3u(peek(newaddr, pg), ==, 42 + pg);
2761 }
2762 if ((kind & KMR_FREEOLD) == 0) {
2763 for (int pg = 0; pg < N - GL; pg++) {
2764 assert3u(peek(addr, pg), ==, 42 + pg);
2765 }
2766 /* check for true sharing */
2767 poke(addr + 16, 0, 1234);
2768 assert3u(peek(newaddr + 16, 0), ==, 1234);
2769 kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
2770 kmem_test_assert_map(map, N + 3, 1);
2771 }
2772 if (addr != newaddr) {
2773 for (int pg = 0; pg < N - GL; pg++) {
2774 assert_faults(addr, pg);
2775 }
2776 }
2777 for (int pg = N - GL; pg < N + 3 - GL; pg++) {
2778 assert3u(peek(newaddr, pg), ==, 0);
2779 }
2780 for (int pg = N + 3 - GL; pg < N + 3; pg++) {
2781 assert_faults(newaddr, pg);
2782 }
2783 addr = newaddr;
2784
2785
2786 /*
2787 * Shrink to N - 2 pages
2788 */
2789 newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
2790 kind | KMR_ZERO, guard).kmr_address;
2791 assert3u(map->size, ==, ptoa(N - 2));
2792 assert3u(newaddr, ==, addr);
2793 kmem_test_assert_map(map, N - 2, 1);
2794
2795 for (int pg = 0; pg < N - 2 - GL; pg++) {
2796 assert3u(peek(addr, pg), ==, 42 + pg);
2797 }
2798 for (int pg = N - 2 - GL; pg < N + 3; pg++) {
2799 assert_faults(addr, pg);
2800 }
2801
2802 kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
2803 kmem_test_assert_map(map, 0, 0);
2804 }
2805
2806 static int
2807 kmem_basic_test(__unused int64_t in, int64_t *out)
2808 {
2809 mach_vm_offset_t addr;
2810 vm_map_t map;
2811
2812 printf("%s: test running\n", __func__);
2813
2814 map = kmem_suballoc(kernel_map, &addr, 64U << 20,
2815 VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
2816 KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;
2817
2818 printf("%s: kmem_alloc ...\n", __func__);
2819 kmem_alloc_basic_test(map);
2820 printf("%s: PASS\n", __func__);
2821
2822 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
2823 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
2824 printf("%s: PASS\n", __func__);
2825
2826 printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
2827 kmem_realloc_basic_test(map, KMR_FREEOLD);
2828 printf("%s: PASS\n", __func__);
2829
2830 printf("%s: kmem_realloc (KMR_NONE) ...\n", __func__);
2831 kmem_realloc_basic_test(map, KMR_NONE);
2832 printf("%s: PASS\n", __func__);
2833
2834 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
2835 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
2836 printf("%s: PASS\n", __func__);
2837
2838 printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
2839 kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
2840 printf("%s: PASS\n", __func__);
2841
2842 printf("%s: kmem_realloc (KMR_GUARD_LAST) ...\n", __func__);
2843 kmem_realloc_basic_test(map, KMR_GUARD_LAST);
2844 printf("%s: PASS\n", __func__);
2845
2846 /* passing KMR_DATA exercises the non-atomic realloc path */
2847 printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
2848 kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
2849 printf("%s: PASS\n", __func__);
2850
2851 printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
2852 kmem_realloc_basic_test(map, KMR_DATA);
2853 printf("%s: PASS\n", __func__);
2854
2855 kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
2856 vm_map_deallocate(map);
2857
2858 printf("%s: test passed\n", __func__);
2859 *out = 1;
2860 return 0;
2861 }
2862 SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
2863 #endif /* DEBUG || DEVELOPMENT */
2864