1 /*
2 * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/kalloc.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1985
62 *
63 * General kernel memory allocator. This allocator is designed
64 * to be used by the kernel to manage dynamic memory fast.
65 */
66
67 #include <mach/boolean.h>
68 #include <mach/sdt.h>
69 #include <mach/machine/vm_types.h>
70 #include <mach/vm_param.h>
71 #include <kern/misc_protos.h>
72 #include <kern/zalloc_internal.h>
73 #include <kern/kalloc.h>
74 #include <kern/ledger.h>
75 #include <kern/backtrace.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_map.h>
79 #include <sys/kdebug.h>
80
81 #include <san/kasan.h>
82 #include <libkern/section_keywords.h>
83 #include <libkern/prelink.h>
84
/* #define KALLOC_DEBUG 1 */

/* byte-size helpers for the map sizing constants below */
#define KiB(x) (1024 * (x))
#define MeB(x) (1024 * 1024 * (x))

/* clamp bounds for the scaled kalloc large map size (see kalloc_init_maps) */
#define KALLOC_MAP_SIZE_MIN MeB(16)
#define KALLOC_MAP_SIZE_MAX MeB(128)

/* allocations at or above this size are served from the fallback (kernel) map */
#if __LP64__
#define KALLOC_KERNMAP_ALLOC_THRESHOLD (MeB(1))
#else
#define KALLOC_KERNMAP_ALLOC_THRESHOLD (KiB(256))
#endif

#define EMPTY_RANGE (struct zone_map_range){0,0}

/* address ranges carved out of kernel_map by kalloc_init_maps() */
static SECURITY_READ_ONLY_LATE(struct zone_map_range) kernel_data_map_range;
static SECURITY_READ_ONLY_LATE(struct zone_map_range) kalloc_data_or_kernel_data_range;
static SECURITY_READ_ONLY_LATE(vm_size_t) kalloc_map_size;
SECURITY_READ_ONLY_LATE(vm_size_t) kalloc_max_prerounded;
static SECURITY_READ_ONLY_LATE(struct zone_map_range) kalloc_large_range[KHEAP_ID_COUNT];

/* size of kallocs that can come from kernel map */
static SECURITY_READ_ONLY_LATE(vm_map_t) kernel_data_map;
static SECURITY_READ_ONLY_LATE(vm_map_t) kalloc_large_map;
static SECURITY_READ_ONLY_LATE(vm_map_t) kalloc_large_data_map;

/* how many times we couldn't allocate out of kalloc_large_map and fell back to kernel_map */
unsigned long kalloc_fallback_count;

/* counters for large (non zone-backed) allocations; presumably guarded by kalloc_lock — TODO confirm */
uint_t kalloc_large_inuse;
vm_size_t kalloc_large_total;
vm_size_t kalloc_large_max;
vm_size_t kalloc_largest_allocated = 0;
uint64_t kalloc_large_sum;

LCK_GRP_DECLARE(kalloc_lck_grp, "kalloc.large");
LCK_SPIN_DECLARE(kalloc_lock, &kalloc_lck_grp);

#define kalloc_spin_lock() lck_spin_lock(&kalloc_lock)
#define kalloc_unlock() lck_spin_unlock(&kalloc_lock)
126
127 #pragma mark initialization
128
129 /*
130 * All allocations of size less than kalloc_max are rounded to the next nearest
131 * sized zone. This allocator is built on top of the zone allocator. A zone
132 * is created for each potential size that we are willing to get in small
133 * blocks.
134 *
135 * kalloc_max_prerounded, which is equivalent to kheap->kalloc_max, is the
136 * smallest allocation size, before rounding, for which no zone exists.
137 *
138 * Also if the allocation size is more than KALLOC_KERNMAP_ALLOC_THRESHOLD then allocate
139 * from kernel map rather than kalloc_large_map.
140 */
141
142 /*
143 * The k_zone_cfg table defines the configuration of zones on various platforms.
144 * The currently defined list of zones and their per-CPU caching behavior are as
145 * follows
146 *
147 * X:zone not present
148 * N:zone present no cpu-caching
149 * Y:zone present with cpu-caching
150 *
151 * Size macOS(64-bit) embedded(32-bit) embedded(64-bit)
152 *-------- ---------------- ---------------- ----------------
153 *
154 * 8 X Y X
155 * 16 Y Y Y
156 * 24 X Y X
157 * 32 Y Y Y
158 * 40 X Y X
159 * 48 Y Y Y
160 * 64 Y Y Y
161 * 72 X Y X
162 * 80 Y X Y
163 * 88 X Y X
164 * 96 Y X Y
165 * 112 X Y X
166 * 128 Y Y Y
167 * 160 Y X Y
168 * 192 Y Y Y
169 * 224 Y X Y
170 * 256 Y Y Y
171 * 288 Y Y Y
172 * 368 Y X Y
173 * 384 X Y X
174 * 400 Y X Y
175 * 440 X Y X
176 * 512 Y Y Y
177 * 576 Y N N
178 * 768 Y N N
179 * 1024 Y Y Y
180 * 1152 N N N
181 * 1280 N N N
182 * 1536 X N X
183 * 1664 N X N
184 * 2048 Y N N
185 * 2128 X N X
186 * 3072 X N X
187 * 4096 Y N N
188 * 6144 N N N
189 * 8192 Y N N
190 * 12288 N X X
191 * 16384 N X N
192 * 32768 X X N
193 *
194 */
/*
 * Per-size-class configuration entry: whether the zone uses per-CPU
 * caching, its element size, and its "kalloc.<size>" zone name.
 * kzc_size/kzc_name may be rewritten by kalloc_zones_init() if the
 * created zone rounds the element size up.
 */
struct kalloc_zone_cfg {
	bool kzc_caching;       /* enable ZC_CACHING for this size class */
	uint32_t kzc_size;      /* element size in bytes */
	char kzc_name[MAX_ZONE_NAME]; /* zone name, "kalloc.<size>" */
};

/* Build one table entry; the zone name is derived from the literal SIZE. */
#define KZC_ENTRY(SIZE, caching) { \
	.kzc_caching = (caching), \
	.kzc_size = (SIZE), \
	.kzc_name = "kalloc." #SIZE \
}
/*
 * Size-class table shared by the default and kext heaps.
 * Platform selection follows the X/N/Y matrix in the comment above:
 * embedded 64-bit, embedded 32-bit, or macOS 64-bit.
 */
static SECURITY_READ_ONLY_LATE(struct kalloc_zone_cfg) k_zone_cfg[] = {
#if !defined(XNU_TARGET_OS_OSX)

#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
	/* Zone config for embedded 64-bit platforms */
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(80, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(224, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(400, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, false),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	KZC_ENTRY(16384, false),
	KZC_ENTRY(32768, false),

#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
	/* Zone config for embedded 32-bit platforms */
	KZC_ENTRY(8, true),
	KZC_ENTRY(16, true),
	KZC_ENTRY(24, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(40, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(72, true),
	KZC_ENTRY(88, true),
	KZC_ENTRY(112, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(384, true),
	KZC_ENTRY(440, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, false),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1536, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(2128, false),
	KZC_ENTRY(3072, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	/* To limit internal fragmentation, only add the following zones if the
	 * page size is greater than 4K.
	 * Note that we use ARM_PGBYTES here (instead of one of the VM macros)
	 * since it's guaranteed to be a compile time constant.
	 */
#if ARM_PGBYTES > 4096
	KZC_ENTRY(16384, false),
	KZC_ENTRY(32768, false),
#endif /* ARM_PGBYTES > 4096 */

#else
#error missing or invalid zone size parameters for kalloc
#endif

#else /* !defined(XNU_TARGET_OS_OSX) */

	/* Zone config for macOS 64-bit platforms */
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(80, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(224, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(400, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, true),
	KZC_ENTRY(768, true),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, true),
	KZC_ENTRY(4096, true),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, true),
#if __x86_64__
	KZC_ENTRY(12288, false),
#endif /* __x86_64__ */
	KZC_ENTRY(16384, false),
#if __arm64__
	KZC_ENTRY(32768, false),
#endif
#endif /* !defined(XNU_TARGET_OS_OSX) */
};
321
322
/*
 * Size-class table for the "bag of bytes" data heap (KHEAP_DATA_BUFFERS):
 * a smaller set of size classes than k_zone_cfg, shared by all platforms.
 */
static SECURITY_READ_ONLY_LATE(struct kalloc_zone_cfg) k_zone_cfg_data[] = {
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	KZC_ENTRY(16384, false),
#if __arm64__
	KZC_ENTRY(32768, false),
#endif
};
#undef KZC_ENTRY
349
/* element count of a size-class table (array-typed argument only) */
#define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))

/*
 * Many kalloc() allocations are for small structures containing a few
 * pointers and longs - the dlut[] direct lookup table, indexed by
 * size normalized to the minimum alignment, finds the right zone index
 * for them in one dereference.
 */

#define INDEX_ZDLUT(size) (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define MAX_SIZE_ZDLUT ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)

/* backing zone pointer arrays, one per heap; filled in by kalloc_zones_init() */
static SECURITY_READ_ONLY_LATE(zone_t) k_zone_default[MAX_K_ZONE(k_zone_cfg)];
static SECURITY_READ_ONLY_LATE(zone_t) k_zone_data[MAX_K_ZONE(k_zone_cfg_data)];
static SECURITY_READ_ONLY_LATE(zone_t) k_zone_kext[MAX_K_ZONE(k_zone_cfg)];

#if VM_TAG_SIZECLASSES
static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(k_zone_cfg));
#endif

/* printable prefix per heap id, indexed by kalloc_heap_id */
const char * const kalloc_heap_names[] = {
	[KHEAP_ID_NONE]          = "",
	[KHEAP_ID_DEFAULT]       = "default.",
	[KHEAP_ID_DATA_BUFFERS]  = "data.",
	[KHEAP_ID_KEXT]          = "kext.",
};
376
377 /*
378 * Default kalloc heap configuration
379 */
/* Default heap: backed by k_zone_cfg / k_zone_default. */
static SECURITY_READ_ONLY_LATE(struct kheap_zones) kalloc_zones_default = {
	.cfg         = k_zone_cfg,
	.heap_id     = KHEAP_ID_DEFAULT,
	.k_zone      = k_zone_default,
	.max_k_zone  = MAX_K_ZONE(k_zone_cfg)
};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DEFAULT[1] = {
	{
		.kh_zones    = &kalloc_zones_default,
		.kh_name     = "default.",
		.kh_heap_id  = KHEAP_ID_DEFAULT,
	}
};
393
394
395 /*
396 * Bag of bytes heap configuration
397 */
/* Data ("bag of bytes") heap: backed by k_zone_cfg_data / k_zone_data. */
static SECURITY_READ_ONLY_LATE(struct kheap_zones) kalloc_zones_data = {
	.cfg         = k_zone_cfg_data,
	.heap_id     = KHEAP_ID_DATA_BUFFERS,
	.k_zone      = k_zone_data,
	.max_k_zone  = MAX_K_ZONE(k_zone_cfg_data)
};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
	{
		.kh_zones    = &kalloc_zones_data,
		.kh_name     = "data.",
		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
	}
};
411
412
413 /*
414 * Kext heap configuration
415 */
/* Kext heap: shares the k_zone_cfg table but has its own zone array. */
static SECURITY_READ_ONLY_LATE(struct kheap_zones) kalloc_zones_kext = {
	.cfg         = k_zone_cfg,
	.heap_id     = KHEAP_ID_KEXT,
	.k_zone      = k_zone_kext,
	.max_k_zone  = MAX_K_ZONE(k_zone_cfg)
};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KEXT[1] = {
	{
		.kh_zones    = &kalloc_zones_kext,
		.kh_name     = "kext.",
		.kh_heap_id  = KHEAP_ID_KEXT,
	}
};

/* kern_os_malloc() allocations are a named view into the kext heap */
KALLOC_HEAP_DEFINE(KERN_OS_MALLOC, "kern_os_malloc", KHEAP_ID_KEXT);
431
432 /*
433 * Initialize kalloc heap: Create zones, generate direct lookup table and
434 * do a quick test on lookups
435 */
__startup_func
static void
kalloc_zones_init(struct kalloc_heap *kheap)
{
	struct kheap_zones *zones = kheap->kh_zones;
	struct kalloc_zone_cfg *cfg = zones->cfg;
	zone_t *k_zone = zones->k_zone;
	vm_size_t size;

	/*
	 * Allocate a zone for each size we are going to handle.
	 */
	/* kalloc_max = largest configured size class + 1 (first non zone-backed size) */
	zones->kalloc_max = (zones->cfg[zones->max_k_zone - 1].kzc_size) + 1;
	for (uint32_t i = 0; i < zones->max_k_zone &&
	    (size = cfg[i].kzc_size) < zones->kalloc_max; i++) {
		zone_create_flags_t flags = ZC_KASAN_NOREDZONE |
		    ZC_KASAN_NOQUARANTINE | ZC_KALLOC_HEAP;
		if (cfg[i].kzc_caching) {
			flags |= ZC_CACHING;
		}

		/* the block tags the new zone with this heap's id */
		k_zone[i] = zone_create_ext(cfg[i].kzc_name, size, flags,
		    ZONE_ID_ANY, ^(zone_t z){
			zone_security_array[zone_index(z)].z_kheap_id = (uint8_t)zones->heap_id;
		});
		/*
		 * Set the updated elem size back to the config
		 */
		uint32_t elem_size = k_zone[i]->z_elem_size;
		if (cfg[i].kzc_size != elem_size) {
			cfg[i].kzc_size = elem_size;
			snprintf(cfg[i].kzc_name, MAX_ZONE_NAME, "kalloc.%u", elem_size);
		}
	}

	/*
	 * Set large maps and fallback maps for each zone
	 */
	if (ZSECURITY_ENABLED(KERNEL_DATA_MAP) && kheap == KHEAP_DATA_BUFFERS) {
		kheap->kh_large_map = kalloc_large_data_map;
		kheap->kh_fallback_map = kernel_data_map;
		kheap->kh_tag = VM_KERN_MEMORY_KALLOC_DATA;
	} else {
		kheap->kh_large_map = kalloc_large_map;
		kheap->kh_fallback_map = kernel_map;
		kheap->kh_tag = VM_KERN_MEMORY_KALLOC;
	}

	/*
	 * Count all the "raw" views for zones in the heap.
	 */
	zone_view_count += zones->max_k_zone;

	/*
	 * Build the Direct LookUp Table for small allocations
	 * As k_zone_cfg is shared between the heaps the
	 * Direct LookUp Table is also shared and doesn't need to
	 * be rebuilt per heap.
	 */
	size = 0;
	for (int i = 0; i <= KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
		uint8_t zindex = 0;

		/* find the first size class that can hold `size` */
		while ((vm_size_t)(cfg[zindex].kzc_size) < size) {
			zindex++;
		}

		/* extra iteration records where lookups beyond the DLUT start scanning */
		if (i == KALLOC_DLUT_SIZE) {
			zones->k_zindex_start = zindex;
			break;
		}
		zones->dlut[i] = zindex;
	}

#ifdef KALLOC_DEBUG
	printf("kalloc_init: k_zindex_start %d\n", zones->k_zindex_start);

	/*
	 * Do a quick synthesis to see how well/badly we can
	 * find-a-zone for a given size.
	 * Useful when debugging/tweaking the array of zone sizes.
	 * Cache misses probably more critical than compare-branches!
	 */
	for (uint32_t i = 0; i < zones->max_k_zone; i++) {
		vm_size_t testsize = (vm_size_t)(cfg[i].kzc_size - 1);
		int compare = 0;
		uint8_t zindex;

		if (testsize < MAX_SIZE_ZDLUT) {
			compare += 1;   /* 'if' (T) */

			long dindex = INDEX_ZDLUT(testsize);
			zindex = (int)zones->dlut[dindex];
		} else if (testsize < zones->kalloc_max) {
			compare += 2;   /* 'if' (F), 'if' (T) */

			zindex = zones->k_zindex_start;
			while ((vm_size_t)(cfg[zindex].kzc_size) < testsize) {
				zindex++;
				compare++;      /* 'while' (T) */
			}
			compare++;      /* 'while' (F) */
		} else {
			break;  /* not zone-backed */
		}
		zone_t z = k_zone[zindex];
		printf("kalloc_init: req size %4lu: %8s.%16s took %d compare%s\n",
		    (unsigned long)testsize, kalloc_heap_names[zones->heap_id],
		    z->z_name, compare, compare == 1 ? "" : "s");
	}
#endif
}
548
549 /*
550 * Initialize the memory allocator. This should be called only
551 * once on a system wide basis (i.e. first processor to get here
552 * does the initialization).
553 *
554 * This initializes all of the zones.
555 */
556
557 __startup_func
558 void
kalloc_init_maps(vm_address_t min_address)559 kalloc_init_maps(vm_address_t min_address)
560 {
561 kern_return_t retval;
562 vm_map_kernel_flags_t vmk_flags;
563 vm_size_t data_map_size;
564 struct zone_map_range range, *cur;
565
566 /*
567 * Scale the kalloc_map_size to physical memory size: stay below
568 * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel).
569 */
570 kalloc_map_size = round_page((vm_size_t)((sane_size >> 2) / 10));
571 #if !__LP64__
572 if (kalloc_map_size > KALLOC_MAP_SIZE_MAX) {
573 kalloc_map_size = KALLOC_MAP_SIZE_MAX;
574 }
575 #endif /* !__LP64__ */
576 if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) {
577 kalloc_map_size = KALLOC_MAP_SIZE_MIN;
578 }
579
580 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
581 vmk_flags.vmkf_permanent = TRUE;
582
583 /* map for large allocations */
584
585 retval = kmem_suballoc(kernel_map, &range.min_address, kalloc_map_size,
586 FALSE, VM_FLAGS_ANYWHERE, vmk_flags,
587 VM_KERN_MEMORY_KALLOC, &kalloc_large_map);
588 if (retval != KERN_SUCCESS) {
589 panic("kalloc_large_data_map: kmem_suballoc failed %d", retval);
590 }
591 range.max_address = range.min_address + kalloc_map_size;
592 #if DEBUG || DEVELOPMENT
593 printf("kalloc: deflt map %p:%p (%zuM)\n",
594 (void *)range.min_address, (void *)range.max_address,
595 (size_t)zone_range_size(&range) >> 20);
596 #endif /* DEBUG || DEVELOPMENT */
597
598 /* unless overridden below, all kalloc heaps share the same range */
599 kalloc_large_range[KHEAP_ID_DEFAULT] = range;
600 kalloc_large_range[KHEAP_ID_KEXT] = range;
601 kalloc_large_range[KHEAP_ID_DATA_BUFFERS] = range;
602
603 min_address = MAX(min_address, range.max_address);
604
605 if (ZSECURITY_ENABLED(KERNEL_DATA_MAP)) {
606 vm_map_size_t largest_free_size;
607
608 vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
609 data_map_size = (vm_size_t)(largest_free_size / 2);
610 data_map_size -= (data_map_size % MeB(1));
611
612 kalloc_data_or_kernel_data_range = (struct zone_map_range){
613 .min_address = min_address,
614 .max_address = min_address + data_map_size,
615 };
616
617 /* map for large user controlled data allocations */
618
619 cur = &kalloc_large_range[KHEAP_ID_DATA_BUFFERS];
620 cur->min_address = min_address;
621 cur->max_address = min_address + kalloc_map_size;
622 #if DEBUG || DEVELOPMENT
623 printf("kalloc: data map %p:%p (%zuM)\n",
624 (void *)cur->min_address, (void *)cur->max_address,
625 (size_t)zone_range_size(cur) >> 20);
626 #endif /* DEBUG || DEVELOPMENT */
627
628 retval = kmem_suballoc(kernel_map, &cur->min_address,
629 kalloc_map_size, FALSE, VM_FLAGS_FIXED, vmk_flags,
630 VM_KERN_MEMORY_KALLOC_DATA, &kalloc_large_data_map);
631 if (retval != KERN_SUCCESS) {
632 panic("kalloc_large_data_map: kmem_suballoc failed %d",
633 retval);
634 }
635
636
637 /* kernel data map for user controlled data */
638
639 cur = &kernel_data_map_range;
640 cur->min_address = min_address + kalloc_map_size;
641 cur->max_address = min_address + data_map_size;
642 #if DEBUG || DEVELOPMENT
643 printf("kernel: data map %p:%p (%zuM)\n",
644 (void *)cur->min_address, (void *)cur->max_address,
645 (size_t)zone_range_size(cur) >> 20);
646 #endif /* DEBUG || DEVELOPMENT */
647
648 retval = kmem_suballoc(kernel_map, &cur->min_address,
649 data_map_size - kalloc_map_size,
650 FALSE, VM_FLAGS_FIXED, vmk_flags,
651 VM_KERN_MEMORY_KALLOC_DATA, &kernel_data_map);
652 if (retval != KERN_SUCCESS) {
653 panic("kalloc_large_data_map: kmem_suballoc failed %d",
654 retval);
655 }
656 }
657 }
658
/*
 * One-time startup initialization of all kalloc heaps.
 * Heaps whose zone-security option is off alias KHEAP_DEFAULT instead
 * of getting their own zones.
 */
__startup_func
static void
kalloc_init(void)
{
	/* allocations at or above KALLOC_KERNMAP_ALLOC_THRESHOLD go to the fallback (kernel) map */
	kalloc_largest_allocated = KALLOC_KERNMAP_ALLOC_THRESHOLD;

	/* Initialize kalloc default heap */
	kalloc_zones_init(KHEAP_DEFAULT);
	kalloc_max_prerounded = KHEAP_DEFAULT->kh_zones->kalloc_max;
	assert(kalloc_max_prerounded > KALLOC_SAFE_ALLOC_SIZE);

#if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
	/* Initialize kalloc data buffers heap */
	kalloc_zones_init(KHEAP_DATA_BUFFERS);
#else
	/* option off: data heap is an alias of the default heap */
	*KHEAP_DATA_BUFFERS = *KHEAP_DEFAULT;
#endif

#if ZSECURITY_CONFIG(SEQUESTER_KEXT_KALLOC)
	/* Initialize kalloc kext heap */
	kalloc_zones_init(KHEAP_KEXT);
#else
	/* option off: kext heap is an alias of the default heap */
	*KHEAP_KEXT = *KHEAP_DEFAULT;
#endif
}
STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
686
/* kext Mach-O images in __PRELINK_TEXT are padded to this alignment */
#define KEXT_ALIGN_SHIFT 6
#define KEXT_ALIGN_BYTES (1<< KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK (KEXT_ALIGN_BYTES-1)
#define kt_scratch_size (256ul << 10)

#if XNU_TARGET_OS_WATCH
#define KT_ZBUDGET 85
#else
#define KT_ZBUDGET 200
#endif

/* boot-args: "kt" toggles kalloc_type options, "kt_zbudget" the zone budget */
TUNABLE(kalloc_type_options_t, kt_options, "kt", KT_OPTIONS_ON);
TUNABLE(uint16_t, kt_zone_budget, "kt_zbudget", KT_ZBUDGET);

/* linker-provided bounds of xnu's own __kalloc_type section */
extern struct kalloc_type_view kalloc_types[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

extern struct kalloc_type_view kalloc_types_end[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

/* startup-only scratch buffer of views and its capacity/count */
__startup_data
static kalloc_type_view_t *kt_buffer = NULL;
__startup_data
static uint64_t kt_count;

#if DEBUG || DEVELOPMENT
/*
 * For use by lldb to iterate over kalloc types
 */
uint64_t num_kt_sizeclass = MAX_K_ZONE(k_zone_cfg);
SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(k_zone_cfg)];
#endif

static_assert(KT_VM_TAG_MASK == Z_VM_TAG_MASK, "validate vm tag mask");
static_assert(MAX_K_ZONE(k_zone_cfg) < KALLOC_TYPE_IDX_MASK,
    "validate idx mask");
/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
726
727 static uint32_t
kalloc_idx_for_size(kalloc_heap_t kheap,uint32_t size)728 kalloc_idx_for_size(kalloc_heap_t kheap, uint32_t size)
729 {
730 struct kheap_zones *khz = kheap->kh_zones;
731 uint16_t idx;
732 if (size >= khz->kalloc_max) {
733 assert(size <= KALLOC_TYPE_SIZE_MASK);
734 return kalloc_type_set_idx(size, KALLOC_TYPE_IDX_MASK);
735 }
736
737 if (size < MAX_SIZE_ZDLUT) {
738 idx = khz->dlut[INDEX_ZDLUT(size)];
739 return kalloc_type_set_idx(size, idx);
740 }
741
742 idx = khz->k_zindex_start;
743 while (khz->cfg[idx].kzc_size < size) {
744 idx++;
745 }
746 return kalloc_type_set_idx(size, idx);
747 }
748
749 static zone_t
kalloc_heap_zone_for_idx(kalloc_heap_t kheap,uint16_t zindex)750 kalloc_heap_zone_for_idx(kalloc_heap_t kheap, uint16_t zindex)
751 {
752 struct kheap_zones *khz = kheap->kh_zones;
753 return khz->k_zone[zindex];
754 }
755
/*
 * Point every kalloc_type_view in [cur, end) at zone `z`: set its zone,
 * pick (or allocate) per-view statistics, and splice it onto the zone's
 * view list. `type_tag` is only used when VM tag size classes are on.
 */
static void
kalloc_type_assign_zone(kalloc_type_view_t *cur, kalloc_type_view_t *end,
    zone_t z, vm_tag_t type_tag __unused)
{
	/*
	 * Assign the zone created for every kalloc_type_view
	 * of the same unique signature
	 */
	bool need_raw_view = false;
	while (cur < end) {
		kalloc_type_view_t kt = *cur;
		struct zone_view *zv = &kt->kt_zv;
		zv->zv_zone = z;

		kalloc_type_flags_t kt_flags = kt->kt_flags;
		/* private accounting gets its own per-CPU stats, else share the zone's */
		if (kt_flags & KT_PRIV_ACCT ||
		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
			zv->zv_stats = zalloc_percpu_permanent_type(
				struct zone_stats);
			need_raw_view = true;
			zone_view_count += 1;
		} else {
			zv->zv_stats = z->z_stats;
		}
		/* push this view onto the head of the zone's view list */
		zv->zv_next = (zone_view_t) z->z_views;
		zv->zv_zone->z_views = (zone_view_t) kt;
#if VM_TAG_SIZECLASSES
		/*
		 * If there isn't a tag provided at the callsite
		 * collapse into VM_KERN_MEMORY_KALLOC_TYPE or
		 * VM_KERN_MEMORY_KALLOC_DATA respectively.
		 */
		if (__improbable(z->z_uses_tags)) {
			vm_tag_t tag = zalloc_flags_get_tag((zalloc_flags_t) kt->kt_flags);
			if (tag == VM_KERN_MEMORY_NONE) {
				kt->kt_flags |= Z_VM_TAG(type_tag);
			}
		}
#endif
		cur++;
	}
	if (need_raw_view) {
		zone_view_count += 1;
	}
}
801
802 /*
803 * Check if signature of type is made up of only data and padding
804 */
/*
 * Check if signature of type is made up of only data ('2') and
 * padding ('0') elements — i.e. the allocation carries no pointers.
 */
static bool
kalloc_type_is_data(const char *kt_signature)
{
	const char *ch;

	/* skip every data/padding element; stop at anything else or NUL */
	for (ch = kt_signature; *ch == '2' || *ch == '0'; ch++) {
		continue;
	}
	return *ch == '\0';
}
813
/*
 * Process the kalloc_type_views in [start, end): slide their string
 * pointers, assign data-only views to KHEAP_DATA_BUFFERS immediately,
 * and collect the rest into kt_buffer (advancing *cur_count) for later
 * sorting/zone assignment. Panics if kt_buffer would overflow.
 */
__startup_func
static void
kalloc_type_view_copy(kalloc_type_view_t start, kalloc_type_view_t end,
    uint64_t *cur_count, vm_offset_t slide)
{
	struct kalloc_type_view *cur = start;
	uint64_t count = end - start;
	if (count + *cur_count >= kt_count) {
		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
	}
	while (cur < end) {
		/* rebase section strings by the kext slide (0 when unslid) */
		cur->kt_signature += slide;
		cur->kt_zv.zv_name += slide;
		/*
		 * If signature indicates that the entire allocation is data move it to
		 * KHEAP_DATA_BUFFERS
		 */
		if (kalloc_type_is_data(cur->kt_signature)) {
			cur->kt_size = kalloc_idx_for_size(KHEAP_DATA_BUFFERS, cur->kt_size);
			uint16_t kt_idx = kalloc_type_get_idx(cur->kt_size);
			/* sentinel idx means not zone-backed: nothing to assign */
			if (kt_idx != KALLOC_TYPE_IDX_MASK) {
				zone_t z = kalloc_heap_zone_for_idx(KHEAP_DATA_BUFFERS, kt_idx);
				kalloc_type_assign_zone(&cur, &cur + 1, z, VM_KERN_MEMORY_KALLOC_DATA);
			}
			cur++;
			continue;
		}

		cur->kt_size = kalloc_idx_for_size(KHEAP_DEFAULT, cur->kt_size);

		/* defer: collected views are zone-assigned after sorting */
		kt_buffer[*cur_count] = cur;
		cur++;
		*cur_count = *cur_count + 1;
	}
}
849
/*
 * Locate every __kalloc_type section in the boot kernel collection
 * (static, fileset, or KCGEN format) and feed its views through
 * kalloc_type_view_copy(). Returns the number of views collected into
 * kt_buffer. Panics on unsupported KC formats.
 */
__startup_func
static uint64_t
kalloc_type_view_parse(void)
{
	kc_format_t kc_format;
	uint64_t cur_count = 0;

	if (!PE_get_primary_kc_format(&kc_format)) {
		panic("kalloc_type_view_parse: wasn't able to determine kc format");
	}

	if (kc_format == KCFormatStatic) {
		/*
		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
		 * xnu are coalesced.
		 */
		kalloc_type_view_copy(kalloc_types, kalloc_types_end, &cur_count, 0);
	} else if (kc_format == KCFormatFileset) {
		/*
		 * If kc uses filesets, traverse __kalloc_type section for each
		 * macho in the BootKC.
		 */
		kernel_mach_header_t *kc_mh = NULL;
		kernel_mach_header_t *kext_mh = NULL;
		char *fse_name = NULL;
		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
		struct load_command *lc =
		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
		for (uint32_t i = 0; i < kc_mh->ncmds;
		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
			if (lc->cmd != LC_FILESET_ENTRY) {
				continue;
			}
			struct fileset_entry_command *fse =
			    (struct fileset_entry_command *)(vm_offset_t)lc;
			fse_name = (char *)((vm_offset_t)fse +
			    (vm_offset_t)(fse->entry_id.offset));
			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
				kext_mh, KALLOC_TYPE_SEGMENT, "__kalloc_type");
			if (sect != NULL) {
				kalloc_type_view_copy((kalloc_type_view_t) sect->addr,
				    (kalloc_type_view_t)(sect->addr + sect->size), &cur_count, 0);
			}
		}
	} else if (kc_format == KCFormatKCGEN) {
		/*
		 * Parse __kalloc_type section from xnu
		 */
		kalloc_type_view_copy(kalloc_types, kalloc_types_end, &cur_count, 0);

#if defined(__LP64__)
		/*
		 * Parse __kalloc_type section for kexts
		 *
		 * Note: We don't process the kalloc_type_views for kexts on armv7
		 * as this platform has insufficient memory for type based
		 * segregation. kalloc_type_impl_external will direct callsites
		 * based on their size.
		 */
		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
		vm_offset_t cur = 0;
		vm_offset_t end = 0;
		vm_offset_t kext_slide = vm_kernel_slide;

		/*
		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
		 * and traverse it.
		 */
		kernel_section_t *prelink_sect = getsectbynamefromheader(
			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
		assert(prelink_sect);
		cur = prelink_sect->addr;
		end = prelink_sect->addr + prelink_sect->size;

		while (cur < end) {
			uint64_t kext_text_sz = 0;
			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;

			if (kext_mh->magic == 0) {
				/*
				 * Assert that we have processed all kexts and all that is left
				 * is padding
				 */
				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
				break;
			} else if (kext_mh->magic != MH_MAGIC_64 &&
			    kext_mh->magic != MH_CIGAM_64) {
				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
				    cur);
			}

			/*
			 * Kext macho found, iterate through its segments
			 */
			struct load_command *lc =
			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
			bool isSplitKext = false;

			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
					isSplitKext = true;
					continue;
				} else if (lc->cmd != LC_SEGMENT_64) {
					continue;
				}

				kernel_segment_command_t *seg_cmd =
				    (struct segment_command_64 *)(vm_offset_t)lc;
				/*
				 * Parse kalloc_type section
				 */
				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
					    KALLOC_TYPE_SEGMENT, "__kalloc_type");
					if (kt_sect) {
						/* unslid addresses: slide both bounds and the strings inside */
						kalloc_type_view_copy(
							(kalloc_type_view_t) (kt_sect->addr + kext_slide),
							(kalloc_type_view_t)(kt_sect->addr +
							kt_sect->size + kext_slide), &cur_count, kext_slide);
					}
				}
				/*
				 * If the kext has a __TEXT segment, that is the only thing that
				 * will be in the special __PRELINK_TEXT KC segment, so the next
				 * macho is right after.
				 */
				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
					kext_text_sz = seg_cmd->filesize;
				}
			}
			/*
			 * If the kext did not have a __TEXT segment (special xnu kexts with
			 * only a __LINKEDIT segment) then the next macho will be after all the
			 * header commands.
			 */
			if (!kext_text_sz) {
				kext_text_sz = kext_mh->sizeofcmds;
			} else if (!isSplitKext) {
				panic("kalloc_type_view_parse: No support for non-split seg KCs");
				break;
			}

			/* advance to the next KEXT_ALIGN_BYTES-aligned macho */
			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
		}

#endif
	} else {
		/*
		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
		 * parsing kalloc_type_view structs during startup.
		 */
		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
		    " for kc_format = %d\n", kc_format);
	}
	return cur_count;
}
1008
1009 static int
kalloc_type_cmp(const void * a,const void * b)1010 kalloc_type_cmp(const void *a, const void *b)
1011 {
1012 const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1013 const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1014
1015 const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1016 const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1017 /*
1018 * If the kalloc_type_views are in the same kalloc bucket, sort by
1019 * signature else sort by size
1020 */
1021 if (idxA == idxB) {
1022 int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1023 /*
1024 * If the kalloc_type_views have the same signature sort by site
1025 * name
1026 */
1027 if (result == 0) {
1028 return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1029 }
1030 return result;
1031 }
1032 const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1033 const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1034 return (int)(sizeA - sizeB);
1035 }
1036
/*
 * Walk the (already size/signature/name sorted) kt_buffer and build the
 * iteration structures used to assign zones:
 *
 *  - kt_skip_list: indices into kt_buffer marking where each run of
 *    collapsible (substring-compatible) signatures begins, terminated
 *    with `count`.
 *  - kt_freq_list[idx]: number of unique signatures in size class idx.
 *  - kt_freq_list_total[idx]: number of distinct type names in size
 *    class idx (statistics only).
 *
 * Returns a pointer one past the last skip-list slot written; the
 * caller reuses the remaining scratch space as a shuffle buffer.
 */
static uint16_t *
kalloc_type_create_iterators(uint16_t *kt_skip_list_start,
    uint16_t *kt_freq_list, uint16_t *kt_freq_list_total, uint64_t count)
{
	uint16_t *kt_skip_list = kt_skip_list_start;
	/*
	 * cur and prev kalloc size bucket
	 */
	uint16_t p_idx = 0;
	uint16_t c_idx = 0;

	/*
	 * Init values
	 */
	uint16_t unique_sig = 1;
	uint16_t total_sig = 0;
	kt_skip_list++;  /* slot 0 implicitly holds index 0 (first run start) */
	const char *p_sig = "";
	const char *p_name = "";

	/*
	 * Walk over each kalloc_type_view
	 */
	for (uint16_t i = 0; i < count; i++) {
		kalloc_type_view_t kt = kt_buffer[i];
		c_idx = kalloc_type_get_idx(kt->kt_size);
		/*
		 * When current kalloc_type_view is in a different kalloc size
		 * bucket than the previous, it means we have processed all in
		 * the previous size bucket, so store the accumulated values
		 * and advance the indices.
		 */
		if (c_idx != p_idx) {
			/*
			 * Updates for frequency lists
			 */
			kt_freq_list[p_idx] = unique_sig;
			unique_sig = 1;
			kt_freq_list_total[p_idx] = total_sig;
			total_sig = 1;
			p_idx = c_idx;

			/*
			 * Updates to signature skip list
			 */
			*kt_skip_list = i;
			kt_skip_list++;
			/* NOTE(review): p_name is not reset on a bucket change —
			 * presumably intentional since the sort groups identical
			 * names; confirm against the total_sig accounting. */
			p_sig = kt->kt_signature;
			continue;
		}

		/*
		 * When current kalloc_type_views is in the kalloc size bucket as
		 * previous, analyze the siganture to see if it is unique.
		 *
		 * Signatures are collapsible if one is a substring of the next.
		 */
		const char *c_sig = kt->kt_signature;
		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
			/*
			 * Unique signature detected. Update counts and advance index
			 */
			unique_sig++;
			*kt_skip_list = i;
			kt_skip_list++;
		}

		/*
		 * Check if current kalloc_type_view corresponds to a new type
		 */
		const char *c_name = kt->kt_zv.zv_name;
		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
			total_sig++;
		}
		p_name = c_name;
		p_sig = c_sig;
	}
	/*
	 * Final update
	 */
	assert(c_idx == p_idx);
	/*
	 * Update iterators only if size fits in zone. When size is larger
	 * than kalloc_max, idx is set to KALLOC_TYPE_IDX_MASK. These
	 * allocations will be serviced by kalloc_large when
	 * kalloc_type_impl_external is called.
	 */
	if (c_idx != KALLOC_TYPE_IDX_MASK) {
		assert(kt_freq_list[c_idx] == 0);
		kt_freq_list[c_idx] = unique_sig;
		kt_freq_list_total[c_idx] = (uint16_t) total_sig;
		*kt_skip_list = (uint16_t) count;  /* terminate the skip list */
	}
	return ++kt_skip_list;
}
1132
1133 static uint16_t
kalloc_type_apply_policy(uint16_t * kt_freq_list,uint16_t * kt_zones,uint16_t zone_budget)1134 kalloc_type_apply_policy(uint16_t *kt_freq_list, uint16_t *kt_zones,
1135 uint16_t zone_budget)
1136 {
1137 uint16_t total_sig = 0;
1138 uint16_t min_sig = 0;
1139 uint16_t assigned_zones = 0;
1140 uint16_t remaining_zones = zone_budget;
1141 uint16_t min_zones_per_size = 2;
1142
1143 #if DEBUG || DEVELOPMENT
1144 if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1145 uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1146 assert(zone_budget + current_zones <= MAX_ZONES);
1147 }
1148 #endif
1149
1150 for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1151 uint16_t sig_freq = kt_freq_list[i];
1152 uint16_t min_zones = min_zones_per_size;
1153 if (sig_freq < min_zones_per_size) {
1154 min_zones = sig_freq;
1155 }
1156 total_sig += sig_freq;
1157 kt_zones[i] = min_zones;
1158 min_sig += min_zones;
1159 }
1160 if (remaining_zones > total_sig) {
1161 remaining_zones = total_sig;
1162 }
1163 assert(remaining_zones >= min_sig);
1164 remaining_zones -= min_sig;
1165 total_sig -= min_sig;
1166 assigned_zones += min_sig;
1167 uint16_t modulo = 0;
1168 for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1169 uint16_t freq = kt_freq_list[i];
1170 if (freq < min_zones_per_size) {
1171 continue;
1172 }
1173 uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1174 uint16_t n_zones = (uint16_t) numer / total_sig;
1175
1176 /*
1177 * Accumulate remainder and increment n_zones when it goes above
1178 * denominator
1179 */
1180 modulo += numer % total_sig;
1181 if (modulo >= total_sig) {
1182 n_zones++;
1183 modulo -= total_sig;
1184 }
1185
1186 /*
1187 * Cap the total number of zones to the unique signatures
1188 */
1189 if ((n_zones + min_zones_per_size) > freq) {
1190 uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1191 modulo += (extra_zones * total_sig);
1192 n_zones -= extra_zones;
1193 }
1194 kt_zones[i] += n_zones;
1195 assigned_zones += n_zones;
1196 }
1197
1198 if (kt_options & KT_OPTIONS_DEBUG) {
1199 printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1200 assigned_zones, remaining_zones + min_sig - assigned_zones);
1201 }
1202 return remaining_zones + min_sig - assigned_zones;
1203 }
1204
1205 static void
kalloc_type_create_zone_for_size(zone_t * kt_zones_for_size,uint16_t kt_zones,vm_size_t z_size)1206 kalloc_type_create_zone_for_size(zone_t *kt_zones_for_size,
1207 uint16_t kt_zones, vm_size_t z_size)
1208 {
1209 zone_t p_zone = NULL;
1210
1211 for (uint16_t i = 0; i < kt_zones; i++) {
1212 char *z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1213 snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1214 (size_t) z_size);
1215 zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1216 #if DEBUG || DEVELOPMENT
1217 if (i != 0) {
1218 p_zone->z_kt_next = z;
1219 }
1220 #endif
1221 p_zone = z;
1222 kt_zones_for_size[i] = z;
1223 }
1224 }
1225
#define KT_ENTROPY_SHIFT 16
#define KT_ENTROPY_MASK 0xFFFF

/*
 * Returns a 16bit random number between 0 and
 * upper_limit (inclusive)
 */
static uint16_t
kalloc_type_get_random(uint16_t upper_limit)
{
	assert(upper_limit < KT_ENTROPY_MASK);
	/* 64-bit entropy pool, consumed 16 bits per call */
	static uint64_t random_entropy;
	if (random_entropy == 0) {
		/* refill once the pool has been fully shifted out */
		random_entropy = early_random();
	}
	uint16_t result = random_entropy & KT_ENTROPY_MASK;
	random_entropy >>= KT_ENTROPY_SHIFT;
	/*
	 * NOTE: plain modulo reduction has a slight bias toward smaller
	 * values when (upper_limit + 1) doesn't divide 2^16 — acceptable
	 * for boot-time zone shuffling.
	 */
	return result % (upper_limit + 1);
}
1245
1246 /*
1247 * Generate a randomly shuffled array of indices from 0 to count - 1
1248 */
1249 static void
kalloc_type_shuffle(uint16_t * shuffle_buf,uint16_t count)1250 kalloc_type_shuffle(uint16_t *shuffle_buf, uint16_t count)
1251 {
1252 for (uint16_t i = 0; i < count; i++) {
1253 uint16_t j = kalloc_type_get_random(i);
1254 if (j != i) {
1255 shuffle_buf[i] = shuffle_buf[j];
1256 }
1257 shuffle_buf[j] = i;
1258 }
1259 }
1260
/*
 * Create the kalloc.type zones and randomly spread every run of
 * identical signatures in kt_buffer across the zones of its size class.
 *
 * kt_skip_list_start:  run boundaries produced by
 *                      kalloc_type_create_iterators().
 * kt_freq_list:        unique signature count per size class.
 * kt_freq_list_total:  total type count per size class (stats only).
 * kt_shuffle_buf:      scratch used to shuffle signature indices.
 */
static void
kalloc_type_create_zones(uint16_t *kt_skip_list_start,
    uint16_t *kt_freq_list, uint16_t *kt_freq_list_total,
    uint16_t *kt_shuffle_buf)
{
	uint16_t *kt_skip_list = kt_skip_list_start;
	/* running offset of the current size class within the skip list */
	uint16_t p_j = 0;

	uint16_t kt_zones[MAX_K_ZONE(k_zone_cfg)] = {};

#if DEBUG || DEVELOPMENT
	/* entries of scratch space left past the skip list for shuffling */
	uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
#endif
	/*
	 * Apply policy to determine how many zones to create for each size
	 * class.
	 */
	if (kt_options & KT_OPTIONS_ON) {
		kalloc_type_apply_policy(kt_freq_list, kt_zones, kt_zone_budget);
		/*
		 * Print stats when KT_OPTIONS_DEBUG boot-arg present
		 */
		if (kt_options & KT_OPTIONS_DEBUG) {
			printf("Size\ttotal_sig\tunique_signatures\tzones\n");
			for (uint32_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
				printf("%u\t%u\t%u\t%u\n", k_zone_cfg[i].kzc_size,
				    kt_freq_list_total[i], kt_freq_list[i], kt_zones[i]);
			}
		}
	}

	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
		uint16_t n_unique_sig = kt_freq_list[i];
		vm_size_t z_size = k_zone_cfg[i].kzc_size;
		uint16_t n_zones = kt_zones[i];

		if (n_unique_sig == 0) {
			continue;
		}

		assert(n_zones <= 20);
		zone_t kt_zones_for_size[20] = {};
		if (kt_options & KT_OPTIONS_ON) {
			kalloc_type_create_zone_for_size(kt_zones_for_size,
			    n_zones, z_size);
		} else {
			/*
			 * Default to using KHEAP_DEFAULT if this feature is off
			 */
			n_zones = 1;
			kt_zones_for_size[0] = kalloc_heap_zone_for_size(
				KHEAP_DEFAULT, z_size);
		}

#if DEBUG || DEVELOPMENT
		kalloc_type_zarray[i] = kt_zones_for_size[0];
		/*
		 * Ensure that there is enough space to shuffle n_unique_sig
		 * indices
		 */
		assert(n_unique_sig < kt_shuffle_count);
#endif

		/*
		 * Get a shuffled set of signature indices
		 */
		*kt_shuffle_buf = 0;
		if (n_unique_sig > 1) {
			kalloc_type_shuffle(kt_shuffle_buf, n_unique_sig);
		}

		for (uint16_t j = 0; j < n_unique_sig; j++) {
			/*
			 * For every size that has unique types
			 */
			uint16_t shuffle_idx = kt_shuffle_buf[j];
			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
			/* round-robin the shuffled runs across this size's zones */
			zone_t zone = kt_zones_for_size[j % n_zones];
			kalloc_type_assign_zone(&kt_buffer[cur], &kt_buffer[end], zone,
			    VM_KERN_MEMORY_KALLOC_TYPE);
		}
		p_j += n_unique_sig;
	}
}
1347
/*
 * Startup entry point for kalloc_type: parse all __kalloc_type
 * sections, sort the views, apply the zone-assignment policy and
 * randomly bind every type signature to a kalloc.type zone.
 */
__startup_func
static void
kalloc_type_view_init(void)
{
	/*
	 * Turn off this feature on armv7 and kasan
	 */
#if !defined(__LP64__) || KASAN_ZALLOC
	kt_options &= ~KT_OPTIONS_ON;
#endif

	/*
	 * Allocate scratch space to parse kalloc_type_views and create
	 * other structures necessary to process them.
	 */
	kt_count = kt_scratch_size / sizeof(kalloc_type_view_t);
	if (kmem_alloc_flags(kernel_map, (vm_offset_t *) &kt_buffer,
	    kt_scratch_size,
	    VM_KERN_MEMORY_KALLOC, KMA_ZERO) != KERN_SUCCESS) {
		panic("kalloc_type_view_init: Couldn't create scratch space");
	}
	/*
	 * Parse __kalloc_type sections and build array of pointers to
	 * all kalloc type views in kt_buffer.
	 */
	kt_count = kalloc_type_view_parse();
	assert(kt_count < KALLOC_TYPE_SIZE_MASK);

#if DEBUG || DEVELOPMENT
	/* the skip list lives in the same scratch buffer, past the views */
	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
#endif

	/*
	 * Sort based on size class and signature
	 */
	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
	    kalloc_type_cmp);

	/*
	 * Build a skip list that holds starts of unique signatures and a
	 * frequency list of number of unique and total signatures per kalloc
	 * size class
	 */
	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
	uint16_t kt_freq_list[MAX_K_ZONE(k_zone_cfg)] = { 0 };
	uint16_t kt_freq_list_total[MAX_K_ZONE(k_zone_cfg)] = { 0 };
	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators(
		kt_skip_list_start, kt_freq_list, kt_freq_list_total, kt_count);

	/*
	 * Create zones based on signatures
	 */
	kalloc_type_create_zones(kt_skip_list_start, kt_freq_list,
	    kt_freq_list_total, kt_shuffle_buf);

	/*
	 * Free resources used
	 */
	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
}
STARTUP(ZALLOC, STARTUP_RANK_LAST, kalloc_type_view_init);
1411
1412 #pragma mark accessors
1413
1414 static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)1415 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
1416 {
1417 thread_t thr = current_thread();
1418 ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
1419 }
1420
1421 static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)1422 KALLOC_ZINFO_SFREE(vm_size_t bytes)
1423 {
1424 thread_t thr = current_thread();
1425 ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
1426 }
1427
1428 static inline vm_map_t
kalloc_guess_map_for_addr(kalloc_heap_t kheap,vm_address_t addr)1429 kalloc_guess_map_for_addr(kalloc_heap_t kheap, vm_address_t addr)
1430 {
1431 /* kheap is NULL when KHEAP_ANY */
1432 if (kheap == KHEAP_ANY) {
1433 kheap = KHEAP_DEFAULT;
1434 }
1435
1436 if (zone_range_contains(&kalloc_large_range[kheap->kh_heap_id], addr)) {
1437 return kheap->kh_large_map;
1438 } else {
1439 return kheap->kh_fallback_map;
1440 }
1441 }
1442
1443 static inline vm_map_t
kalloc_map_for_size(kalloc_heap_t kheap,vm_size_t size)1444 kalloc_map_for_size(kalloc_heap_t kheap, vm_size_t size)
1445 {
1446 if (size < KALLOC_KERNMAP_ALLOC_THRESHOLD) {
1447 return kheap->kh_large_map;
1448 }
1449 return kheap->kh_fallback_map;
1450 }
1451
1452 zone_t
kalloc_heap_zone_for_size(kalloc_heap_t kheap,vm_size_t size)1453 kalloc_heap_zone_for_size(kalloc_heap_t kheap, vm_size_t size)
1454 {
1455 struct kheap_zones *khz = kheap->kh_zones;
1456
1457 if (size < MAX_SIZE_ZDLUT) {
1458 uint32_t zindex = khz->dlut[INDEX_ZDLUT(size)];
1459 return khz->k_zone[zindex];
1460 }
1461
1462 if (size < khz->kalloc_max) {
1463 uint32_t zindex = khz->k_zindex_start;
1464 while (khz->cfg[zindex].kzc_size < size) {
1465 zindex++;
1466 }
1467 assert(zindex < khz->max_k_zone);
1468 return khz->k_zone[zindex];
1469 }
1470
1471 return ZONE_NULL;
1472 }
1473
/*
 * With `map` locked, validate that `addr` is the exact start of an
 * atomic VM entry (i.e. a kalloc_large allocation) and return the
 * entry's size. Panics on any mismatch rather than returning an error,
 * since a mismatch indicates heap corruption or an invalid free.
 */
static vm_size_t
vm_map_lookup_kalloc_entry_locked(vm_map_t map, void *addr)
{
	vm_map_entry_t vm_entry = NULL;

	if (!vm_map_lookup_entry(map, (vm_map_offset_t)addr, &vm_entry)) {
		panic("address %p not allocated via kalloc, map %p",
		    addr, map);
	}
	/* freeing from the middle of an allocation is never legal */
	if (vm_entry->vme_start != (vm_map_offset_t)addr) {
		panic("address %p inside vm entry %p [%p:%p), map %p",
		    addr, vm_entry, (void *)vm_entry->vme_start,
		    (void *)vm_entry->vme_end, map);
	}
	/* kalloc_large entries are created atomic (KMA_ATOMIC) */
	if (!vm_entry->vme_atomic) {
		panic("address %p not managed by kalloc (entry %p, map %p)",
		    addr, vm_entry, map);
	}
	return vm_entry->vme_end - vm_entry->vme_start;
}
1494
#if KASAN_KALLOC
/*
 * KASAN kalloc stashes the original user-requested size away in the poisoned
 * area. Return that directly.
 */
vm_size_t
kheap_alloc_size(__unused kalloc_heap_t kheap, void *addr)
{
	(void)vm_map_lookup_kalloc_entry_locked; /* silence warning */
	return kasan_user_size((vm_offset_t)addr);
}
#else
/*
 * Return the allocated size backing `addr`: the zone element size for
 * zone-backed allocations, otherwise look the address up (read-locked)
 * in the heap's guessed large-allocation map. Panics via
 * vm_map_lookup_kalloc_entry_locked() if addr isn't a kalloc address.
 */
vm_size_t
kheap_alloc_size(kalloc_heap_t kheap, void *addr)
{
	vm_map_t map;
	vm_size_t size;

	/* fast path: zone-backed allocation */
	size = zone_element_size(addr, NULL);
	if (size) {
		return size;
	}

	map = kalloc_guess_map_for_addr(kheap, (vm_offset_t)addr);
	vm_map_lock_read(map);
	size = vm_map_lookup_kalloc_entry_locked(map, addr);
	vm_map_unlock_read(map);
	return size;
}
#endif
1525
1526 static vm_size_t
kalloc_bucket_size(kalloc_heap_t kheap,vm_size_t size)1527 kalloc_bucket_size(kalloc_heap_t kheap, vm_size_t size)
1528 {
1529 zone_t z = kalloc_heap_zone_for_size(kheap, size);
1530 vm_map_t map = kalloc_map_for_size(kheap, size);
1531
1532 if (z) {
1533 return zone_elem_size(z);
1534 }
1535 return vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
1536 }
1537
1538 bool
kalloc_owned_map(vm_map_t map)1539 kalloc_owned_map(vm_map_t map)
1540 {
1541 return map && (map == kalloc_large_map ||
1542 map == kalloc_large_data_map ||
1543 map == kernel_data_map);
1544 }
1545
/* Accessor: submap used for large non-data kalloc allocations. */
vm_map_t
kalloc_large_map_get(void)
{
	return kalloc_large_map;
}
1551
/* Accessor: submap used for large data-buffer kalloc allocations. */
vm_map_t
kalloc_large_data_map_get(void)
{
	return kalloc_large_data_map;
}
1557
/* Accessor: the kernel data submap. */
vm_map_t
kernel_data_map_get(void)
{
	return kernel_data_map;
}
1563
1564 #pragma mark kalloc
1565
/*
 * Allocate `size` bytes straight from a VM map because the request is
 * too large for any kalloc zone. `req_size` is the caller's original
 * request (used by KASan to size guard pages); `site` attributes the
 * memory to a VM tag. Returns { NULL, 0 } on failure.
 */
__attribute__((noinline))
static struct kalloc_result
kalloc_large(
	kalloc_heap_t kheap,
	vm_size_t req_size,
	vm_size_t size,
	zalloc_flags_t flags,
	vm_allocation_site_t *site)
{
	int kma_flags = KMA_ATOMIC;
	vm_tag_t tag;
	vm_map_t alloc_map;
	vm_offset_t addr;

	/* Z_NOFAIL cannot be honored here: map allocations can fail */
	if (flags & Z_NOFAIL) {
		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
		    (size_t)size);
	}
	/*
	 * kmem_alloc could block so we return if noblock
	 *
	 * also, reject sizes larger than our address space is quickly,
	 * as kt_size or IOMallocArraySize() expect this.
	 */
	if ((flags & Z_NOWAIT) || (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
		return (struct kalloc_result){ };
	}

#ifndef __x86_64__
	/*
	 * (73465472) on Intel we didn't use to pass this flag,
	 * which in turned allowed kalloc_large() memory to be shared
	 * with user directly.
	 *
	 * We're bound by this unfortunate ABI.
	 */
	kma_flags |= KMA_KOBJECT;
#endif
	if (flags & Z_NOPAGEWAIT) {
		kma_flags |= KMA_NOPAGEWAIT;
	}
	if (flags & Z_ZERO) {
		kma_flags |= KMA_ZERO;
	}

#if KASAN_KALLOC
	/* large allocation - use guard pages instead of small redzones */
	size = round_page(req_size + 2 * PAGE_SIZE);
	assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded);
#else
	size = round_page(size);
#endif

	alloc_map = kalloc_map_for_size(kheap, size);

	/* resolve the VM tag: explicit flag > site > heap default */
	tag = zalloc_flags_get_tag(flags);
	if (tag == VM_KERN_MEMORY_NONE) {
		if (site) {
			tag = vm_tag_alloc(site);
		} else if (kheap->kh_heap_id == KHEAP_ID_DATA_BUFFERS) {
			tag = VM_KERN_MEMORY_KALLOC_DATA;
		} else {
			tag = VM_KERN_MEMORY_KALLOC;
		}
	}

	/* on submap exhaustion, retry once in the fallback map */
	if (kmem_alloc_flags(alloc_map, &addr, size, tag, kma_flags) != KERN_SUCCESS) {
		if (alloc_map != kheap->kh_fallback_map) {
			if (kalloc_fallback_count++ == 0) {
				printf("%s: falling back to kernel_map\n", __func__);
			}
			if (kmem_alloc_flags(kheap->kh_fallback_map, &addr, size,
			    tag, kma_flags) != KERN_SUCCESS) {
				addr = 0;
			}
		} else {
			addr = 0;
		}
	}

	if (addr != 0) {
		/* update accounting under the kalloc spin lock */
		kalloc_spin_lock();
		/*
		 * Thread-safe version of the workaround for 4740071
		 * (a double FREE())
		 */
		if (size > kalloc_largest_allocated) {
			kalloc_largest_allocated = size;
		}

		kalloc_large_inuse++;
		assert(kalloc_large_total + size >= kalloc_large_total); /* no wrap around */
		kalloc_large_total += size;
		kalloc_large_sum += size;

		if (kalloc_large_total > kalloc_large_max) {
			kalloc_large_max = kalloc_large_total;
		}

		kalloc_unlock();

		KALLOC_ZINFO_SALLOC(size);
	}
#if KASAN_KALLOC
	/* fixup the return address to skip the redzone */
	addr = kasan_alloc(addr, size, req_size, PAGE_SIZE);
#else
	/* callers are told the full rounded size they actually own */
	req_size = size;
#endif

	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
}
1679
/*
 * Core kalloc entry point: allocate `req_size` bytes from `kheap`,
 * either from the matching kalloc zone or, for oversized requests,
 * via kalloc_large(). Returns the address and the usable size
 * (which may exceed req_size due to size-class rounding).
 */
struct kalloc_result
kalloc_ext(
	kalloc_heap_t kheap,
	vm_size_t req_size,
	zalloc_flags_t flags,
	vm_allocation_site_t *site)
{
	vm_size_t size, esize;
	void *addr;
	zone_t z;

	/*
	 * Kasan for kalloc heaps will put the redzones *inside*
	 * the allocation, and hence augment its size.
	 *
	 * kalloc heaps do not use zone_t::z_kasan_redzone.
	 */
#if KASAN_KALLOC
	size = kasan_alloc_resize(req_size);
#else
	size = req_size;
#endif
	z = kalloc_heap_zone_for_size(kheap, size);
	if (__improbable(z == ZONE_NULL)) {
		/* no zone covers this size: go through the VM */
		return kalloc_large(kheap, req_size, size, flags, site);
	}

	esize = zone_elem_size(z);
#ifdef KALLOC_DEBUG
	if (size > esize) {
		panic("%s: z %p (%s%s) but requested size %lu", __func__, z,
		    kalloc_heap_names[kheap->kh_zones->heap_id], z->z_name,
		    (unsigned long)size);
	}
#endif
	assert(size <= esize);

#if VM_TAG_SIZECLASSES
	/* resolve and register the VM tag when the zone tracks per-tag usage */
	if (__improbable(z->z_uses_tags)) {
		vm_tag_t tag = zalloc_flags_get_tag(flags);
		if (tag == VM_KERN_MEMORY_NONE && site) {
			tag = vm_tag_alloc(site);
		}
		if (tag != VM_KERN_MEMORY_NONE) {
			tag = vm_tag_will_update_zone(tag, z->z_tags_sizeclass,
			    flags & (Z_WAITOK | Z_NOWAIT | Z_NOPAGEWAIT));
		}
		flags |= Z_VM_TAG(tag);
	}
#endif
	/* charge the heap's stats if it has its own, else the zone's */
	addr = zalloc_ext(z, kheap->kh_stats ?: z->z_stats, flags, esize);

#if KASAN_KALLOC
	addr = (void *)kasan_alloc((vm_offset_t)addr, esize,
	    req_size, KASAN_GUARD_SIZE);
#else
	/* callers are told the full element size they actually own */
	req_size = esize;
#endif

	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
	return (struct kalloc_result){ .addr = addr, .size = req_size };
}
1742
/* Legacy KPI entry point for kexts: blocking allocation from KHEAP_KEXT. */
void *
kalloc_external(vm_size_t size);
void *
kalloc_external(vm_size_t size)
{
	return kheap_alloc_tag_bt(KHEAP_KEXT, size, Z_WAITOK, VM_KERN_MEMORY_KALLOC);
}
1750
/* KPI entry point for kexts: allocate from the data-buffers heap. */
void *
kalloc_data_external(
	vm_size_t size,
	zalloc_flags_t flags);
void *
kalloc_data_external(
	vm_size_t size,
	zalloc_flags_t flags)
{
	return kheap_alloc_tag_bt(KHEAP_DATA_BUFFERS, size, flags,
	           VM_KERN_MEMORY_KALLOC_DATA);
}
1763
1764 #if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
1765
/*
 * Panic with a diagnosis of why kalloc_data_require() failed for
 * (addr, size): wrong heap, size exceeding the element, or an address
 * outside the zone native map entirely.
 */
__abortlike
static void
kalloc_data_require_panic(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_native_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];

		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
			panic("kalloc_data_require failed: address %p in [%s%s]",
			    addr, zone_heap_name(z), zone_name(z));
		}

		panic("kalloc_data_require failed: address %p in [%s%s], "
		    "size too large %zd > %zd", addr,
		    zone_heap_name(z), zone_name(z),
		    (size_t)size, (size_t)zone_elem_size(z));
	} else {
		panic("kalloc_data_require failed: address %p not in zone native map",
		    addr);
	}
}
1790
/*
 * Panic with a diagnosis of why kalloc_non_data_require() failed for
 * (addr, size): data/untyped heap, size exceeding the element, or an
 * address outside the zone native map entirely.
 */
__abortlike
static void
kalloc_non_data_require_panic(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_native_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];

		switch (zsflags.z_kheap_id) {
		case KHEAP_ID_NONE:
		case KHEAP_ID_DATA_BUFFERS:
			panic("kalloc_non_data_require failed: address %p in [%s%s]",
			    addr, zone_heap_name(z), zone_name(z));
		default:
			break;
		}

		panic("kalloc_non_data_require failed: address %p in [%s%s], "
		    "size too large %zd > %zd", addr,
		    zone_heap_name(z), zone_name(z),
		    (size_t)size, (size_t)zone_elem_size(z));
	} else {
		panic("kalloc_non_data_require failed: address %p not in zone native map",
		    addr);
	}
}
1819
1820 #endif /* ZSECURITY_CONFIG(SUBMAP_USER_DATA) */
1821
/*
 * Assert that (addr, size) is backed by data-heap memory: either a
 * KHEAP_ID_DATA_BUFFERS zone element large enough for `size`, or an
 * address inside the kalloc-data/kernel-data VM range. Panics
 * otherwise. No-op when the user-data submap isn't configured.
 */
void
kalloc_data_require(void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
	zone_id_t zid = zone_id_for_native_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS &&
		    size <= zone_elem_size(z)) {
			return;
		}
	} else if (!ZSECURITY_ENABLED(KERNEL_DATA_MAP)) {
		/* without a kernel data map, non-zone addresses can't be checked */
		return;
	} else if (zone_range_contains(&kalloc_data_or_kernel_data_range,
	    (vm_address_t)addr, size)) {
		return;
	}

	kalloc_data_require_panic(addr, size);
#else
#pragma unused(addr, size)
#endif
}
1847
/*
 * Assert that (addr, size) is backed by non-data kalloc memory (a
 * kalloc.type / default / kext zone element, or an address outside the
 * data range). Panics otherwise. No-op when the user-data submap
 * isn't configured.
 */
void
kalloc_non_data_require(void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
	zone_id_t zid = zone_id_for_native_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];
		switch (zsflags.z_kheap_id) {
		case KHEAP_ID_NONE:
			/* heap-less zones qualify only if they are kalloc.type zones */
			if (!zsflags.z_kalloc_type) {
				break;
			}
			OS_FALLTHROUGH;
		case KHEAP_ID_DEFAULT:
		case KHEAP_ID_KEXT:
			/*
			 * NOTE(review): strict '<' here, while kalloc_data_require()
			 * accepts size == zone_elem_size(z) with '<=' — confirm
			 * whether the exact-element-size case should pass here too.
			 */
			if (size < zone_elem_size(z)) {
				return;
			}
			break;
		default:
			break;
		}
	} else if (!ZSECURITY_ENABLED(KERNEL_DATA_MAP)) {
		/* without a kernel data map, non-zone addresses can't be checked */
		return;
	} else if (!zone_range_contains(&kalloc_data_or_kernel_data_range,
	    (vm_address_t)addr, size)) {
		return;
	}

	kalloc_non_data_require_panic(addr, size);
#else
#pragma unused(addr, size)
#endif
}
1884
/*
 * KPI entry point for kext typed allocations: allocate from the
 * kalloc.type zone bound to this view, or fall back to KHEAP_KEXT
 * when no zone was assigned at startup.
 */
void *
kalloc_type_impl_external(
	kalloc_type_view_t kt_view,
	zalloc_flags_t flags)
{
	/*
	 * Callsites from a kext that aren't in the BootKC on macOS or
	 * any callsites on armv7 are not processed during startup,
	 * default to using kheap_alloc
	 *
	 * Additionally when size is greater kalloc_max zone is left
	 * NULL as we need to use the vm for the allocation
	 *
	 */
	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
		return kheap_alloc_tag_bt(KHEAP_KEXT,
		           kalloc_type_get_size(kt_view->kt_size), flags,
		           VM_KERN_MEMORY_KALLOC);
	}
#if VM_TAG_SIZECLASSES
	zone_t kt_zone = kt_view->kt_zv.zv_zone;
	if (__improbable(kt_zone->z_uses_tags)) {
		vm_tag_t type_tag = zalloc_flags_get_tag(
			(zalloc_flags_t) kt_view->kt_flags);
		vm_tag_t tag = 0;
		/*
		 * kalloc_type_tag isn't exposed to kexts, therefore the only
		 * possible values for type_tag is VM_KERN_MEMORY_KALLOC_TYPE
		 * or VM_KERN_MEMORY_KALLOC_DATA
		 */
		if (type_tag == VM_KERN_MEMORY_KALLOC_TYPE) {
			VM_ALLOC_SITE_STATIC(VM_TAG_BT, VM_KERN_MEMORY_KALLOC_TYPE);
			tag = vm_tag_alloc(&site);
		} else {
			VM_ALLOC_SITE_STATIC(VM_TAG_BT, VM_KERN_MEMORY_KALLOC_DATA);
			tag = vm_tag_alloc(&site);
		}
		assert(tag != VM_KERN_MEMORY_NONE);
		tag = vm_tag_will_update_zone(tag,
		    kt_zone->z_tags_sizeclass,
		    flags & (Z_WAITOK | Z_NOWAIT | Z_NOPAGEWAIT));
		flags |= Z_VM_TAG(tag);
	}
#endif
	return zalloc_flags(kt_view, flags);
}
1931
1932 #pragma mark kfree
1933
/*
 * Free a large (VM-map backed) kalloc allocation. A size of 0 means
 * "unknown": the atomic map entry is looked up under the map lock and
 * removed in place. Panics on addresses or sizes that cannot have come
 * from kalloc_large().
 */
__attribute__((noinline))
static void
kfree_large(kalloc_heap_t kheap, vm_offset_t addr, vm_size_t size)
{
	vm_map_t map = kalloc_guess_map_for_addr(kheap, addr);
	kern_return_t ret;
	vm_offset_t end;

	/* sanity: the range must be entirely inside kernel VA */
	if (addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
	    os_add_overflow(addr, size, &end) ||
	    end > VM_MAX_KERNEL_ADDRESS) {
		panic("kfree: address range (%p, %ld) doesn't belong to the kernel",
		    (void *)addr, (uintptr_t)size);
	}

	if (size == 0) {
		/* unknown size: resolve it from the map entry and unmap it */
		vm_map_lock(map);
		size = vm_map_lookup_kalloc_entry_locked(map, (void *)addr);
		ret = vm_map_remove_locked(map,
		    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
		    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
		    VM_MAP_REMOVE_KUNWIRE);
		if (ret != KERN_SUCCESS) {
			panic("kfree: vm_map_remove_locked() failed for "
			    "addr: %p, map: %p ret: %d", (void *)addr, map, ret);
		}
		vm_map_unlock(map);
	} else {
		size = round_page(size);

		/* no kalloc_large() allocation was ever this big: bogus free */
		if (size > kalloc_largest_allocated) {
			panic("kfree: size %lu > kalloc_largest_allocated %lu",
			    (uintptr_t)size, (uintptr_t)kalloc_largest_allocated);
		}
		kmem_free(map, addr, size);
	}

	kalloc_spin_lock();

	assert(kalloc_large_total >= size);
	kalloc_large_total -= size;
	kalloc_large_inuse--;

	kalloc_unlock();

#if !KASAN_KALLOC
	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
#endif

	KALLOC_ZINFO_SFREE(size);
	return;
}
1986
1987 __abortlike
1988 static void
kfree_heap_confusion_panic(kalloc_heap_t kheap,void * data,size_t size,zone_t z)1989 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
1990 {
1991 zone_security_flags_t zsflags = zone_security_config(z);
1992 const char *kheap_name = "";
1993
1994 if (kheap == KHEAP_ANY) {
1995 kheap_name = "KHEAP_ANY (default/kext)";
1996 } else {
1997 kheap_name = kalloc_heap_names[kheap->kh_heap_id];
1998 }
1999
2000 if (zsflags.z_kalloc_type) {
2001 panic_include_kalloc_types = true;
2002 kalloc_type_src_zone = z;
2003 panic("kfree: addr %p found in kalloc type zone '%s'"
2004 "but being freed to %s heap", data, z->z_name, kheap_name);
2005 }
2006
2007 if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2008 panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2009 data, size, zone_heap_name(z), z->z_name);
2010 } else {
2011 panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2012 data, size, zone_heap_name(z), kheap_name);
2013 }
2014 }
2015
/*
 * Panic because the size passed to kfree doesn't match the zone
 * element backing `data` (z == NULL means no zone claimed it at all).
 */
__abortlike
static void
kfree_size_confusion_panic(zone_t z, void *data, size_t size, size_t zsize)
{
	if (z) {
		panic("kfree: addr %p, size %zd found in zone '%s%s' "
		    "with elem_size %zd",
		    data, size, zone_heap_name(z), z->z_name, zsize);
	} else {
		panic("kfree: addr %p, size %zd not found in any zone",
		    data, size);
	}
}
2029
/* Panic: kfree called with an absurd size (see KFREE_ABSURD_SIZE). */
__abortlike
static void
kfree_size_invalid_panic(void *data, size_t size)
{
	panic("kfree: addr %p trying to free with nonsensical size %zd",
	    data, size);
}
2037
/* Panic: krealloc called with an absurd old size. */
__abortlike
static void
krealloc_size_invalid_panic(void *data, size_t size)
{
	panic("krealloc: addr %p trying to free with nonsensical size %zd",
	    data, size);
}
2045
/* Panic: the element backing `data` falls outside the caller's bounds. */
__abortlike
static void
kfree_size_require_panic(void *data, size_t size, size_t min_size,
    size_t max_size)
{
	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
	    data, size, min_size, max_size);
}
2054
/*
 * Verify that the element backing `addr` has a size within the bounds
 * implied by [min_size, max_size] before it is freed; panics otherwise.
 * Used to implement kheap_free_bounded().
 */
static void
kfree_size_require(
	kalloc_heap_t kheap,
	void *addr,
	vm_size_t min_size,
	vm_size_t max_size)
{
	assert3u(min_size, <=, max_size);
#if KASAN_KALLOC
	/* account for the KASan redzone around the element */
	max_size = kasan_alloc_resize(max_size);
#endif
	/*
	 * The effective upper bound is the element size of the zone that
	 * max_size maps to, not max_size itself.
	 */
	zone_t max_zone = kalloc_heap_zone_for_size(kheap, max_size);
	vm_size_t max_zone_size = max_zone->z_elem_size;
	vm_size_t elem_size = zone_element_size(addr, NULL);
	if (elem_size > max_zone_size || elem_size < min_size) {
		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
	}
}
2073
/* used to implement kheap_free_addr(): sentinel for "size not supplied" */
#define KFREE_UNKNOWN_SIZE ((vm_size_t)~0)
/* any size beyond this cannot be a real allocation and must be a caller bug */
#define KFREE_ABSURD_SIZE \
	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2078
/*
 * Common kfree path: returns an element to its zone (or to the VM for
 * large allocations) after validating its size and heap membership.
 *
 * `size` may be KFREE_UNKNOWN_SIZE, in which case it is discovered from
 * the zone the element lives in. `kheap` may be KHEAP_ANY to accept
 * elements from either the default or kext heap.
 */
static void
kfree_ext(kalloc_heap_t kheap, void *data, vm_size_t size)
{
	zone_stats_t zs = NULL;
	zone_t z;
	vm_size_t zsize;
	zone_security_flags_t zsflags;

	/* freeing NULL is a no-op */
	if (__improbable(data == NULL)) {
		return;
	}

#if KASAN_KALLOC
	/*
	 * Resize back to the real allocation size and hand off to the KASan
	 * quarantine. `data` may then point to a different allocation.
	 */
	vm_size_t user_size = size;
	if (size == KFREE_UNKNOWN_SIZE) {
		user_size = size = kheap_alloc_size(kheap, data);
	}
	kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC);
	data = (void *)kasan_dealloc((vm_address_t)data, &size);
	kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true);
	if (!data) {
		return;
	}
#endif

	/* allocations at/above kalloc_max came from the VM, not a zone */
	if (size >= kalloc_max_prerounded && size != KFREE_UNKNOWN_SIZE) {
		return kfree_large(kheap, (vm_offset_t)data, size);
	}

	zsize = zone_element_size(data, &z);
	if (size == KFREE_UNKNOWN_SIZE) {
		/* not backed by any zone: must be a large (VM) allocation */
		if (zsize == 0) {
			return kfree_large(kheap, (vm_offset_t)data, 0);
		}
		size = zsize;
	} else if (size > zsize) {
		/* claimed size cannot exceed the zone's element size */
		kfree_size_confusion_panic(z, data, size, zsize);
	}

	/* validate that the element is freed back to the heap it came from */
	zsflags = zone_security_config(z);
	if (kheap != KHEAP_ANY) {
		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
			kfree_heap_confusion_panic(kheap, data, size, z);
		}
		zs = kheap->kh_stats;
	} else if (zsflags.z_kheap_id != KHEAP_ID_DEFAULT &&
	    zsflags.z_kheap_id != KHEAP_ID_KEXT) {
		/* KHEAP_ANY only accepts default/kext heap elements */
		kfree_heap_confusion_panic(kheap, data, size, z);
	}

#if !KASAN_KALLOC
	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
#endif
	/* scrub the whole element before returning it to the zone */
	bzero(data, zsize);
	zfree_ext(z, zs ?: z->z_stats, data, zsize);
}
2138
/*
 * Free a kalloc allocation of known size from any (default/kext) heap.
 * The parenthesized name suppresses expansion of a same-named
 * function-like macro, if one is in scope.
 */
void
(kfree)(void *addr, vm_size_t size)
{
	if (size > KFREE_ABSURD_SIZE) {
		kfree_size_invalid_panic(addr, size);
	}
	kfree_ext(KHEAP_ANY, addr, size);
}
2147
/* Free a known-size allocation back to a specific kalloc heap. */
void
(kheap_free)(kalloc_heap_t kheap, void *addr, vm_size_t size)
{
	if (size > KFREE_ABSURD_SIZE) {
		kfree_size_invalid_panic(addr, size);
	}
	kfree_ext(kheap, addr, size);
}
2156
/*
 * Free to `kheap` without knowing the allocation size; kfree_ext()
 * discovers it via KFREE_UNKNOWN_SIZE.
 */
void
(kheap_free_addr)(kalloc_heap_t kheap, void *addr)
{
	kfree_ext(kheap, addr, KFREE_UNKNOWN_SIZE);
}
2162
/*
 * Free an allocation whose exact size is unknown but bounded by
 * [min_sz, max_sz]; panics if the backing element is out of bounds.
 */
void
(kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
    vm_size_t min_sz, vm_size_t max_sz)
{
	if (__improbable(addr == NULL)) {
		return;
	}
	kfree_size_require(kheap, addr, min_sz, max_sz);
	kfree_ext(kheap, addr, KFREE_UNKNOWN_SIZE);
}
2173
/*
 * Common realloc path shared by krealloc_ext() and kheap_realloc_addr().
 *
 * Semantics:
 *  - new_size == 0 frees `addr` and returns an empty result;
 *  - addr == NULL degenerates into a plain allocation;
 *  - when the new size lands in the same kalloc bucket, the original
 *    address is returned, otherwise a fresh allocation is made, the
 *    overlapping bytes are copied, and the old element is freed.
 *
 * old_size may be KFREE_UNKNOWN_SIZE, in which case it is looked up
 * from the allocation itself.
 */
static struct kalloc_result
_krealloc_ext(
	kalloc_heap_t kheap,
	void *addr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags,
	vm_allocation_site_t *site)
{
	vm_size_t old_bucket_size, new_bucket_size, min_size;
	vm_size_t adj_new_size, adj_old_size;
	struct kalloc_result kr;

	if (new_size == 0) {
		kfree_ext(kheap, addr, old_size);
		return (struct kalloc_result){ };
	}

	if (addr == NULL) {
		return kalloc_ext(kheap, new_size, flags, site);
	}

	adj_old_size = old_size;
	adj_new_size = new_size;
#if KASAN_KALLOC
	/*
	 * Adjust sizes to account kasan for redzones
	 */
	adj_new_size = kasan_alloc_resize(new_size);

	if (old_size != KFREE_UNKNOWN_SIZE) {
		adj_old_size = kasan_alloc_resize(old_size);
	}
#endif

	/*
	 * Find out the size of the bucket in which the new sized allocation
	 * would land. If it matches the bucket of the original allocation,
	 * simply return the same address.
	 */
	new_bucket_size = kalloc_bucket_size(kheap, adj_new_size);
	if (old_size == KFREE_UNKNOWN_SIZE) {
		old_size = old_bucket_size = kheap_alloc_size(kheap, addr);
	} else {
		old_bucket_size = kalloc_bucket_size(kheap, adj_old_size);
	}
	min_size = MIN(old_size, new_size);

	if (old_bucket_size == new_bucket_size) {
		kr.addr = addr;
#if KASAN_KALLOC
		kr.size = new_size;
		/*
		 * Adjust right redzone in the element and poison it correctly
		 *
		 * NOTE(review): kr.addr keeps the pre-kasan_realloc pointer —
		 * confirm kasan_realloc preserves the element address here.
		 */
		addr = (void *)kasan_realloc((vm_offset_t)addr, new_bucket_size,
		    new_size, KASAN_GUARD_SIZE);
#else
		kr.size = new_bucket_size;
#endif
	} else {
		/* Z_ZERO is stripped here and applied below only to the tail */
		kr = kalloc_ext(kheap, new_size, flags & ~Z_ZERO, site);
		if (kr.addr == NULL) {
			return kr;
		}

		memcpy(kr.addr, addr, min_size);
		kfree_ext(kheap, addr, old_size);
	}
	/* zero only the bytes beyond the preserved prefix when requested */
	if ((flags & Z_ZERO) && kr.size > min_size) {
		bzero((void *)((uintptr_t)kr.addr + min_size), kr.size - min_size);
	}
	return kr;
}
2248
/* External (kext-facing) entry point for typed frees. */
void
kfree_type_impl_external(
	kalloc_type_view_t kt_view,
	void *ptr)
{
	/*
	 * If the callsite is from a kext that isn't in the BootKC, it wasn't
	 * processed during startup, so default to using kheap_free().
	 *
	 * Additionally, when the size is greater than kalloc_max, the zone
	 * is left NULL as we need to use the VM for the allocation/free.
	 */
	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
		return kheap_free(KHEAP_KEXT, ptr,
		    kalloc_type_get_size(kt_view->kt_size));
	}
	if (__improbable(ptr == NULL)) {
		return;
	}
	return zfree(kt_view, ptr);
}
2270
void
kfree_data_external(
	void *ptr,
	vm_size_t size);
/* KPI wrapper: free a known-size buffer from the data-only heap. */
void
kfree_data_external(
	void *ptr,
	vm_size_t size)
{
	return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
}
2282
void
kfree_data_addr_external(
	void *ptr);
/* KPI wrapper: free a data-only buffer without knowing its size. */
void
kfree_data_addr_external(
	void *ptr)
{
	return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
}
2292
/* Public realloc entry point with a caller-supplied old size. */
struct kalloc_result
krealloc_ext(
	kalloc_heap_t kheap,
	void *addr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags,
	vm_allocation_site_t *site)
{
	/* reject old sizes that cannot correspond to a real allocation */
	if (old_size > KFREE_ABSURD_SIZE) {
		krealloc_size_invalid_panic(addr, old_size);
	}
	return _krealloc_ext(kheap, addr, old_size, new_size, flags, site);
}
2307
/*
 * Realloc without a caller-supplied old size; _krealloc_ext() looks
 * the old size up via KFREE_UNKNOWN_SIZE.
 */
struct kalloc_result
kheap_realloc_addr(
	kalloc_heap_t kheap,
	void *addr,
	vm_size_t size,
	zalloc_flags_t flags,
	vm_allocation_site_t *site)
{
	return _krealloc_ext(kheap, addr, KFREE_UNKNOWN_SIZE, size, flags, site);
}
2318
void *
krealloc_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags);
/* KPI wrapper: realloc a data-only buffer with known old size. */
void *
krealloc_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags)
{
	VM_ALLOC_SITE_STATIC(VM_TAG_BT, VM_KERN_MEMORY_KALLOC_DATA);
	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size,
	    flags, &site).addr;
}
2336
void *
krealloc_data_addr_external(
	void *ptr,
	vm_size_t new_size,
	zalloc_flags_t flags);
/* KPI wrapper: realloc a data-only buffer without knowing its old size. */
void *
krealloc_data_addr_external(
	void *ptr,
	vm_size_t new_size,
	zalloc_flags_t flags)
{
	VM_ALLOC_SITE_STATIC(VM_TAG_BT, VM_KERN_MEMORY_KALLOC_DATA);
	return kheap_realloc_addr(KHEAP_DATA_BUFFERS, ptr, new_size,
	    flags, &site).addr;
}
2352
2353 __startup_func
2354 void
kheap_startup_init(kalloc_heap_t kheap)2355 kheap_startup_init(kalloc_heap_t kheap)
2356 {
2357 struct kheap_zones *zones;
2358 vm_map_t kalloc_map;
2359 vm_map_t fb_map;
2360 vm_tag_t tag;
2361
2362 switch (kheap->kh_heap_id) {
2363 case KHEAP_ID_DEFAULT:
2364 zones = KHEAP_DEFAULT->kh_zones;
2365 kalloc_map = KHEAP_DEFAULT->kh_large_map;
2366 fb_map = KHEAP_DEFAULT->kh_fallback_map;
2367 tag = KHEAP_DEFAULT->kh_tag;
2368 break;
2369 case KHEAP_ID_DATA_BUFFERS:
2370 zones = KHEAP_DATA_BUFFERS->kh_zones;
2371 kalloc_map = KHEAP_DATA_BUFFERS->kh_large_map;
2372 fb_map = KHEAP_DATA_BUFFERS->kh_fallback_map;
2373 tag = KHEAP_DATA_BUFFERS->kh_tag;
2374 break;
2375 case KHEAP_ID_KEXT:
2376 zones = KHEAP_KEXT->kh_zones;
2377 kalloc_map = KHEAP_KEXT->kh_large_map;
2378 fb_map = KHEAP_KEXT->kh_fallback_map;
2379 tag = KHEAP_KEXT->kh_tag;
2380 break;
2381 default:
2382 panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
2383 kheap->kh_heap_id);
2384 }
2385
2386 kheap->kh_heap_id = zones->heap_id;
2387 kheap->kh_zones = zones;
2388 kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
2389 kheap->kh_next = zones->views;
2390 zones->views = kheap;
2391 kheap->kh_large_map = kalloc_map;
2392 kheap->kh_fallback_map = fb_map;
2393 kheap->kh_tag = tag;
2394 zone_view_count += 1;
2395 }
2396
#pragma mark OSMalloc
/*
 * This is a deprecated interface, here only for legacy reasons.
 * There is no internal variant of any of these symbols on purpose.
 */
#define OSMallocDeprecated
#include <libkern/OSMalloc.h>

/* heap view used for all non-pageable OSMalloc allocations */
static KALLOC_HEAP_DEFINE(OSMALLOC, "osmalloc", KHEAP_ID_KEXT);
/* global list of live OSMalloc tags, protected by OSMalloc_tag_lock */
static queue_head_t OSMalloc_tag_list = QUEUE_HEAD_INITIALIZER(OSMalloc_tag_list);
static LCK_GRP_DECLARE(OSMalloc_tag_lck_grp, "OSMalloc_tag");
static LCK_SPIN_DECLARE(OSMalloc_tag_lock, &OSMalloc_tag_lck_grp);

#define OSMalloc_tag_spin_lock() lck_spin_lock(&OSMalloc_tag_lock)
#define OSMalloc_tag_unlock() lck_spin_unlock(&OSMalloc_tag_lock)
2412
extern typeof(OSMalloc_Tagalloc) OSMalloc_Tagalloc_external;
/*
 * Allocate and register a new OSMalloc tag holding a single reference.
 */
OSMallocTag
OSMalloc_Tagalloc_external(const char *str, uint32_t flags)
{
	OSMallocTag OSMTag;

	OSMTag = kalloc_type(struct _OSMallocTag_, Z_WAITOK | Z_ZERO);

	if (flags & OSMT_PAGEABLE) {
		OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE;
	}

	OSMTag->OSMT_refcnt = 1;

	strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);

	OSMalloc_tag_spin_lock();
	enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
	OSMalloc_tag_unlock();
	/*
	 * NOTE(review): the VALID state is published after the tag is on the
	 * list — confirm list walkers tolerate a briefly zero-state entry.
	 */
	OSMTag->OSMT_state = OSMT_VALID;
	return OSMTag;
}
2435
2436 static void
OSMalloc_Tagref(OSMallocTag tag)2437 OSMalloc_Tagref(OSMallocTag tag)
2438 {
2439 if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) {
2440 panic("OSMalloc_Tagref():'%s' has bad state 0x%08X",
2441 tag->OSMT_name, tag->OSMT_state);
2442 }
2443
2444 os_atomic_inc(&tag->OSMT_refcnt, relaxed);
2445 }
2446
2447 static void
OSMalloc_Tagrele(OSMallocTag tag)2448 OSMalloc_Tagrele(OSMallocTag tag)
2449 {
2450 if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) {
2451 panic("OSMalloc_Tagref():'%s' has bad state 0x%08X",
2452 tag->OSMT_name, tag->OSMT_state);
2453 }
2454
2455 if (os_atomic_dec(&tag->OSMT_refcnt, relaxed) != 0) {
2456 return;
2457 }
2458
2459 if (os_atomic_cmpxchg(&tag->OSMT_state,
2460 OSMT_VALID | OSMT_RELEASED, OSMT_VALID | OSMT_RELEASED, acq_rel)) {
2461 OSMalloc_tag_spin_lock();
2462 (void)remque((queue_entry_t)tag);
2463 OSMalloc_tag_unlock();
2464 kfree_type(struct _OSMallocTag_, tag);
2465 } else {
2466 panic("OSMalloc_Tagrele():'%s' has refcnt 0", tag->OSMT_name);
2467 }
2468 }
2469
extern typeof(OSMalloc_Tagfree) OSMalloc_Tagfree_external;
/*
 * Mark a tag released and drop the creation reference; the tag is
 * destroyed once the last outstanding reference goes away.
 */
void
OSMalloc_Tagfree_external(OSMallocTag tag)
{
	/* transition VALID -> VALID|RELEASED exactly once */
	if (!os_atomic_cmpxchg(&tag->OSMT_state,
	    OSMT_VALID, OSMT_VALID | OSMT_RELEASED, acq_rel)) {
		panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X",
		    tag->OSMT_name, tag->OSMT_state);
	}

	if (os_atomic_dec(&tag->OSMT_refcnt, relaxed) == 0) {
		OSMalloc_tag_spin_lock();
		(void)remque((queue_entry_t)tag);
		OSMalloc_tag_unlock();
		kfree_type(struct _OSMallocTag_, tag);
	}
}
2487
2488 extern typeof(OSMalloc) OSMalloc_external;
2489 void *
OSMalloc_external(uint32_t size,OSMallocTag tag)2490 OSMalloc_external(
2491 uint32_t size, OSMallocTag tag)
2492 {
2493 void *addr = NULL;
2494 kern_return_t kr;
2495
2496 OSMalloc_Tagref(tag);
2497 if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) {
2498 if ((kr = kmem_alloc_pageable_external(kernel_map,
2499 (vm_offset_t *)&addr, size)) != KERN_SUCCESS) {
2500 addr = NULL;
2501 }
2502 } else {
2503 addr = kheap_alloc_tag_bt(OSMALLOC, size,
2504 Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2505 }
2506
2507 if (!addr) {
2508 OSMalloc_Tagrele(tag);
2509 }
2510
2511 return addr;
2512 }
2513
extern typeof(OSMalloc_nowait) OSMalloc_nowait_external;
/* Non-blocking OSMalloc; pageable tags are not supported (returns NULL). */
void *
OSMalloc_nowait_external(uint32_t size, OSMallocTag tag)
{
	void *addr = NULL;

	if (tag->OSMT_attr & OSMT_PAGEABLE) {
		return NULL;
	}

	OSMalloc_Tagref(tag);
	/* XXX: use non-blocking kalloc for now */
	addr = kheap_alloc_tag_bt(OSMALLOC, (vm_size_t)size,
	    Z_NOWAIT, VM_KERN_MEMORY_KALLOC);
	if (addr == NULL) {
		/* drop the reference taken above on failure */
		OSMalloc_Tagrele(tag);
	}

	return addr;
}
2534
extern typeof(OSMalloc_noblock) OSMalloc_noblock_external;
/*
 * Non-blocking OSMalloc variant; identical behavior to
 * OSMalloc_nowait_external above. Pageable tags are not supported.
 */
void *
OSMalloc_noblock_external(uint32_t size, OSMallocTag tag)
{
	void *addr = NULL;

	if (tag->OSMT_attr & OSMT_PAGEABLE) {
		return NULL;
	}

	OSMalloc_Tagref(tag);
	addr = kheap_alloc_tag_bt(OSMALLOC, (vm_size_t)size,
	    Z_NOWAIT, VM_KERN_MEMORY_KALLOC);
	if (addr == NULL) {
		/* drop the reference taken above on failure */
		OSMalloc_Tagrele(tag);
	}

	return addr;
}
2554
2555 extern typeof(OSFree) OSFree_external;
2556 void
OSFree_external(void * addr,uint32_t size,OSMallocTag tag)2557 OSFree_external(void *addr, uint32_t size, OSMallocTag tag)
2558 {
2559 if ((tag->OSMT_attr & OSMT_PAGEABLE)
2560 && (size & ~PAGE_MASK)) {
2561 kmem_free(kernel_map, (vm_offset_t)addr, size);
2562 } else {
2563 kheap_free(OSMALLOC, addr, size);
2564 }
2565
2566 OSMalloc_Tagrele(tag);
2567 }
2568
2569 #pragma mark kern_os_malloc
2570
void *
kern_os_malloc_external(size_t size);
/*
 * libkern malloc backend: zeroed allocation from the KERN_OS_MALLOC
 * heap view; size 0 returns NULL.
 */
void *
kern_os_malloc_external(size_t size)
{
	if (size == 0) {
		return NULL;
	}

	return kheap_alloc_tag_bt(KERN_OS_MALLOC, size, Z_WAITOK | Z_ZERO,
	    VM_KERN_MEMORY_LIBKERN);
}
2583
void
kern_os_free_external(void *addr);
/* libkern free backend; the allocation size is looked up, not supplied. */
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}
2591
void *
kern_os_realloc_external(void *addr, size_t nsize);
/*
 * libkern realloc backend; old size is discovered from the allocation
 * itself, and any grown tail is zeroed (Z_ZERO).
 */
void *
kern_os_realloc_external(void *addr, size_t nsize)
{
	VM_ALLOC_SITE_STATIC(VM_TAG_BT, VM_KERN_MEMORY_LIBKERN);

	return kheap_realloc_addr(KERN_OS_MALLOC, addr, nsize,
	    Z_WAITOK | Z_ZERO, &site).addr;
}
2602
/*
 * Free `addr` to `zone`; on configurations without strict IOKit free,
 * tolerate objects that were actually allocated from the KEXT heap.
 */
void
kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(size)
	zfree(zone, addr);
#else
	if (zone_owns(zone, addr)) {
		zfree(zone, addr);
	} else {
		/*
		 * Third party kexts might not know about the operator new
		 * and be allocated from the KEXT heap
		 */
		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
		    zone->z_name);
		kheap_free(KHEAP_KEXT, addr, size);
	}
#endif
}
2623
/*
 * Free an operator-delete allocation; on configurations without strict
 * IOKit free, accept elements from any (default/kext) heap.
 */
void
kern_os_kfree(void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
	kheap_free(KHEAP_DEFAULT, addr, size);
#else
	/*
	 * Third party kexts may not know about newly added operator
	 * default new/delete. If they call new for any iokit object
	 * it will end up coming from the KEXT heap. If these objects
	 * are freed by calling release() or free(), the internal
	 * version of operator delete is called and the kernel ends
	 * up freeing the object to the DEFAULT heap.
	 */
	kheap_free(KHEAP_ANY, addr, size);
#endif
}
2641
/*
 * Decide whether a typed IOMalloc allocation is served by the VM
 * (large allocations / unprocessed callsites) rather than a slab zone.
 */
bool
IOMallocType_from_vm(uint32_t kt_idx, uint32_t kt_size)
{
#if defined(__x86_64__) || !defined(__LP64__)
	/*
	 * Callsites that aren't in the BootKC for macOS and all callsites
	 * for armv7 are not processed during startup, so use size to
	 * determine if the allocation will use the VM instead of slab.
	 */
	(void) kt_idx;
	return kt_size >= KHEAP_DEFAULT->kh_zones->kalloc_max;
#else
	/* a sentinel index marks allocations that bypass kalloc.type zones */
	(void) kt_size;
	return kt_idx == KALLOC_TYPE_IDX_MASK;
#endif
}
2658
/*
 * Typed free for IOKit objects, with a compatibility escape hatch for
 * allocations made by old third party kexts via the kext heap.
 */
void
kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(esize)
#else
	/*
	 * For third party kexts that have been compiled with sdk pre macOS 11,
	 * an allocation of an OSObject that is defined in xnu or first party
	 * kexts, by directly calling new will lead to using the kext heap
	 * as it will call OSObject_operator_new_external. If this object
	 * is freed by xnu, it panics as xnu uses the typed free which
	 * requires the object to have been allocated in a kalloc.type zone.
	 * To workaround this issue, detect if the allocation being freed is
	 * from the kext heap and allow freeing to it.
	 */
	zone_id_t zid = zone_id_for_native_element(addr, esize);
	if (__probable(zid < MAX_ZONES)) {
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zsflags.z_kheap_id == KHEAP_ID_KEXT) {
			return kheap_free(KHEAP_KEXT, addr, esize);
		}
	}
#endif
	kfree_type_impl_external(ktv, addr);
}
2685
2686 #if DEBUG || DEVELOPMENT
2687 #include <sys/random.h>
/*
 * Ensure that the feature is on when the boot-arg is present.
 *
 * Note: Presence of zones with name kalloc.type* is used to
 * determine if the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	/*
	 * Boot-arg not present: nothing to verify, vacuously passes.
	 */
	if (!(kt_options & KT_OPTIONS_ON)) {
		return 1;
	}

	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	/* every zone chained off kalloc_type_zarray must be a kalloc.type zone */
	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	/* boot-arg is on but no kalloc.type zones exist: feature is off */
	if (!zone_found) {
		return 0;
	}

	return 1;
}
2724
2725 /*
2726 * Ensure that the policy uses the zone budget completely
2727 */
2728 static int
kalloc_type_test_policy(int64_t in)2729 kalloc_type_test_policy(int64_t in)
2730 {
2731 uint16_t zone_budget = (uint16_t) in;
2732 uint16_t max_bucket_freq = 25;
2733 uint16_t freq_list[MAX_K_ZONE(k_zone_cfg)] = {};
2734 uint16_t zones_per_bucket[MAX_K_ZONE(k_zone_cfg)] = {};
2735 uint16_t random[MAX_K_ZONE(k_zone_cfg)];
2736 int ret = 0;
2737
2738 read_random((void *)&random[0], sizeof(random));
2739 for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
2740 freq_list[i] = random[i] % max_bucket_freq;
2741 }
2742 uint16_t wasted_zone_budget = kalloc_type_apply_policy(freq_list,
2743 zones_per_bucket, zone_budget);
2744 if (wasted_zone_budget == 0) {
2745 ret = 1;
2746 }
2747 return ret;
2748 }
2749
/*
 * Ensure that size of adopters of kalloc_type fit in the zone
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	/* NOTE(review): z_elem_size narrowed to uint16_t — confirm element
	 * sizes here always fit in 16 bits. */
	uint16_t elem_size = z->z_elem_size;
	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
	const char site_str[] = "site.";
	const size_t site_str_len = strlen(site_str);
	while (kt_cur != NULL) {
		/*
		 * Process only kalloc_type_views and skip the zone_views when
		 * feature is off.
		 */
		if ((kt_options & KT_OPTIONS_ON) ||
		    (strncmp(kt_cur->kt_zv.zv_name, site_str, site_str_len) == 0)) {
			if (kalloc_type_get_size(kt_cur->kt_size) > elem_size) {
				return 0;
			}
		}
		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
	}
	return 1;
}
2776
/* trivial all-data type used by the kalloc_type data-redirect test */
struct test_kt_data {
	int a;
};
2780
2781 static int
kalloc_type_test_data_redirect()2782 kalloc_type_test_data_redirect()
2783 {
2784 const char *kt_data_sig = __builtin_xnu_type_signature(
2785 struct test_kt_data);
2786 if (!kalloc_type_is_data(kt_data_sig)) {
2787 printf("%s: data redirect failed\n", __func__);
2788 return 0;
2789 }
2790 return 1;
2791 }
2792
/*
 * sysctl-driven kalloc_type self-test: size fit, policy budget use,
 * feature/boot-arg consistency, and data-signature redirection.
 * `*out` is 1 on success, 0 on failure.
 */
static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	/* every zone chained off kalloc_type_zarray must fit its adopters */
	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
2829 SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
2830
2831 static int
run_kalloc_test(int64_t in __unused,int64_t * out)2832 run_kalloc_test(int64_t in __unused, int64_t *out)
2833 {
2834 *out = 0;
2835 uint64_t * data_ptr;
2836 size_t alloc_size, old_alloc_size;
2837
2838 printf("%s: test running\n", __func__);
2839
2840 alloc_size = sizeof(uint64_t) + 1;
2841 data_ptr = kalloc(alloc_size);
2842 if (!data_ptr) {
2843 printf("%s: kalloc sizeof(uint64_t) returned null\n", __func__);
2844 return 0;
2845 }
2846
2847 struct kalloc_result kr = {};
2848 old_alloc_size = alloc_size;
2849 alloc_size++;
2850 kr = krealloc_ext(KHEAP_DEFAULT, data_ptr, old_alloc_size, alloc_size,
2851 Z_WAITOK | Z_NOFAIL, NULL);
2852 if (!kr.addr || kr.addr != data_ptr ||
2853 kalloc_bucket_size(KHEAP_DEFAULT, kr.size)
2854 != kalloc_bucket_size(KHEAP_DEFAULT, old_alloc_size)) {
2855 printf("%s: same size class realloc failed\n", __func__);
2856 return 0;
2857 }
2858
2859 old_alloc_size = alloc_size;
2860 alloc_size *= 2;
2861 kr = krealloc_ext(KHEAP_DEFAULT, kr.addr, old_alloc_size, alloc_size,
2862 Z_WAITOK | Z_NOFAIL, NULL);
2863 if (!kr.addr || kalloc_bucket_size(KHEAP_DEFAULT, kr.size)
2864 == kalloc_bucket_size(KHEAP_DEFAULT, old_alloc_size)) {
2865 printf("%s: new size class realloc failed\n", __func__);
2866 return 0;
2867 }
2868
2869 old_alloc_size = alloc_size;
2870 alloc_size *= 2;
2871 data_ptr = kheap_realloc_addr(KHEAP_DEFAULT, kr.addr, alloc_size,
2872 Z_WAITOK | Z_NOFAIL, NULL).addr;
2873 if (!data_ptr) {
2874 printf("%s: realloc without old size returned null\n", __func__);
2875 return 0;
2876 }
2877 kfree(data_ptr, alloc_size);
2878
2879 alloc_size = 3544;
2880 data_ptr = kalloc(alloc_size);
2881 if (!data_ptr) {
2882 printf("%s: kalloc 3544 returned not null\n", __func__);
2883 return 0;
2884 }
2885 kfree(data_ptr, alloc_size);
2886
2887 printf("%s: test passed\n", __func__);
2888 *out = 1;
2889 return 0;
2890 }
2891 SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
2892 #endif
2893