xref: /xnu-11417.121.6/osfmk/kern/kalloc.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/kalloc.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	General kernel memory allocator.  This allocator is designed
64  *	to be used by the kernel to manage dynamic memory fast.
65  */
66 
67 #include "mach/vm_types.h"
68 #include <mach/boolean.h>
69 #include <mach/sdt.h>
70 #include <mach/machine/vm_types.h>
71 #include <mach/vm_param.h>
72 #include <kern/misc_protos.h>
73 #include <kern/counter.h>
74 #include <kern/zalloc_internal.h>
75 #include <kern/kalloc.h>
76 #include <kern/ledger.h>
77 #include <kern/backtrace.h>
78 #include <vm/vm_kern_internal.h>
79 #include <vm/vm_object_xnu.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_memtag.h>
82 #include <sys/kdebug.h>
83 
84 #include <os/hash.h>
85 #include <san/kasan.h>
86 #include <libkern/section_keywords.h>
87 #include <libkern/prelink.h>
88 
89 
/*
 * Scalable counters defined by this file. Only the definitions are visible
 * here; presumably they track the number and total size of large (VM-backed)
 * kalloc allocations -- confirm against the code that updates them.
 */
90 SCALABLE_COUNTER_DEFINE(kalloc_large_count);
91 SCALABLE_COUNTER_DEFINE(kalloc_large_total);
92 
93 #pragma mark initialization
94 
95 /*
96  * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
97  * sized zone.  This allocator is built on top of the zone allocator.  A zone
98  * is created for each potential size that we are willing to get in small
99  * blocks.
100  *
101  * Allocations of size greater than KHEAP_MAX_SIZE, are allocated from the VM.
102  */
103 
104 /*
105  * The kt_zone_cfg table defines the configuration of zones on various
106  * platforms for kalloc_type fixed size allocations.
107  */
108 
/*
 * K_SIZE_CLASS(size): the element size actually used for a nominal size
 * class.
 *
 * With KASAN_CLASSIC, a class that is neither a multiple of the page size
 * nor <= 1024 is shrunk by KASAN_GUARD_SIZE; in every other case (and
 * always without KASAN_CLASSIC) the size is used unchanged.
 */
109 #if KASAN_CLASSIC
110 #define K_SIZE_CLASS(size)    \
111 	(((size) & PAGE_MASK) == 0 ? (size) : \
112 	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
113 #else
114 #define K_SIZE_CLASS(size)    (size)
115 #endif
/* The largest heap size must map to itself under K_SIZE_CLASS(). */
116 static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
117 
/*
 * Size classes for fixed-size kalloc_type zones, in ascending order.
 * kalloc_type_build_dlut() scans this table from the start for the first
 * entry that is >= a given size.
 */
118 static const uint16_t kt_zone_cfg[] = {
119 	K_SIZE_CLASS(16),
120 	K_SIZE_CLASS(32),
121 	K_SIZE_CLASS(48),
122 	K_SIZE_CLASS(64),
123 	K_SIZE_CLASS(80),
124 	K_SIZE_CLASS(96),
125 	K_SIZE_CLASS(128),
126 	K_SIZE_CLASS(160),
127 	K_SIZE_CLASS(192),
128 	K_SIZE_CLASS(224),
129 	K_SIZE_CLASS(256),
130 	K_SIZE_CLASS(288),
131 	K_SIZE_CLASS(368),
132 	K_SIZE_CLASS(400),
133 	K_SIZE_CLASS(512),
134 	K_SIZE_CLASS(576),
135 	K_SIZE_CLASS(768),
136 	K_SIZE_CLASS(1024),
137 	K_SIZE_CLASS(1152),
138 	K_SIZE_CLASS(1280),
139 	K_SIZE_CLASS(1664),
140 	K_SIZE_CLASS(2048),
141 	K_SIZE_CLASS(4096),
142 	K_SIZE_CLASS(6144),
143 	K_SIZE_CLASS(8192),
144 	K_SIZE_CLASS(12288),
145 	K_SIZE_CLASS(16384),
/* The two largest classes only exist on arm64. */
146 #if __arm64__
147 	K_SIZE_CLASS(24576),
148 	K_SIZE_CLASS(32768),
149 #endif /* __arm64__ */
150 };
151 
/* Number of elements in the array `kzc` (must be a real array, not a pointer). */
152 #define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))
153 
154 /*
155  * kalloc_type callsites are assigned a zone during early boot. They
156  * use the dlut[] (direct lookup table), indexed by size normalized
157  * to the minimum alignment to find the right zone index quickly.
158  */
/*
 * INDEX_ZDLUT(size)  - table slot for `size`: size / KALLOC_MINALIGN,
 *                      rounded up.
 * KALLOC_DLUT_SIZE   - number of slots in the table.
 * MAX_SIZE_ZDLUT     - largest size whose slot is still inside the table.
 *
 * NOTE(review): INDEX_ZDLUT() of any size above MAX_SIZE_ZDLUT (including
 * KHEAP_MAX_SIZE itself) evaluates to KALLOC_DLUT_SIZE, which is one past
 * the last valid index of kalloc_type_dlut[].
 */
159 #define INDEX_ZDLUT(size)       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
160 #define KALLOC_DLUT_SIZE        (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
161 #define MAX_SIZE_ZDLUT          ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
/* Slot -> index into kt_zone_cfg[]; filled by kalloc_type_build_dlut(). */
162 static __startup_data uint8_t   kalloc_type_dlut[KALLOC_DLUT_SIZE];
/* Element size of each zone of a kalloc heap; filled by kalloc_zsize_compute(). */
163 static __startup_data uint32_t  kheap_zsize[KHEAP_NUM_ZONES];
164 
165 #if VM_TAG_SIZECLASSES
166 static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
167 #endif
168 
/*
 * Name prefix associated with each heap id. KHEAP_ID_NONE and
 * KHEAP_ID_KT_VAR have an empty prefix.
 */
169 const char * const kalloc_heap_names[] = {
170 	[KHEAP_ID_NONE]          = "",
171 	[KHEAP_ID_EARLY]         = "early.",
172 	[KHEAP_ID_DATA_BUFFERS]  = "data.",
173 	[KHEAP_ID_DATA_SHARED]   = "data_shared.",
174 	[KHEAP_ID_KT_VAR]        = "",
175 };
176 
177 /*
178  * Early heap configuration
179  */
/* Heap descriptor for the early heap; accounted to VM_KERN_MEMORY_KALLOC_TYPE. */
180 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_EARLY[1] = {
181 	{
182 		.kh_name     = "early.kalloc",
183 		.kh_heap_id  = KHEAP_ID_EARLY,
184 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE,
185 	}
186 };
187 
188 /*
189  * Bag of bytes heap configuration
190  */
191 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
192 	{
193 		.kh_name     = "data.kalloc",
194 		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
195 		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
196 	}
197 };
198 
199 /*
200  * Configuration of variable kalloc type heaps
201  */
/* Per-heap state for the KT_VAR_MAX_HEAPS variable-size kalloc_type heaps. */
202 SECURITY_READ_ONLY_LATE(struct kheap_info)
203 kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
204 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
205 	{
206 		.kh_name     = "kalloc.type.var",
207 		.kh_heap_id  = KHEAP_ID_KT_VAR,
208 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
209 	}
210 };
211 
212 /*
213  * Shared data heap configuration
214  */
215 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_SHARED[1] = {
216 	{
217 		.kh_name     = "data_shared.kalloc",
218 		.kh_heap_id  = KHEAP_ID_DATA_SHARED,
219 		.kh_tag      = VM_KERN_MEMORY_KALLOC_SHARED,
220 	}
221 };
222 
/* KHEAP_DEFAULT is declared as a heap with the KHEAP_ID_KT_VAR heap id. */
223 KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
224 
225 __startup_func
226 static void
kalloc_zsize_compute(void)227 kalloc_zsize_compute(void)
228 {
229 	uint32_t step = KHEAP_STEP_START;
230 	uint32_t size = KHEAP_START_SIZE;
231 
232 	/*
233 	 * Manually initialize extra initial zones
234 	 */
235 	kheap_zsize[0] = size / 2;
236 	kheap_zsize[1] = size;
237 	static_assert(KHEAP_EXTRA_ZONES == 2);
238 
239 	/*
240 	 * Compute sizes for remaining zones
241 	 */
242 	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
243 		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
244 
245 		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
246 		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);
247 
248 		step *= 2;
249 		size += step;
250 	}
251 }
252 
253 static zone_t
kalloc_zone_for_size_with_flags(zone_id_t zid,vm_size_t size,zalloc_flags_t flags)254 kalloc_zone_for_size_with_flags(
255 	zone_id_t               zid,
256 	vm_size_t               size,
257 	zalloc_flags_t          flags)
258 {
259 	vm_size_t max_size = KHEAP_MAX_SIZE;
260 	bool forcopyin = flags & Z_MAY_COPYINMAP;
261 	zone_t zone;
262 
263 	if (flags & Z_KALLOC_ARRAY) {
264 		size = roundup(size, KALLOC_ARRAY_GRANULE);
265 	}
266 
267 	if (forcopyin) {
268 #if __x86_64__
269 		/*
270 		 * On Intel, the OSData() ABI used to allocate
271 		 * from the kernel map starting at PAGE_SIZE.
272 		 *
273 		 * If only vm_map_copyin() or a wrapper is used,
274 		 * then everything will work fine because vm_map_copy_t
275 		 * will perform an actual copy if the data is smaller
276 		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
277 		 *
278 		 * However, if anyone is trying to call mach_vm_remap(),
279 		 * then bad things (TM) happen.
280 		 *
281 		 * Avoid this by preserving the ABI and moving
282 		 * to kalloc_large() earlier.
283 		 *
284 		 * Any recent code really ought to use IOMemoryDescriptor
285 		 * for this purpose however.
286 		 */
287 		max_size = PAGE_SIZE - 1;
288 #endif
289 	}
290 
291 	if (size <= max_size) {
292 		uint32_t idx;
293 
294 		if (size <= KHEAP_START_SIZE) {
295 			zid  += (size > 16);
296 		} else {
297 			/*
298 			 * . log2down(size - 1) is log2up(size) - 1
299 			 * . (size - 1) >> (log2down(size - 1) - 1)
300 			 *   is either 0x2 or 0x3
301 			 */
302 			idx   = kalloc_log2down((uint32_t)(size - 1));
303 			zid  += KHEAP_EXTRA_ZONES +
304 			    2 * (idx - KHEAP_START_IDX) +
305 			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
306 		}
307 
308 		zone = zone_by_id(zid);
309 #if KASAN_CLASSIC
310 		/*
311 		 * Under kasan classic, certain size classes are a redzone
312 		 * away from the mathematical formula above, and we need
313 		 * to "go to the next zone".
314 		 *
315 		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
316 		 * this will never go to an "invalid" zone that doesn't
317 		 * belong to the kheap.
318 		 */
319 		if (size > zone_elem_inner_size(zone)) {
320 			zone++;
321 		}
322 #endif
323 		return zone;
324 	}
325 
326 	return ZONE_NULL;
327 }
328 
329 zone_t
kalloc_zone_for_size(zone_id_t zid,size_t size)330 kalloc_zone_for_size(zone_id_t zid, size_t size)
331 {
332 	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
333 }
334 
335 static inline bool
kheap_size_from_zone(void * addr,vm_size_t size,zalloc_flags_t flags)336 kheap_size_from_zone(
337 	void                   *addr,
338 	vm_size_t               size,
339 	zalloc_flags_t          flags)
340 {
341 	vm_size_t max_size = KHEAP_MAX_SIZE;
342 	bool forcopyin = flags & Z_MAY_COPYINMAP;
343 
344 #if __x86_64__
345 	/*
346 	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
347 	 * behavior, then the element could have a PAGE_SIZE reported size,
348 	 * yet still be from a zone for Z_MAY_COPYINMAP.
349 	 */
350 	if (forcopyin) {
351 		if (size == PAGE_SIZE &&
352 		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
353 			return true;
354 		}
355 
356 		max_size = PAGE_SIZE - 1;
357 	}
358 #else
359 #pragma unused(addr, forcopyin)
360 #endif
361 
362 	return size <= max_size;
363 }
364 
365 /*
366  * All data zones shouldn't use the early zone. Therefore set the no early alloc
367  * bit right after creation.
368  */
369 __startup_func
370 static void
kalloc_set_no_early_for_data(zone_kheap_id_t kheap_id,zone_stats_t zstats)371 kalloc_set_no_early_for_data(
372 	zone_kheap_id_t       kheap_id,
373 	zone_stats_t          zstats)
374 {
375 	if (zone_is_data_kheap(kheap_id)) {
376 		zpercpu_foreach(zs, zstats) {
377 			os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
378 		}
379 	}
380 }
381 
382 __startup_func
383 static void
kalloc_zone_init(const char * kheap_name,zone_kheap_id_t kheap_id,zone_id_t * kheap_zstart,zone_create_flags_t zc_flags)384 kalloc_zone_init(
385 	const char           *kheap_name,
386 	zone_kheap_id_t       kheap_id,
387 	zone_id_t            *kheap_zstart,
388 	zone_create_flags_t   zc_flags)
389 {
390 	zc_flags |= ZC_PGZ_USE_GUARDS;
391 	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
392 		zc_flags |= ZC_DATA;
393 	}
394 
395 	if (kheap_id == KHEAP_ID_DATA_SHARED) {
396 		zc_flags |= ZC_SHARED_DATA;
397 	}
398 
399 	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
400 		uint32_t size = kheap_zsize[i];
401 		char buf[MAX_ZONE_NAME], *z_name;
402 		int len;
403 
404 		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
405 		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
406 		strlcpy(z_name, buf, len + 1);
407 
408 		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
409 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
410 			uint32_t scale = kalloc_log2down(size / 32);
411 
412 			if (size == 32 << scale) {
413 			        z->z_array_size_class = scale;
414 			} else {
415 			        z->z_array_size_class = scale | 0x10;
416 			}
417 #endif
418 			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
419 			if (i == 0) {
420 			        *kheap_zstart = zone_index(z);
421 			}
422 			kalloc_set_no_early_for_data(kheap_id, z->z_stats);
423 		});
424 	}
425 }
426 
427 __startup_func
428 static void
kalloc_heap_init(struct kalloc_heap * kheap)429 kalloc_heap_init(struct kalloc_heap *kheap)
430 {
431 	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
432 	    ZC_NONE);
433 	/*
434 	 * Count all the "raw" views for zones in the heap.
435 	 */
436 	zone_view_count += KHEAP_NUM_ZONES;
437 }
438 
/*
 * Kext alignment constants: KEXT_ALIGN_BYTES is 1 << KEXT_ALIGN_SHIFT and
 * KEXT_ALIGN_MASK covers the bits below it.
 */
#define KEXT_ALIGN_SHIFT           6
#define KEXT_ALIGN_BYTES           (1 << KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES - 1)
/* 256 KiB, in bytes. */
#define kt_scratch_size            (256ul << 10)
/*
 * Name of the Mach-O section holding the kalloc_type views of a variant:
 * "__kalloc_type" for KTV_FIXED, "__kalloc_var" otherwise.
 * The argument is parenthesized so that expression arguments (for example
 * a conditional expression) are compared as a whole.
 */
#define KALLOC_TYPE_SECTION(type) \
	((type) == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")
445 
446 /*
447  * Enum to specify the kalloc_type variant being used.
448  */
/* KTV_FIXED selects the fixed-size views, KTV_VAR the variable-size ones. */
449 __options_decl(kalloc_type_variant_t, uint16_t, {
450 	KTV_FIXED     = 0x0001,
451 	KTV_VAR       = 0x0002,
452 });
453 
454 /*
455  * Macros that generate the appropriate kalloc_type variant (i.e fixed or
456  * variable) of the desired variable/function.
457  */
/*
 * kalloc_type_var(type, var) yields kalloc_type_<var>_fixed or
 * kalloc_type_<var>_var cast to vm_offset_t.
 * kalloc_type_func(type, func, ...) calls kalloc_type_<func>_fixed(...) or
 * kalloc_type_<func>_var(...).
 * Any `type` other than KTV_FIXED selects the _var flavor. Both arms of
 * the conditional must type-check, so the two flavors need compatible
 * result types.
 */
458 #define kalloc_type_var(type, var)              \
459 	((type) == KTV_FIXED?                       \
460 	(vm_offset_t) kalloc_type_##var##_fixed:    \
461 	(vm_offset_t) kalloc_type_##var##_var)
462 #define kalloc_type_func(type, func, ...)       \
463 	((type) == KTV_FIXED?                       \
464 	kalloc_type_##func##_fixed(__VA_ARGS__):    \
465 	kalloc_type_##func##_var(__VA_ARGS__))
466 
/*
 * Tunables, each declared with its name string and default value.
 * NOTE(review): how the name strings are matched (boot-args or otherwise)
 * is determined by the TUNABLE() macro, which is not visible here.
 */
467 TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
468 TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
469     ZSECURITY_CONFIG_KT_VAR_BUDGET);
470 TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
471     ZSECURITY_CONFIG_KT_BUDGET);
472 TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
473 static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
474 
475 /*
476  * Section start/end for fixed kalloc_type views
477  */
478 extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
479 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
480 
481 extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
482 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
483 
484 /*
485  * Section start/end for variable kalloc_type views
486  */
487 extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
488 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
489 
490 extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
491 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
492 
/* Scratch array that kalloc_type_view_copy() appends view pointers to. */
493 __startup_data
494 static kalloc_type_views_t *kt_buffer = NULL;
/* Bound that kalloc_type_view_copy() checks its running count against. */
495 __startup_data
496 static uint64_t kt_count;
/* Initial value of the jenkins hash in kalloc_type_set_type_hash(). */
497 __startup_data
498 uint32_t kalloc_type_hash_seed;
499 
/* One entry per kt_zone_cfg[] size class. */
500 __startup_data
501 static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
502 __startup_data
503 static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];
504 
/* Pairs a zone count with an index (users are outside this chunk). */
505 struct nzones_with_idx {
506 	uint16_t nzones;
507 	uint16_t idx;
508 };
509 int16_t zone_carry = 0;
510 
511 _Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
512     "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");
513 
514 /*
515  * For use by lldb to iterate over kalloc types
516  */
517 SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
518 SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
519 SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];
520 
/*
 * The type hash occupies the KT_HASH bits of the flags, starting at bit 16;
 * kalloc_type_set_type_hash() stores it with the same shift.
 */
521 #define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
522 static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
523     KMEM_DIRECTION_MASK),
524     "Insufficient bits to represent range and dir for VM allocations");
/* Every kt_zone_cfg[] index must be below KALLOC_TYPE_IDX_MASK. */
525 static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
526     "validate idx mask");
527 /* qsort routines */
528 typedef int (*cmpfunc_t)(const void *a, const void *b);
529 extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
530 
531 static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)532 kalloc_type_get_idx(uint32_t kt_size)
533 {
534 	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
535 }
536 
537 static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size,uint16_t idx)538 kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
539 {
540 	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
541 }
542 
543 static void
kalloc_type_build_dlut(void)544 kalloc_type_build_dlut(void)
545 {
546 	vm_size_t size = 0;
547 	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
548 		uint8_t zindex = 0;
549 		while (kt_zone_cfg[zindex] < size) {
550 			zindex++;
551 		}
552 		kalloc_type_dlut[i] = zindex;
553 	}
554 }
555 
556 static uint32_t
kalloc_type_idx_for_size(uint32_t size)557 kalloc_type_idx_for_size(uint32_t size)
558 {
559 	assert(size <= KHEAP_MAX_SIZE);
560 	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
561 	return kalloc_type_set_idx(size, idx);
562 }
563 
564 static void
kalloc_type_assign_zone_fixed(kalloc_type_view_t * cur,kalloc_type_view_t * end,zone_t z,zone_t sig_zone,zone_t early_zone)565 kalloc_type_assign_zone_fixed(
566 	kalloc_type_view_t     *cur,
567 	kalloc_type_view_t     *end,
568 	zone_t                  z,
569 	zone_t                  sig_zone,
570 	zone_t                  early_zone)
571 {
572 	/*
573 	 * Assign the zone created for every kalloc_type_view
574 	 * of the same unique signature
575 	 */
576 	bool need_raw_view = false;
577 
578 	while (cur < end) {
579 		kalloc_type_view_t kt = *cur;
580 		struct zone_view *zv = &kt->kt_zv;
581 		zv->zv_zone = z;
582 		kalloc_type_flags_t kt_flags = kt->kt_flags;
583 		zone_security_flags_t zsflags = zone_security_config(z);
584 
585 		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
586 		if (!early_zone) {
587 			assert(zone_is_data_kheap(zsflags.z_kheap_id));
588 		}
589 
590 		if (kt_flags & KT_SLID) {
591 			kt->kt_signature -= vm_kernel_slide;
592 			kt->kt_zv.zv_name -= vm_kernel_slide;
593 		}
594 
595 		if ((kt_flags & KT_PRIV_ACCT) ||
596 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
597 			zv->zv_stats = zalloc_percpu_permanent_type(
598 				struct zone_stats);
599 			need_raw_view = true;
600 			zone_view_count += 1;
601 		} else {
602 			zv->zv_stats = z->z_stats;
603 		}
604 
605 		if ((kt_flags & KT_NOEARLY) || !early_zone) {
606 			if ((kt_flags & KT_NOEARLY) && !(kt_flags & KT_PRIV_ACCT)) {
607 				panic("KT_NOEARLY used w/o private accounting for view %s",
608 				    zv->zv_name);
609 			}
610 
611 			zpercpu_foreach(zs, zv->zv_stats) {
612 				os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
613 			}
614 		}
615 
616 		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
617 			kt->kt_zearly = early_zone;
618 			kt->kt_zsig = sig_zone;
619 			/*
620 			 * If we haven't yet set the signature equivalance then set it
621 			 * otherwise validate that the zone has the same signature equivalance
622 			 * as the sig_zone provided
623 			 */
624 			if (!zone_get_sig_eq(z)) {
625 				zone_set_sig_eq(z, zone_index(sig_zone));
626 			} else {
627 				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
628 			}
629 		}
630 		zv->zv_next = (zone_view_t) z->z_views;
631 		zv->zv_zone->z_views = (zone_view_t) kt;
632 		cur++;
633 	}
634 	if (need_raw_view) {
635 		zone_view_count += 1;
636 	}
637 }
638 
639 __startup_func
640 static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t * cur,kalloc_type_var_view_t * end,uint32_t heap_idx)641 kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
642     kalloc_type_var_view_t *end, uint32_t heap_idx)
643 {
644 	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
645 	while (cur < end) {
646 		kalloc_type_var_view_t kt = *cur;
647 		kt->kt_heap_start = cfg->kh_zstart;
648 		kalloc_type_flags_t kt_flags = kt->kt_flags;
649 
650 		if (kt_flags & KT_SLID) {
651 			if (kt->kt_sig_hdr) {
652 				kt->kt_sig_hdr -= vm_kernel_slide;
653 			}
654 			kt->kt_sig_type -= vm_kernel_slide;
655 			kt->kt_name -= vm_kernel_slide;
656 		}
657 
658 		if ((kt_flags & KT_PRIV_ACCT) ||
659 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
660 			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
661 			zone_view_count += 1;
662 		}
663 
664 		kt->kt_next = (zone_view_t) cfg->kt_views;
665 		cfg->kt_views = kt;
666 		cur++;
667 	}
668 }
669 
670 __startup_func
671 static inline void
kalloc_type_slide_fixed(vm_offset_t addr)672 kalloc_type_slide_fixed(vm_offset_t addr)
673 {
674 	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
675 	ktv->kt_signature += vm_kernel_slide;
676 	ktv->kt_zv.zv_name += vm_kernel_slide;
677 	ktv->kt_flags |= KT_SLID;
678 }
679 
680 __startup_func
681 static inline void
kalloc_type_slide_var(vm_offset_t addr)682 kalloc_type_slide_var(vm_offset_t addr)
683 {
684 	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
685 	if (ktv->kt_sig_hdr) {
686 		ktv->kt_sig_hdr += vm_kernel_slide;
687 	}
688 	ktv->kt_sig_type += vm_kernel_slide;
689 	ktv->kt_name += vm_kernel_slide;
690 	ktv->kt_flags |= KT_SLID;
691 }
692 
693 __startup_func
694 static void
kalloc_type_validate_flags(kalloc_type_flags_t kt_flags,const char * kt_name,uuid_string_t kext_uuid)695 kalloc_type_validate_flags(
696 	kalloc_type_flags_t   kt_flags,
697 	const char           *kt_name,
698 	uuid_string_t         kext_uuid)
699 {
700 	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
701 		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
702 		    "required xnu headers", kt_name, kext_uuid);
703 	}
704 }
705 
706 static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr,uuid_string_t kext_uuid)707 kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
708 {
709 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
710 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
711 	return ktv->kt_flags;
712 }
713 
714 static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr,uuid_string_t kext_uuid)715 kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
716 {
717 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
718 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
719 	return ktv->kt_flags;
720 }
721 
722 /*
723  * Check if signature of type is made up of only data and padding
724  */
725 static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)726 kalloc_type_is_data(kalloc_type_flags_t kt_flags)
727 {
728 	assert(kt_flags & KT_CHANGED);
729 	return kt_flags & KT_DATA_ONLY;
730 }
731 
732 /*
733  * Check if signature of type is made up of only pointers
734  */
735 static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)736 kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
737 {
738 	assert(kt_flags & KT_CHANGED2);
739 	return kt_flags & KT_PTR_ARRAY;
740 }
741 
742 static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)743 kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
744 {
745 	assert(kt_flags & KT_CHANGED);
746 	return kt_flags & KT_VM;
747 }
748 
749 __startup_func
750 static inline vm_size_t
kalloc_type_view_sz_fixed(void)751 kalloc_type_view_sz_fixed(void)
752 {
753 	return sizeof(struct kalloc_type_view);
754 }
755 
756 __startup_func
757 static inline vm_size_t
kalloc_type_view_sz_var(void)758 kalloc_type_view_sz_var(void)
759 {
760 	return sizeof(struct kalloc_type_var_view);
761 }
762 
763 __startup_func
764 static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end)765 kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
766     vm_offset_t end)
767 {
768 	return (end - start) / kalloc_type_func(type, view_sz);
769 }
770 
771 __startup_func
772 static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t * buffer,vm_offset_t ktv)773 kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
774 {
775 	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
776 }
777 
778 __startup_func
779 static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t * buffer,vm_offset_t ktv)780 kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
781 {
782 	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
783 }
784 
785 __startup_func
786 static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)787 kalloc_type_handle_data_view_fixed(vm_offset_t addr)
788 {
789 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
790 	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
791 	    cur_data_view->kt_size);
792 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
793 	    NULL);
794 }
795 
796 __startup_func
797 static void
kalloc_type_handle_data_view_var(vm_offset_t addr)798 kalloc_type_handle_data_view_var(vm_offset_t addr)
799 {
800 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
801 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
802 }
803 
804 __startup_func
805 static uint32_t
kalloc_type_handle_parray_var(void)806 kalloc_type_handle_parray_var(void)
807 {
808 	uint32_t i = 0;
809 	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
810 	const char *p_name = kt->kt_name;
811 
812 	/*
813 	 * The sorted list of variable kalloc_type_view has pointer arrays at the
814 	 * beginning. Walk through them and assign a random pointer heap to each
815 	 * type detected by typename.
816 	 */
817 	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
818 		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
819 		const char *c_name = kt->kt_name;
820 		uint32_t p_i = i;
821 
822 		while (strcmp(c_name, p_name) == 0) {
823 			i++;
824 			kt = kt_buffer[i].ktv_var;
825 			c_name = kt->kt_name;
826 		}
827 		p_name = c_name;
828 		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
829 		    &kt_buffer[i].ktv_var, heap_id);
830 	}
831 
832 	/*
833 	 * Returns the the index of the first view that isn't a pointer array
834 	 */
835 	return i;
836 }
837 
838 __startup_func
839 static uint32_t
kalloc_hash_adjust(uint32_t hash,uint32_t shift)840 kalloc_hash_adjust(uint32_t hash, uint32_t shift)
841 {
842 	/*
843 	 * Limit range_id to ptr ranges
844 	 */
845 	uint32_t range_id = kmem_adjust_range_id(hash);
846 	uint32_t direction = hash & 0x8000;
847 	return (range_id | KMEM_HASH_SET | direction) << shift;
848 }
849 
850 __startup_func
851 static void
kalloc_type_set_type_hash(const char * sig_ty,const char * sig_hdr,kalloc_type_flags_t * kt_flags)852 kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
853     kalloc_type_flags_t *kt_flags)
854 {
855 	uint32_t hash = 0;
856 
857 	assert(sig_ty != NULL);
858 	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
859 	    kalloc_type_hash_seed);
860 	if (sig_hdr) {
861 		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
862 	}
863 	os_hash_jenkins_finish(hash);
864 	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
865 
866 	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
867 }
868 
869 __startup_func
870 static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)871 kalloc_type_set_type_hash_fixed(vm_offset_t addr)
872 {
873 	/*
874 	 * Use backtraces on fixed as we don't have signatures for types that go
875 	 * to the VM due to rdar://85182551.
876 	 */
877 	(void) addr;
878 }
879 
880 __startup_func
881 static void
kalloc_type_set_type_hash_var(vm_offset_t addr)882 kalloc_type_set_type_hash_var(vm_offset_t addr)
883 {
884 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
885 	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
886 	    &ktv->kt_flags);
887 }
888 
889 __startup_func
890 static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)891 kalloc_type_mark_processed_fixed(vm_offset_t addr)
892 {
893 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
894 	ktv->kt_flags |= KT_PROCESSED;
895 }
896 
897 __startup_func
898 static void
kalloc_type_mark_processed_var(vm_offset_t addr)899 kalloc_type_mark_processed_var(vm_offset_t addr)
900 {
901 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
902 	ktv->kt_flags |= KT_PROCESSED;
903 }
904 
905 __startup_func
906 static void
kalloc_type_update_view_fixed(vm_offset_t addr)907 kalloc_type_update_view_fixed(vm_offset_t addr)
908 {
909 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
910 	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
911 }
912 
913 __startup_func
914 static void
kalloc_type_update_view_var(vm_offset_t addr)915 kalloc_type_update_view_var(vm_offset_t addr)
916 {
917 	(void) addr;
918 }
919 
920 __startup_func
921 static void
kalloc_type_view_copy(const kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end,uint64_t * cur_count,bool slide,uuid_string_t kext_uuid)922 kalloc_type_view_copy(
923 	const kalloc_type_variant_t   type,
924 	vm_offset_t                   start,
925 	vm_offset_t                   end,
926 	uint64_t                     *cur_count,
927 	bool                          slide,
928 	uuid_string_t                 kext_uuid)
929 {
930 	uint64_t count = kalloc_type_view_count(type, start, end);
931 	if (count + *cur_count >= kt_count) {
932 		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
933 	}
934 	vm_offset_t cur = start;
935 	while (cur < end) {
936 		if (slide) {
937 			kalloc_type_func(type, slide, cur);
938 		}
939 		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
940 		    kext_uuid);
941 		kalloc_type_func(type, mark_processed, cur);
942 		/*
943 		 * Skip views that go to the VM
944 		 */
945 		if (kalloc_type_from_vm(kt_flags)) {
946 			cur += kalloc_type_func(type, view_sz);
947 			continue;
948 		}
949 
950 		/*
951 		 * If signature indicates that the entire allocation is data move it to
952 		 * KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a fake "data" heap,
953 		 * variable kalloc_type handles the actual redirection in the entry points
954 		 * kalloc/kfree_type_var_impl.
955 		 */
956 		if (kalloc_type_is_data(kt_flags)) {
957 			kalloc_type_func(type, handle_data_view, cur);
958 			cur += kalloc_type_func(type, view_sz);
959 			continue;
960 		}
961 
962 		/*
963 		 * Set type hash that is used by kmem_*_guard
964 		 */
965 		kalloc_type_func(type, set_type_hash, cur);
966 		kalloc_type_func(type, update_view, cur);
967 		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
968 		cur += kalloc_type_func(type, view_sz);
969 		*cur_count = *cur_count + 1;
970 	}
971 }
972 
973 __startup_func
974 static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)975 kalloc_type_view_parse(const kalloc_type_variant_t type)
976 {
977 	kc_format_t kc_format;
978 	uint64_t cur_count = 0;
979 
980 	if (!PE_get_primary_kc_format(&kc_format)) {
981 		panic("kalloc_type_view_parse: wasn't able to determine kc format");
982 	}
983 
984 	if (kc_format == KCFormatStatic) {
985 		/*
986 		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
987 		 * xnu are coalesced.
988 		 */
989 		kalloc_type_view_copy(type,
990 		    kalloc_type_var(type, sec_start),
991 		    kalloc_type_var(type, sec_end),
992 		    &cur_count, false, NULL);
993 	} else if (kc_format == KCFormatFileset) {
994 		/*
995 		 * If kc uses filesets, traverse __kalloc_type section for each
996 		 * macho in the BootKC.
997 		 */
998 		kernel_mach_header_t *kc_mh = NULL;
999 		kernel_mach_header_t *kext_mh = NULL;
1000 
1001 		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
1002 		struct load_command *lc =
1003 		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
1004 		for (uint32_t i = 0; i < kc_mh->ncmds;
1005 		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1006 			if (lc->cmd != LC_FILESET_ENTRY) {
1007 				continue;
1008 			}
1009 			struct fileset_entry_command *fse =
1010 			    (struct fileset_entry_command *)(vm_offset_t)lc;
1011 			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
1012 			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
1013 				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1014 			if (sect != NULL) {
1015 				unsigned long uuidlen = 0;
1016 				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
1017 				uuid_string_t kext_uuid_str;
1018 				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
1019 					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
1020 				}
1021 				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1022 				    &cur_count, false, kext_uuid_str);
1023 			}
1024 		}
1025 	} else if (kc_format == KCFormatKCGEN) {
1026 		/*
1027 		 * Parse __kalloc_type section from xnu
1028 		 */
1029 		kalloc_type_view_copy(type,
1030 		    kalloc_type_var(type, sec_start),
1031 		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1032 
1033 		/*
1034 		 * Parse __kalloc_type section for kexts
1035 		 *
1036 		 * Note: We don't process the kalloc_type_views for kexts on armv7
1037 		 * as this platform has insufficient memory for type based
1038 		 * segregation. kalloc_type_impl_external will direct callsites
1039 		 * based on their size.
1040 		 */
1041 		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1042 		vm_offset_t cur = 0;
1043 		vm_offset_t end = 0;
1044 
1045 		/*
1046 		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1047 		 * and traverse it.
1048 		 */
1049 		kernel_section_t *prelink_sect = getsectbynamefromheader(
1050 			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1051 		assert(prelink_sect);
1052 		cur = prelink_sect->addr;
1053 		end = prelink_sect->addr + prelink_sect->size;
1054 
1055 		while (cur < end) {
1056 			uint64_t kext_text_sz = 0;
1057 			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1058 
1059 			if (kext_mh->magic == 0) {
1060 				/*
1061 				 * Assert that we have processed all kexts and all that is left
1062 				 * is padding
1063 				 */
1064 				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1065 				break;
1066 			} else if (kext_mh->magic != MH_MAGIC_64 &&
1067 			    kext_mh->magic != MH_CIGAM_64) {
1068 				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1069 				    cur);
1070 			}
1071 
1072 			/*
1073 			 * Kext macho found, iterate through its segments
1074 			 */
1075 			struct load_command *lc =
1076 			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1077 			bool isSplitKext = false;
1078 
1079 			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1080 			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1081 				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1082 					isSplitKext = true;
1083 					continue;
1084 				} else if (lc->cmd != LC_SEGMENT_64) {
1085 					continue;
1086 				}
1087 
1088 				kernel_segment_command_t *seg_cmd =
1089 				    (struct segment_command_64 *)(vm_offset_t)lc;
1090 				/*
1091 				 * Parse kalloc_type section
1092 				 */
1093 				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1094 					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1095 					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1096 					if (kt_sect) {
1097 						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1098 						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1099 						    true, NULL);
1100 					}
1101 				}
1102 				/*
1103 				 * If the kext has a __TEXT segment, that is the only thing that
1104 				 * will be in the special __PRELINK_TEXT KC segment, so the next
1105 				 * macho is right after.
1106 				 */
1107 				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1108 					kext_text_sz = seg_cmd->filesize;
1109 				}
1110 			}
1111 			/*
1112 			 * If the kext did not have a __TEXT segment (special xnu kexts with
1113 			 * only a __LINKEDIT segment) then the next macho will be after all the
1114 			 * header commands.
1115 			 */
1116 			if (!kext_text_sz) {
1117 				kext_text_sz = kext_mh->sizeofcmds;
1118 			} else if (!isSplitKext) {
1119 				panic("kalloc_type_view_parse: No support for non-split seg KCs");
1120 				break;
1121 			}
1122 
1123 			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1124 		}
1125 	} else {
1126 		/*
1127 		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1128 		 * parsing kalloc_type_view structs during startup.
1129 		 */
1130 		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1131 		    " for kc_format = %d\n", kc_format);
1132 	}
1133 	return cur_count;
1134 }
1135 
1136 __startup_func
1137 static int
kalloc_type_cmp_fixed(const void * a,const void * b)1138 kalloc_type_cmp_fixed(const void *a, const void *b)
1139 {
1140 	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1141 	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1142 
1143 	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1144 	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1145 	/*
1146 	 * If the kalloc_type_views are in the same kalloc bucket, sort by
1147 	 * signature else sort by size
1148 	 */
1149 	if (idxA == idxB) {
1150 		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1151 		/*
1152 		 * If the kalloc_type_views have the same signature sort by site
1153 		 * name
1154 		 */
1155 		if (result == 0) {
1156 			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1157 		}
1158 		return result;
1159 	}
1160 	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1161 	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1162 	return (int)(sizeA - sizeB);
1163 }
1164 
1165 __startup_func
1166 static int
kalloc_type_cmp_var(const void * a,const void * b)1167 kalloc_type_cmp_var(const void *a, const void *b)
1168 {
1169 	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1170 	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1171 	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1172 	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1173 	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1174 	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1175 	int result = 0;
1176 
1177 	/*
1178 	 * Switched around (B - A) because we want the pointer arrays to be at the
1179 	 * top
1180 	 */
1181 	result = ktB_ptrArray - ktA_ptrArray;
1182 	if (result == 0) {
1183 		result = strcmp(ktA_hdr, ktB_hdr);
1184 		if (result == 0) {
1185 			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1186 			if (result == 0) {
1187 				result = strcmp(ktA->kt_name, ktB->kt_name);
1188 			}
1189 		}
1190 	}
1191 	return result;
1192 }
1193 
1194 __startup_func
1195 static uint16_t *
kalloc_type_create_iterators_fixed(uint16_t * kt_skip_list_start,uint64_t count)1196 kalloc_type_create_iterators_fixed(
1197 	uint16_t           *kt_skip_list_start,
1198 	uint64_t            count)
1199 {
1200 	uint16_t *kt_skip_list = kt_skip_list_start;
1201 	uint16_t p_idx = UINT16_MAX; /* previous size idx */
1202 	uint16_t c_idx = 0; /* current size idx */
1203 	uint16_t unique_sig = 0;
1204 	uint16_t total_sig = 0;
1205 	const char *p_sig = NULL;
1206 	const char *p_name = "";
1207 	const char *c_sig = NULL;
1208 	const char *c_name = NULL;
1209 
1210 	/*
1211 	 * Walk over each kalloc_type_view
1212 	 */
1213 	for (uint16_t i = 0; i < count; i++) {
1214 		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1215 
1216 		c_idx = kalloc_type_get_idx(kt->kt_size);
1217 		c_sig = kt->kt_signature;
1218 		c_name = kt->kt_zv.zv_name;
1219 		/*
1220 		 * When current kalloc_type_view is in a different kalloc size
1221 		 * bucket than the previous, it means we have processed all in
1222 		 * the previous size bucket, so store the accumulated values
1223 		 * and advance the indices.
1224 		 */
1225 		if (p_idx == UINT16_MAX || c_idx != p_idx) {
1226 			/*
1227 			 * Updates for frequency lists
1228 			 */
1229 			if (p_idx != UINT16_MAX) {
1230 				kt_freq_list[p_idx] = unique_sig;
1231 				kt_freq_list_total[p_idx] = total_sig - unique_sig;
1232 			}
1233 			unique_sig = 1;
1234 			total_sig = 1;
1235 
1236 			p_idx = c_idx;
1237 			p_sig = c_sig;
1238 			p_name = c_name;
1239 
1240 			/*
1241 			 * Updates to signature skip list
1242 			 */
1243 			*kt_skip_list = i;
1244 			kt_skip_list++;
1245 
1246 			continue;
1247 		}
1248 
1249 		/*
1250 		 * When current kalloc_type_views is in the kalloc size bucket as
1251 		 * previous, analyze the siganture to see if it is unique.
1252 		 *
1253 		 * Signatures are collapsible if one is a substring of the next.
1254 		 */
1255 		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1256 			/*
1257 			 * Unique signature detected. Update counts and advance index
1258 			 */
1259 			unique_sig++;
1260 			total_sig++;
1261 
1262 			*kt_skip_list = i;
1263 			kt_skip_list++;
1264 			p_sig = c_sig;
1265 			p_name = c_name;
1266 			continue;
1267 		}
1268 		/*
1269 		 * Need this here as we do substring matching for signatures so you
1270 		 * want to track the longer signature seen rather than the substring
1271 		 */
1272 		p_sig = c_sig;
1273 
1274 		/*
1275 		 * Check if current kalloc_type_view corresponds to a new type
1276 		 */
1277 		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1278 			total_sig++;
1279 			p_name = c_name;
1280 		}
1281 	}
1282 	/*
1283 	 * Final update
1284 	 */
1285 	assert(c_idx == p_idx);
1286 	assert(kt_freq_list[c_idx] == 0);
1287 	kt_freq_list[c_idx] = unique_sig;
1288 	kt_freq_list_total[c_idx] = total_sig - unique_sig;
1289 	*kt_skip_list = (uint16_t) count;
1290 
1291 	return ++kt_skip_list;
1292 }
1293 
1294 __startup_func
1295 static uint32_t
kalloc_type_create_iterators_var(uint32_t * kt_skip_list_start,uint32_t buf_start)1296 kalloc_type_create_iterators_var(
1297 	uint32_t           *kt_skip_list_start,
1298 	uint32_t            buf_start)
1299 {
1300 	uint32_t *kt_skip_list = kt_skip_list_start;
1301 	uint32_t n = 0;
1302 
1303 	kt_skip_list[n] = buf_start;
1304 	assert(kt_count > buf_start + 1);
1305 	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
1306 		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1307 		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1308 		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1309 		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1310 		assert(ktA->kt_sig_type != NULL);
1311 		assert(ktB->kt_sig_type != NULL);
1312 		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1313 		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1314 			n++;
1315 			kt_skip_list[n] = i;
1316 		}
1317 	}
1318 	/*
1319 	 * Final update
1320 	 */
1321 	n++;
1322 	kt_skip_list[n] = (uint32_t) kt_count;
1323 	return n;
1324 }
1325 
1326 __startup_func
1327 static uint16_t
kalloc_type_distribute_budget(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget,uint16_t min_zones_per_size)1328 kalloc_type_distribute_budget(
1329 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1330 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)],
1331 	uint16_t            zone_budget,
1332 	uint16_t            min_zones_per_size)
1333 {
1334 	uint16_t total_sig = 0;
1335 	uint16_t min_sig = 0;
1336 	uint16_t assigned_zones = 0;
1337 	uint16_t remaining_zones = zone_budget;
1338 	uint16_t modulo = 0;
1339 
1340 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1341 		uint16_t sig_freq = freq_list[i];
1342 		uint16_t min_zones = min_zones_per_size;
1343 
1344 		if (sig_freq < min_zones_per_size) {
1345 			min_zones = sig_freq;
1346 		}
1347 		total_sig += sig_freq;
1348 		kt_zones[i] = min_zones;
1349 		min_sig += min_zones;
1350 	}
1351 	if (remaining_zones > total_sig) {
1352 		remaining_zones = total_sig;
1353 	}
1354 	assert(remaining_zones >= min_sig);
1355 	remaining_zones -= min_sig;
1356 	total_sig -= min_sig;
1357 	assigned_zones += min_sig;
1358 
1359 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1360 		uint16_t freq = freq_list[i];
1361 
1362 		if (freq < min_zones_per_size) {
1363 			continue;
1364 		}
1365 		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1366 		uint16_t n_zones = (uint16_t) numer / total_sig;
1367 
1368 		/*
1369 		 * Accumulate remainder and increment n_zones when it goes above
1370 		 * denominator
1371 		 */
1372 		modulo += numer % total_sig;
1373 		if (modulo >= total_sig) {
1374 			n_zones++;
1375 			modulo -= total_sig;
1376 		}
1377 
1378 		/*
1379 		 * Cap the total number of zones to the unique signatures
1380 		 */
1381 		if ((n_zones + min_zones_per_size) > freq) {
1382 			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1383 			modulo += (extra_zones * total_sig);
1384 			n_zones -= extra_zones;
1385 		}
1386 		kt_zones[i] += n_zones;
1387 		assigned_zones += n_zones;
1388 	}
1389 
1390 	if (kt_options & KT_OPTIONS_DEBUG) {
1391 		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1392 		    assigned_zones, remaining_zones + min_sig - assigned_zones);
1393 	}
1394 	return remaining_zones + min_sig - assigned_zones;
1395 }
1396 
1397 __startup_func
1398 static int
kalloc_type_cmp_type_zones(const void * a,const void * b)1399 kalloc_type_cmp_type_zones(const void *a, const void *b)
1400 {
1401 	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
1402 	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;
1403 
1404 	return (int)(B.nzones - A.nzones);
1405 }
1406 
1407 __startup_func
1408 static void
kalloc_type_redistribute_budget(uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)])1409 kalloc_type_redistribute_budget(
1410 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1411 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)])
1412 {
1413 	uint16_t count = 0, cur_count = 0;
1414 	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
1415 	uint16_t top_zone_total = 0;
1416 
1417 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1418 		uint16_t zones = kt_zones[i];
1419 
1420 		/*
1421 		 * If a sizeclass got no zones but has types to divide make a note
1422 		 * of it
1423 		 */
1424 		if (zones == 0 && (freq_total_list[i] != 0)) {
1425 			count++;
1426 		}
1427 
1428 		sorted_zones[i].nzones = kt_zones[i];
1429 		sorted_zones[i].idx = i;
1430 	}
1431 
1432 	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
1433 	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);
1434 
1435 	for (uint16_t i = 0; i < 3; i++) {
1436 		top_zone_total += sorted_zones[i].nzones;
1437 	}
1438 
1439 	/*
1440 	 * Borrow zones from the top 3 sizeclasses and redistribute to those
1441 	 * that didn't get a zone but that types to divide
1442 	 */
1443 	cur_count = count;
1444 	for (uint16_t i = 0; i < 3; i++) {
1445 		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
1446 		uint16_t zone_available = kt_zones[sorted_zones[i].idx];
1447 
1448 		if (zone_borrow > (zone_available / 2)) {
1449 			zone_borrow = zone_available / 2;
1450 		}
1451 		kt_zones[sorted_zones[i].idx] -= zone_borrow;
1452 		cur_count -= zone_borrow;
1453 	}
1454 
1455 	for (uint16_t i = 0; i < 3; i++) {
1456 		if (cur_count == 0) {
1457 			break;
1458 		}
1459 		kt_zones[sorted_zones[i].idx]--;
1460 		cur_count--;
1461 	}
1462 
1463 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1464 		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
1465 		    (count > cur_count)) {
1466 			kt_zones[i]++;
1467 			count--;
1468 		}
1469 	}
1470 }
1471 
1472 static uint16_t
kalloc_type_apply_policy(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_sig[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_type[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget)1473 kalloc_type_apply_policy(
1474 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1475 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1476 	uint16_t            kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
1477 	uint16_t            kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
1478 	uint16_t            zone_budget)
1479 {
1480 	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
1481 	uint16_t zbudget_type = zone_budget - zbudget_sig;
1482 	uint16_t wasted_zones = 0;
1483 
1484 #if DEBUG || DEVELOPMENT
1485 	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1486 		__assert_only uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1487 		assert(zone_budget + current_zones <= MAX_ZONES);
1488 	}
1489 #endif
1490 
1491 	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
1492 	    zbudget_sig, 2);
1493 	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
1494 	    kt_zones_type, zbudget_type, 0);
1495 	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1496 
1497 	/*
1498 	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1499 	 */
1500 	if (kt_options & KT_OPTIONS_DEBUG) {
1501 		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
1502 		    "zones_type\n");
1503 		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1504 			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
1505 			    freq_total_list[i] + freq_list[i], freq_list[i],
1506 			    kt_zones_sig[i] + kt_zones_type[i],
1507 			    kt_zones_sig[i], kt_zones_type[i]);
1508 		}
1509 	}
1510 
1511 	return wasted_zones;
1512 }
1513 
1514 
1515 __startup_func
1516 static void
kalloc_type_create_zone_for_size(zone_t * kt_zones_for_size,uint16_t kt_zones,vm_size_t z_size)1517 kalloc_type_create_zone_for_size(
1518 	zone_t             *kt_zones_for_size,
1519 	uint16_t            kt_zones,
1520 	vm_size_t           z_size)
1521 {
1522 	zone_t p_zone = NULL;
1523 	char *z_name = NULL;
1524 	zone_t shared_z = NULL;
1525 
1526 	for (uint16_t i = 0; i < kt_zones; i++) {
1527 		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1528 		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1529 		    (size_t) z_size);
1530 		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1531 		if (i != 0) {
1532 			p_zone->z_kt_next = z;
1533 		}
1534 		p_zone = z;
1535 		kt_zones_for_size[i] = z;
1536 	}
1537 	/*
1538 	 * Create shared zone for sizeclass if it doesn't already exist
1539 	 */
1540 	if (kt_shared_fixed) {
1541 		shared_z = kalloc_zone_for_size(KHEAP_EARLY->kh_zstart, z_size);
1542 		if (zone_elem_inner_size(shared_z) != z_size) {
1543 			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1544 			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
1545 			    (size_t) z_size);
1546 			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
1547 			    ^(zone_t zone){
1548 				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_EARLY;
1549 			});
1550 		}
1551 	}
1552 	kt_zones_for_size[kt_zones] = shared_z;
1553 }
1554 
1555 __startup_func
1556 static uint16_t
kalloc_type_zones_for_type(uint16_t zones_total_type,uint16_t unique_types,uint16_t total_types,bool last_sig)1557 kalloc_type_zones_for_type(
1558 	uint16_t            zones_total_type,
1559 	uint16_t            unique_types,
1560 	uint16_t            total_types,
1561 	bool                last_sig)
1562 {
1563 	uint16_t zones_for_type = 0, n_mod = 0;
1564 
1565 	if (zones_total_type == 0) {
1566 		return 0;
1567 	}
1568 
1569 	zones_for_type = (zones_total_type * unique_types) / total_types;
1570 	n_mod = (zones_total_type * unique_types) % total_types;
1571 	zone_carry += n_mod;
1572 
1573 	/*
1574 	 * Drain carry opportunistically
1575 	 */
1576 	if (((unique_types > 3) && (zone_carry > 0)) ||
1577 	    (zone_carry >= (int) total_types) ||
1578 	    (last_sig && (zone_carry > 0))) {
1579 		zone_carry -= total_types;
1580 		zones_for_type++;
1581 	}
1582 
1583 	if (last_sig) {
1584 		assert(zone_carry == 0);
1585 	}
1586 
1587 	return zones_for_type;
1588 }
1589 
1590 __startup_func
1591 static uint16_t
kalloc_type_build_skip_list(kalloc_type_view_t * start,kalloc_type_view_t * end,uint16_t * kt_skip_list)1592 kalloc_type_build_skip_list(
1593 	kalloc_type_view_t     *start,
1594 	kalloc_type_view_t     *end,
1595 	uint16_t               *kt_skip_list)
1596 {
1597 	kalloc_type_view_t *cur = start;
1598 	kalloc_type_view_t prev = *start;
1599 	uint16_t i = 0, idx = 0;
1600 
1601 	kt_skip_list[idx] = i;
1602 	idx++;
1603 
1604 	while (cur < end) {
1605 		kalloc_type_view_t kt_cur = *cur;
1606 
1607 		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1608 			kt_skip_list[idx] = i;
1609 
1610 			prev = kt_cur;
1611 			idx++;
1612 		}
1613 		i++;
1614 		cur++;
1615 	}
1616 
1617 	/*
1618 	 * Final update
1619 	 */
1620 	kt_skip_list[idx] = i;
1621 	return idx;
1622 }
1623 
1624 __startup_func
1625 static void
kalloc_type_init_sig_eq(zone_t * zones,uint16_t n_zones,zone_t sig_zone)1626 kalloc_type_init_sig_eq(
1627 	zone_t             *zones,
1628 	uint16_t            n_zones,
1629 	zone_t              sig_zone)
1630 {
1631 	for (uint16_t i = 0; i < n_zones; i++) {
1632 		zone_t z = zones[i];
1633 
1634 		assert(!zone_get_sig_eq(z));
1635 		zone_set_sig_eq(z, zone_index(sig_zone));
1636 	}
1637 }
1638 
1639 __startup_func
1640 static uint16_t
kalloc_type_distribute_zone_for_type(kalloc_type_view_t * start,kalloc_type_view_t * end,bool last_sig,uint16_t zones_total_type,uint16_t total_types,uint16_t * kt_skip_list,zone_t kt_zones_for_size[32],uint16_t type_zones_start,zone_t sig_zone,zone_t early_zone)1641 kalloc_type_distribute_zone_for_type(
1642 	kalloc_type_view_t *start,
1643 	kalloc_type_view_t *end,
1644 	bool                last_sig,
1645 	uint16_t            zones_total_type,
1646 	uint16_t            total_types,
1647 	uint16_t           *kt_skip_list,
1648 	zone_t              kt_zones_for_size[32],
1649 	uint16_t            type_zones_start,
1650 	zone_t              sig_zone,
1651 	zone_t              early_zone)
1652 {
1653 	uint16_t count = 0, n_zones = 0;
1654 	uint16_t *shuffle_buf = NULL;
1655 	zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1656 
1657 	/*
1658 	 * Assert there is space in buffer
1659 	 */
1660 	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1661 	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1662 	    last_sig);
1663 	shuffle_buf = &kt_skip_list[count + 1];
1664 
1665 	/*
1666 	 * Initalize signature equivalence zone for type zones
1667 	 */
1668 	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1669 
1670 	if (n_zones == 0) {
1671 		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1672 		    early_zone);
1673 		return n_zones;
1674 	}
1675 
1676 	/*
1677 	 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1678 	 */
1679 	if (count == 1) {
1680 		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1681 		    early_zone);
1682 		return n_zones;
1683 	}
1684 
1685 	/*
1686 	 * Add the signature based zone to n_zones
1687 	 */
1688 	n_zones++;
1689 
1690 	for (uint16_t i = 0; i < count; i++) {
1691 		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1692 		uint16_t type_start = kt_skip_list[i];
1693 		kalloc_type_view_t *kt_type_start = &start[type_start];
1694 		uint16_t type_end = kt_skip_list[i + 1];
1695 		kalloc_type_view_t *kt_type_end = &start[type_end];
1696 		zone_t zone;
1697 
1698 		if (zidx == 0) {
1699 			kmem_shuffle(shuffle_buf, n_zones);
1700 		}
1701 
1702 		shuffled_zidx = shuffle_buf[zidx];
1703 		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1704 		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1705 		    early_zone);
1706 	}
1707 
1708 	return n_zones - 1;
1709 }
1710 
1711 __startup_func
1712 static void
kalloc_type_create_zones_fixed(uint16_t * kt_skip_list_start,uint16_t * kt_shuffle_buf)1713 kalloc_type_create_zones_fixed(
1714 	uint16_t           *kt_skip_list_start,
1715 	uint16_t           *kt_shuffle_buf)
1716 {
1717 	uint16_t *kt_skip_list = kt_skip_list_start;
1718 	uint16_t p_j = 0;
1719 	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1720 	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1721 #if DEBUG || DEVELOPMENT
1722 	__assert_only uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1723 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1724 #endif
1725 	/*
1726 	 * Apply policy to determine how many zones to create for each size
1727 	 * class.
1728 	 */
1729 	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1730 	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1731 
1732 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1733 		uint16_t n_unique_sig = kt_freq_list[i];
1734 		vm_size_t z_size = kt_zone_cfg[i];
1735 		uint16_t n_zones_sig = kt_zones_sig[i];
1736 		uint16_t n_zones_type = kt_zones_type[i];
1737 		uint16_t total_types = kt_freq_list_total[i];
1738 		uint16_t type_zones_used = 0;
1739 
1740 		if (n_unique_sig == 0) {
1741 			continue;
1742 		}
1743 
1744 		zone_carry = 0;
1745 		assert(n_zones_sig + n_zones_type + 1 <= 32);
1746 		zone_t kt_zones_for_size[32] = {};
1747 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1748 		    n_zones_sig + n_zones_type, z_size);
1749 
1750 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1751 		/*
1752 		 * Ensure that there is enough space to shuffle n_unique_sig
1753 		 * indices
1754 		 */
1755 		assert(n_unique_sig < kt_shuffle_count);
1756 
1757 		/*
1758 		 * Get a shuffled set of signature indices
1759 		 */
1760 		*kt_shuffle_buf = 0;
1761 		if (n_unique_sig > 1) {
1762 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1763 		}
1764 
1765 		for (uint16_t j = 0; j < n_zones_sig; j++) {
1766 			zone_t *z_ptr = &kt_zones_for_size[j];
1767 
1768 			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1769 		}
1770 
1771 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1772 			/*
1773 			 * For every size that has unique types
1774 			 */
1775 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1776 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1777 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1778 			zone_t zone = kt_zones_for_size[j % n_zones_sig];
1779 			zone_t early_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1780 			bool last_sig;
1781 
1782 			last_sig = (j == (n_unique_sig - 1)) ? true : false;
1783 			type_zones_used += kalloc_type_distribute_zone_for_type(
1784 				&kt_buffer[cur].ktv_fixed,
1785 				&kt_buffer[end].ktv_fixed, last_sig,
1786 				n_zones_type, total_types + n_unique_sig,
1787 				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1788 				n_zones_sig + type_zones_used, zone, early_zone);
1789 		}
1790 		assert(type_zones_used <= n_zones_type);
1791 		p_j += n_unique_sig;
1792 	}
1793 }
1794 
1795 __startup_func
1796 static void
kalloc_type_view_init_fixed(void)1797 kalloc_type_view_init_fixed(void)
1798 {
1799 	kalloc_type_hash_seed = (uint32_t) early_random();
1800 	kalloc_type_build_dlut();
1801 	/*
1802 	 * Parse __kalloc_type sections and build array of pointers to
1803 	 * all kalloc type views in kt_buffer.
1804 	 */
1805 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1806 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1807 
1808 #if MACH_ASSERT
1809 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1810 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1811 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1812 #endif
1813 
1814 	/*
1815 	 * Sort based on size class and signature
1816 	 */
1817 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1818 	    kalloc_type_cmp_fixed);
1819 
1820 	/*
1821 	 * Build a skip list that holds starts of unique signatures and a
1822 	 * frequency list of number of unique and total signatures per kalloc
1823 	 * size class
1824 	 */
1825 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1826 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1827 		kt_skip_list_start, kt_count);
1828 
1829 	/*
1830 	 * Create zones based on signatures
1831 	 */
1832 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1833 }
1834 
1835 __startup_func
1836 static void
kalloc_type_heap_init(void)1837 kalloc_type_heap_init(void)
1838 {
1839 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1840 	char kh_name[MAX_ZONE_NAME];
1841 	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1842 
1843 	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1844 		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1845 		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1846 		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1847 	}
1848 	/*
1849 	 * All variable kalloc type allocations are collapsed into a single
1850 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1851 	 */
1852 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1853 	zone_view_count += 1;
1854 }
1855 
1856 __startup_func
1857 static void
kalloc_type_assign_heap(uint32_t start,uint32_t end,uint32_t heap_id)1858 kalloc_type_assign_heap(
1859 	uint32_t            start,
1860 	uint32_t            end,
1861 	uint32_t            heap_id)
1862 {
1863 	bool use_split = kmem_get_random16(1);
1864 
1865 	if (use_split) {
1866 		heap_id = kt_var_heaps;
1867 	}
1868 	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1869 	    &kt_buffer[end].ktv_var, heap_id);
1870 }
1871 
1872 __startup_func
1873 static void
kalloc_type_split_heap(uint32_t start,uint32_t end,uint32_t heap_id)1874 kalloc_type_split_heap(
1875 	uint32_t            start,
1876 	uint32_t            end,
1877 	uint32_t            heap_id)
1878 {
1879 	uint32_t count = start;
1880 	const char *p_name = NULL;
1881 
1882 	while (count < end) {
1883 		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1884 		const char *c_name = cur->kt_name;
1885 
1886 		if (!p_name) {
1887 			assert(count == start);
1888 			p_name = c_name;
1889 		}
1890 		if (strcmp(c_name, p_name) != 0) {
1891 			kalloc_type_assign_heap(start, count, heap_id);
1892 			start = count;
1893 			p_name = c_name;
1894 		}
1895 		count++;
1896 	}
1897 	kalloc_type_assign_heap(start, end, heap_id);
1898 }
1899 
1900 __startup_func
1901 static void
kalloc_type_view_init_var(void)1902 kalloc_type_view_init_var(void)
1903 {
1904 	uint32_t buf_start = 0, unique_sig = 0;
1905 	uint32_t *kt_skip_list_start;
1906 	uint16_t *shuffle_buf;
1907 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
1908 	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
1909 	/*
1910 	 * Pick a random heap to split
1911 	 */
1912 	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
1913 
1914 	/*
1915 	 * Zones are created prior to parsing the views as zone budget is fixed
1916 	 * per sizeclass and special types identified while parsing are redirected
1917 	 * as they are discovered.
1918 	 */
1919 	kalloc_type_heap_init();
1920 
1921 	/*
1922 	 * Parse __kalloc_var sections and build array of pointers to views that
1923 	 * aren't rediected in kt_buffer.
1924 	 */
1925 	kt_count = kalloc_type_view_parse(KTV_VAR);
1926 	assert(kt_count < UINT32_MAX);
1927 
1928 #if MACH_ASSERT
1929 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
1930 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
1931 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1932 #endif
1933 
1934 	/*
1935 	 * Sort based on size class and signature
1936 	 */
1937 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
1938 	    kalloc_type_cmp_var);
1939 
1940 	buf_start = kalloc_type_handle_parray_var();
1941 
1942 	/*
1943 	 * Build a skip list that holds starts of unique signatures
1944 	 */
1945 	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
1946 	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
1947 	    buf_start);
1948 	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
1949 	/*
1950 	 * If we have only one heap then other elements share heap with pointer
1951 	 * arrays
1952 	 */
1953 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
1954 		panic("kt_var_heaps is too small");
1955 	}
1956 
1957 	kmem_shuffle(shuffle_buf, flex_heap_count);
1958 	/*
1959 	 * The index of the heap we decide to split is placed twice in the shuffle
1960 	 * buffer so that it gets twice the number of signatures that we split
1961 	 * evenly
1962 	 */
1963 	shuffle_buf[flex_heap_count] = split_heap;
1964 	split_heap += (fixed_heaps + 1);
1965 
1966 	for (uint32_t i = 1; i <= unique_sig; i++) {
1967 		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
1968 		    fixed_heaps + 1;
1969 		uint32_t start = kt_skip_list_start[i - 1];
1970 		uint32_t end = kt_skip_list_start[i];
1971 
1972 		assert(heap_id <= kt_var_heaps);
1973 		if (heap_id == split_heap) {
1974 			kalloc_type_split_heap(start, end, heap_id);
1975 			continue;
1976 		}
1977 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1978 		    &kt_buffer[end].ktv_var, heap_id);
1979 	}
1980 }
1981 
1982 __startup_func
1983 static void
kalloc_init(void)1984 kalloc_init(void)
1985 {
1986 	/*
1987 	 * Allocate scratch space to parse kalloc_type_views and create
1988 	 * other structures necessary to process them.
1989 	 */
1990 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
1991 
1992 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
1993 	kalloc_zsize_compute();
1994 
1995 	/* Initialize kalloc data buffers heap */
1996 	kalloc_heap_init(KHEAP_DATA_BUFFERS);
1997 
1998 	/* Initialize kalloc shared data buffers heap */
1999 	kalloc_heap_init(KHEAP_DATA_SHARED);
2000 
2001 	/* Initialize kalloc shared buffers heap */
2002 	kalloc_heap_init(KHEAP_EARLY);
2003 
2004 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
2005 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT | KMA_SPRAYQTN, VM_KERN_MEMORY_KALLOC);
2006 
2007 	/*
2008 	 * Handle fixed size views
2009 	 */
2010 	kalloc_type_view_init_fixed();
2011 
2012 	/*
2013 	 * Reset
2014 	 */
2015 	bzero(kt_buffer, kt_scratch_size);
2016 	kt_count = max_count;
2017 
2018 	/*
2019 	 * Handle variable size views
2020 	 */
2021 	kalloc_type_view_init_var();
2022 
2023 	/*
2024 	 * Free resources used
2025 	 */
2026 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2027 }
2028 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2029 
2030 #pragma mark accessors
2031 
/*
 * Upper bound on a plausible free size: half of the span between
 * VM_MIN_KERNEL_AND_KEXT_ADDRESS and VM_MAX_KERNEL_ADDRESS. kfree_ext()
 * panics when asked to free more than this.
 */
#define KFREE_ABSURD_SIZE \
	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2034 
2035 static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)2036 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2037 {
2038 	thread_t thr = current_thread();
2039 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2040 }
2041 
2042 static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)2043 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2044 {
2045 	thread_t thr = current_thread();
2046 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2047 }
2048 
2049 static kmem_guard_t
kalloc_guard(vm_tag_t tag,uint16_t type_hash,const void * owner)2050 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2051 {
2052 	kmem_guard_t guard = {
2053 		.kmg_atomic      = true,
2054 		.kmg_tag         = tag,
2055 		.kmg_type_hash   = type_hash,
2056 		.kmg_context     = os_hash_kernel_pointer(owner),
2057 	};
2058 
2059 	/*
2060 	 * TODO: this use is really not sufficiently smart.
2061 	 */
2062 
2063 	return guard;
2064 }
2065 
2066 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
2067 
2068 #if __arm64e__
2069 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2070 
2071 /*
2072  * Zone encoding is:
2073  *
2074  *   <PAC SIG><1><1><PTR value><5 bits of size class>
2075  *
2076  * VM encoding is:
2077  *
2078  *   <PAC SIG><1><0><PTR value><14 bits of page count>
2079  *
2080  * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2081  * so that PAC authentication extends the proper sign bit.
2082  */
2083 
2084 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2085 #else /* __arm64e__ */
2086 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2087 
2088 /*
2089  * Zone encoding is:
2090  *
2091  *   <TBI><1><PTR value><5 bits of size class>
2092  *
2093  * VM encoding is:
2094  *
2095  *   <TBI><0><PTR value><14 bits of page count>
2096  */
2097 
2098 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2099 #endif /* __arm64e__*/
2100 
2101 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2102 
2103 __attribute__((always_inline))
2104 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2105 __kalloc_array_decode(vm_address_t ptr)
2106 {
2107 	struct kalloc_result kr;
2108 	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2109 
2110 	if (ptr & zone_mask) {
2111 		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2112 		ptr &= ~0x1full;
2113 	} else if (__probable(ptr)) {
2114 		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2115 		ptr &= ~PAGE_MASK;
2116 		ptr |= zone_mask;
2117 	} else {
2118 		kr.size = 0;
2119 	}
2120 
2121 	kr.addr = (void *)ptr;
2122 	return kr;
2123 }
2124 
2125 static inline void *
__kalloc_array_encode_zone(zone_t z,void * ptr,vm_size_t size __unused)2126 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2127 {
2128 	return (void *)((vm_address_t)ptr | z->z_array_size_class);
2129 }
2130 
2131 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2132 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2133 {
2134 	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2135 
2136 	return addr | atop(size);
2137 }
2138 
2139 #else /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2140 
2141 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2142 
2143 /*
2144  * Encoding is:
2145  * bits  0..46: pointer value
2146  * bits 47..47: 0: zones, 1: VM
2147  * bits 48..63: zones: elem size, VM: number of pages
2148  */
2149 
/* Bit that distinguishes the zone encoding (0) from the VM encoding (1) */
#define KALLOC_ARRAY_TYPE_BIT   47
/* The type bit must lie above the significant bits of a kernel pointer */
static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
/* KHEAP_MAX_SIZE must fit in the bits left above the type bit */
static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2153 
2154 __attribute__((always_inline))
2155 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2156 __kalloc_array_decode(vm_address_t ptr)
2157 {
2158 	struct kalloc_result kr;
2159 	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2160 
2161 	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2162 	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2163 		kr.size <<= PAGE_SHIFT;
2164 	}
2165 	/* sign extend, so that it also works with NULL */
2166 	kr.addr = (void *)((long)(ptr << shift) >> shift);
2167 
2168 	return kr;
2169 }
2170 
2171 static inline void *
__kalloc_array_encode_zone(zone_t z __unused,void * ptr,vm_size_t size)2172 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2173 {
2174 	vm_address_t addr = (vm_address_t)ptr;
2175 
2176 	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2177 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2178 
2179 	return (void *)addr;
2180 }
2181 
2182 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2183 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2184 {
2185 	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2186 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2187 
2188 	return addr;
2189 }
2190 
2191 #endif /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2192 
2193 vm_size_t
kalloc_next_good_size(vm_size_t size,uint32_t period)2194 kalloc_next_good_size(vm_size_t size, uint32_t period)
2195 {
2196 	uint32_t scale = kalloc_log2down((uint32_t)size);
2197 	vm_size_t step, size_class;
2198 
2199 	if (size < KHEAP_STEP_START) {
2200 		return KHEAP_STEP_START;
2201 	}
2202 	if (size < 2 * KHEAP_STEP_START) {
2203 		return 2 * KHEAP_STEP_START;
2204 	}
2205 
2206 	if (size < KHEAP_MAX_SIZE) {
2207 		step = 1ul << (scale - 1);
2208 	} else {
2209 		step = round_page(1ul << (scale - kalloc_log2down(period)));
2210 	}
2211 
2212 	size_class = (size + step) & -step;
2213 #if KASAN_CLASSIC
2214 	if (size > K_SIZE_CLASS(size_class)) {
2215 		return kalloc_next_good_size(size_class, period);
2216 	}
2217 	size_class = K_SIZE_CLASS(size_class);
2218 #endif
2219 	return size_class;
2220 }
2221 
2222 
2223 #pragma mark kalloc
2224 
2225 static inline kalloc_heap_t
kalloc_type_get_heap(kalloc_type_flags_t kt_flags)2226 kalloc_type_get_heap(kalloc_type_flags_t kt_flags)
2227 {
2228 	/*
2229 	 * Redirect data-only views
2230 	 */
2231 	if (kalloc_type_is_data(kt_flags)) {
2232 		return KHEAP_DATA_BUFFERS;
2233 	}
2234 
2235 	if (kt_flags & KT_PROCESSED) {
2236 		return KHEAP_KT_VAR;
2237 	}
2238 
2239 	return KHEAP_DEFAULT;
2240 }
2241 
2242 
2243 __attribute__((noinline))
2244 static struct kalloc_result
kalloc_large(kalloc_heap_t kheap,vm_size_t req_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2245 kalloc_large(
2246 	kalloc_heap_t         kheap,
2247 	vm_size_t             req_size,
2248 	zalloc_flags_t        flags,
2249 	uint16_t              kt_hash,
2250 	void                 *owner __unused)
2251 {
2252 	kma_flags_t kma_flags = KMA_KASAN_GUARD;
2253 	vm_tag_t tag;
2254 	vm_offset_t addr, size;
2255 
2256 	if (flags & Z_NOFAIL) {
2257 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2258 		    (size_t)req_size);
2259 	}
2260 
2261 	/*
2262 	 * kmem_alloc could block so we return if noblock
2263 	 *
2264 	 * also, reject sizes larger than our address space is quickly,
2265 	 * as kt_size or IOMallocArraySize() expect this.
2266 	 */
2267 	if ((flags & Z_NOWAIT) ||
2268 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2269 		return (struct kalloc_result){ };
2270 	}
2271 
2272 	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2273 		return (struct kalloc_result){ };
2274 	}
2275 
2276 	/*
2277 	 * (73465472) on Intel we didn't use to pass this flag,
2278 	 * which in turned allowed kalloc_large() memory to be shared
2279 	 * with user directly.
2280 	 *
2281 	 * We're bound by this unfortunate ABI.
2282 	 */
2283 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2284 #ifndef __x86_64__
2285 		kma_flags |= KMA_KOBJECT;
2286 #endif
2287 	} else {
2288 		assert(kheap == KHEAP_DATA_BUFFERS);
2289 	}
2290 	if (flags & Z_NOPAGEWAIT) {
2291 		kma_flags |= KMA_NOPAGEWAIT;
2292 	}
2293 	if (flags & Z_ZERO) {
2294 		kma_flags |= KMA_ZERO;
2295 	}
2296 	if (kheap == KHEAP_DATA_BUFFERS) {
2297 		kma_flags |= KMA_DATA;
2298 	} else if (kheap == KHEAP_DATA_SHARED) {
2299 		kma_flags |= KMA_DATA_SHARED;
2300 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2301 		kma_flags |= KMA_SPRAYQTN;
2302 	}
2303 
2304 
2305 	tag = zalloc_flags_get_tag(flags);
2306 	if (flags & Z_VM_TAG_BT_BIT) {
2307 		tag = vm_tag_bt() ?: tag;
2308 	}
2309 	if (tag == VM_KERN_MEMORY_NONE) {
2310 		tag = kheap->kh_tag;
2311 	}
2312 
2313 	size = round_page(req_size);
2314 	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2315 		req_size = round_page(size);
2316 	}
2317 
2318 	addr = kmem_alloc_guard(kernel_map, req_size, 0,
2319 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2320 
2321 	if (addr != 0) {
2322 		counter_inc(&kalloc_large_count);
2323 		counter_add(&kalloc_large_total, size);
2324 		KALLOC_ZINFO_SALLOC(size);
2325 		if (flags & Z_KALLOC_ARRAY) {
2326 			addr = __kalloc_array_encode_vm(addr, req_size);
2327 		}
2328 	} else {
2329 		addr = 0;
2330 	}
2331 
2332 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2333 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2334 }
2335 
2336 #if KASAN
2337 
/*
 * Tell the sanitizer which part of an element is actually in use.
 *
 * addr: start of the element
 * size: size of the whole element
 * used: number of bytes requested by the caller
 */
static inline void
kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
{
#if KASAN_CLASSIC
	/*
	 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
	 * tagging of the memory region is performed here.
	 */
	kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
	    __builtin_frame_address(0));
#endif /* KASAN_CLASSIC */

#if KASAN_TBI
	/* a used size of 0 is passed on as 1 */
	kasan_tbi_retag_unused_space(addr, size, used ? :1);
#endif /* KASAN_TBI */
}
2354 #endif /* KASAN */
2355 
2356 static inline struct kalloc_result
kalloc_zone(zone_t z,zone_stats_t zstats,zalloc_flags_t flags,vm_size_t req_size)2357 kalloc_zone(
2358 	zone_t                  z,
2359 	zone_stats_t            zstats,
2360 	zalloc_flags_t          flags,
2361 	vm_size_t               req_size)
2362 {
2363 	struct kalloc_result kr;
2364 	vm_size_t esize;
2365 
2366 	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2367 	esize = kr.size;
2368 
2369 	if (__probable(kr.addr)) {
2370 		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2371 			req_size = esize;
2372 		} else {
2373 			kr.size = req_size;
2374 		}
2375 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2376 		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2377 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2378 
2379 #if KASAN
2380 		kalloc_mark_unused_space(kr.addr, esize, kr.size);
2381 #endif /* KASAN */
2382 
2383 		if (flags & Z_KALLOC_ARRAY) {
2384 			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2385 		}
2386 	}
2387 
2388 	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2389 	return kr;
2390 }
2391 
2392 static zone_id_t
kalloc_use_early_heap(kalloc_heap_t kheap,zone_stats_t zstats,zone_id_t zstart,zalloc_flags_t * flags)2393 kalloc_use_early_heap(
2394 	kalloc_heap_t           kheap,
2395 	zone_stats_t            zstats,
2396 	zone_id_t               zstart,
2397 	zalloc_flags_t         *flags)
2398 {
2399 	if (!zone_is_data_kheap(kheap->kh_heap_id)) {
2400 		zone_stats_t zstats_cpu = zpercpu_get(zstats);
2401 
2402 		if (os_atomic_load(&zstats_cpu->zs_alloc_not_early, relaxed) == 0) {
2403 			*flags |= Z_SET_NOTEARLY;
2404 			return KHEAP_EARLY->kh_zstart;
2405 		}
2406 	}
2407 
2408 	return zstart;
2409 }
2410 
2411 #undef kalloc_ext
2412 
2413 struct kalloc_result
kalloc_ext(void * kheap_or_kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2414 kalloc_ext(
2415 	void                   *kheap_or_kt_view,
2416 	vm_size_t               size,
2417 	zalloc_flags_t          flags,
2418 	void                   *owner)
2419 {
2420 	kalloc_type_var_view_t kt_view;
2421 	kalloc_heap_t kheap;
2422 	zone_stats_t zstats = NULL;
2423 	zone_t z;
2424 	uint16_t kt_hash;
2425 	zone_id_t zstart;
2426 
2427 	if (kt_is_var_view(kheap_or_kt_view)) {
2428 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2429 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2430 		/*
2431 		 * Use stats from view if present, else use stats from kheap.
2432 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2433 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2434 		 * use stats from the respective zones.
2435 		 */
2436 		zstats  = kt_view->kt_stats;
2437 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2438 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
2439 	} else {
2440 		kt_view = NULL;
2441 		kheap   = kheap_or_kt_view;
2442 		kt_hash = kheap->kh_type_hash;
2443 		zstart  = kheap->kh_zstart;
2444 	}
2445 
2446 	if (!zstats) {
2447 		zstats = kheap->kh_stats;
2448 	}
2449 
2450 	zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
2451 	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2452 	if (z) {
2453 		return kalloc_zone(z, zstats, flags, size);
2454 	} else {
2455 		return kalloc_large(kheap, size, flags, kt_hash, owner);
2456 	}
2457 }
2458 
2459 #if XNU_PLATFORM_MacOSX
2460 void *
2461 kalloc_external(vm_size_t size);
2462 void *
kalloc_external(vm_size_t size)2463 kalloc_external(vm_size_t size)
2464 {
2465 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2466 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2467 }
2468 #endif /* XNU_PLATFORM_MacOSX */
2469 
2470 void *
2471 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2472 void *
kalloc_data_external(vm_size_t size,zalloc_flags_t flags)2473 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2474 {
2475 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2476 	return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags);
2477 }
2478 
2479 void *
2480 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags);
2481 void *
kalloc_shared_data_external(vm_size_t size,zalloc_flags_t flags)2482 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags)
2483 {
2484 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
2485 	return kheap_alloc(KHEAP_DATA_SHARED, size, flags);
2486 }
2487 
2488 __abortlike
2489 static void
kalloc_data_require_panic(void * addr,vm_size_t size)2490 kalloc_data_require_panic(void *addr, vm_size_t size)
2491 {
2492 	zone_id_t zid = zone_id_for_element(addr, size);
2493 
2494 	if (zid != ZONE_ID_INVALID) {
2495 		zone_t z = &zone_array[zid];
2496 		zone_security_flags_t zsflags = zone_security_array[zid];
2497 
2498 		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
2499 			panic("kalloc_data_require failed: address %p in [%s%s]",
2500 			    addr, zone_heap_name(z), zone_name(z));
2501 		}
2502 
2503 		panic("kalloc_data_require failed: address %p in [%s%s], "
2504 		    "size too large %zd > %zd", addr,
2505 		    zone_heap_name(z), zone_name(z),
2506 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2507 	} else {
2508 		panic("kalloc_data_require failed: address %p not in zone native map",
2509 		    addr);
2510 	}
2511 }
2512 
2513 __abortlike
2514 static void
kalloc_non_data_require_panic(void * addr,vm_size_t size)2515 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2516 {
2517 	zone_id_t zid = zone_id_for_element(addr, size);
2518 
2519 	if (zid != ZONE_ID_INVALID) {
2520 		zone_t z = &zone_array[zid];
2521 		zone_security_flags_t zsflags = zone_security_array[zid];
2522 
2523 		switch (zsflags.z_kheap_id) {
2524 		case KHEAP_ID_NONE:
2525 		case KHEAP_ID_DATA_BUFFERS:
2526 		case KHEAP_ID_DATA_SHARED:
2527 		case KHEAP_ID_KT_VAR:
2528 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2529 			    addr, zone_heap_name(z), zone_name(z));
2530 		default:
2531 			break;
2532 		}
2533 
2534 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2535 		    "size too large %zd > %zd", addr,
2536 		    zone_heap_name(z), zone_name(z),
2537 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2538 	} else {
2539 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2540 		    addr);
2541 	}
2542 }
2543 
2544 void
kalloc_data_require(void * addr,vm_size_t size)2545 kalloc_data_require(void *addr, vm_size_t size)
2546 {
2547 	zone_id_t zid = zone_id_for_element(addr, size);
2548 
2549 	if (zid != ZONE_ID_INVALID) {
2550 		zone_t z = &zone_array[zid];
2551 		zone_security_flags_t zsflags = zone_security_array[zid];
2552 		if (zone_is_data_kheap(zsflags.z_kheap_id) &&
2553 		    size <= zone_elem_inner_size(z)) {
2554 			return;
2555 		}
2556 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2557 	    (vm_address_t)pgz_decode(addr, size), size)) {
2558 		return;
2559 	}
2560 
2561 	kalloc_data_require_panic(addr, size);
2562 }
2563 
2564 void
kalloc_non_data_require(void * addr,vm_size_t size)2565 kalloc_non_data_require(void *addr, vm_size_t size)
2566 {
2567 	zone_id_t zid = zone_id_for_element(addr, size);
2568 
2569 	if (zid != ZONE_ID_INVALID) {
2570 		zone_t z = &zone_array[zid];
2571 		zone_security_flags_t zsflags = zone_security_array[zid];
2572 		switch (zsflags.z_kheap_id) {
2573 		case KHEAP_ID_NONE:
2574 			if (!zsflags.z_kalloc_type) {
2575 				break;
2576 			}
2577 			OS_FALLTHROUGH;
2578 		case KHEAP_ID_KT_VAR:
2579 			if (size < zone_elem_inner_size(z)) {
2580 				return;
2581 			}
2582 			break;
2583 		default:
2584 			break;
2585 		}
2586 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2587 	    (vm_address_t)pgz_decode(addr, size), size)) {
2588 		return;
2589 	}
2590 
2591 	kalloc_non_data_require_panic(addr, size);
2592 }
2593 
2594 void *
kalloc_type_impl_external(kalloc_type_view_t kt_view,zalloc_flags_t flags)2595 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2596 {
2597 	/*
2598 	 * Callsites from a kext that aren't in the BootKC on macOS or
2599 	 * any callsites on armv7 are not processed during startup,
2600 	 * default to using kheap_alloc
2601 	 *
2602 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2603 	 * NULL as we need to use the vm for the allocation
2604 	 *
2605 	 */
2606 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2607 		kalloc_heap_t kheap;
2608 		vm_size_t size;
2609 
2610 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2611 		size  = kalloc_type_get_size(kt_view->kt_size);
2612 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
2613 		return kalloc_ext(kheap, size, flags, NULL).addr;
2614 	}
2615 
2616 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2617 	return kalloc_type_impl(kt_view, flags);
2618 }
2619 
2620 void *
2621 kalloc_type_var_impl_external(
2622 	kalloc_type_var_view_t  kt_view,
2623 	vm_size_t               size,
2624 	zalloc_flags_t          flags,
2625 	void                   *owner);
2626 void *
kalloc_type_var_impl_external(kalloc_type_var_view_t kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2627 kalloc_type_var_impl_external(
2628 	kalloc_type_var_view_t  kt_view,
2629 	vm_size_t               size,
2630 	zalloc_flags_t          flags,
2631 	void                   *owner)
2632 {
2633 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2634 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2635 }
2636 
2637 #pragma mark kfree
2638 
2639 __abortlike
2640 static void
kfree_heap_confusion_panic(kalloc_heap_t kheap,void * data,size_t size,zone_t z)2641 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2642 {
2643 	zone_security_flags_t zsflags = zone_security_config(z);
2644 	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2645 
2646 	if (zsflags.z_kalloc_type) {
2647 		panic_include_kalloc_types = true;
2648 		kalloc_type_src_zone = z;
2649 		panic("kfree: addr %p found in kalloc type zone '%s'"
2650 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2651 	}
2652 
2653 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2654 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2655 		    data, size, zone_heap_name(z), z->z_name);
2656 	} else {
2657 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2658 		    data, size, zone_heap_name(z), kheap_name);
2659 	}
2660 }
2661 
2662 __abortlike
2663 static void
kfree_size_confusion_panic(zone_t z,void * data,size_t oob_offs,size_t size,size_t zsize)2664 kfree_size_confusion_panic(zone_t z, void *data,
2665     size_t oob_offs, size_t size, size_t zsize)
2666 {
2667 	if (z) {
2668 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2669 		    "with elem_size %zd",
2670 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2671 	} else {
2672 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2673 		    data, size, oob_offs);
2674 	}
2675 }
2676 
/*
 * Panic because a free was attempted with a size that cannot be valid
 * (kfree_ext() calls this for sizes above KFREE_ABSURD_SIZE).
 */
__abortlike
static void
kfree_size_invalid_panic(void *data, size_t size)
{
	panic("kfree: addr %p trying to free with nonsensical size %zd",
	    data, size);
}
2684 
/*
 * Panic because the element at `data` has a size that lies outside of the
 * [min_size, max_size] bounds the caller asked for.
 */
__abortlike
static void
kfree_size_require_panic(void *data, size_t size, size_t min_size,
    size_t max_size)
{
	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
	    data, size, min_size, max_size);
}
2693 
2694 static void
kfree_size_require(kalloc_heap_t kheap,void * addr,vm_size_t min_size,vm_size_t max_size)2695 kfree_size_require(
2696 	kalloc_heap_t kheap,
2697 	void *addr,
2698 	vm_size_t min_size,
2699 	vm_size_t max_size)
2700 {
2701 	assert3u(min_size, <=, max_size);
2702 	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2703 	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2704 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2705 	if (elem_size > max_zone_size || elem_size < min_size) {
2706 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2707 	}
2708 }
2709 
2710 static void
kfree_large(vm_offset_t addr,vm_size_t size,kmf_flags_t flags,void * owner)2711 kfree_large(
2712 	vm_offset_t             addr,
2713 	vm_size_t               size,
2714 	kmf_flags_t             flags,
2715 	void                   *owner)
2716 {
2717 	size = kmem_free_guard(kernel_map, addr, size,
2718 	    flags | KMF_TAG | KMF_KASAN_GUARD,
2719 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2720 
2721 	counter_dec(&kalloc_large_count);
2722 	counter_add(&kalloc_large_total, -(uint64_t)size);
2723 	KALLOC_ZINFO_SFREE(size);
2724 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2725 }
2726 
2727 static void
kfree_zone(void * kheap_or_kt_view __unsafe_indexable,void * data,vm_size_t size,zone_t z,vm_size_t zsize)2728 kfree_zone(
2729 	void                   *kheap_or_kt_view __unsafe_indexable,
2730 	void                   *data,
2731 	vm_size_t               size,
2732 	zone_t                  z,
2733 	vm_size_t               zsize)
2734 {
2735 	zone_security_flags_t zsflags = zone_security_config(z);
2736 	kalloc_type_var_view_t kt_view;
2737 	kalloc_heap_t kheap;
2738 	zone_stats_t zstats = NULL;
2739 
2740 	if (kt_is_var_view(kheap_or_kt_view)) {
2741 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2742 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2743 		/*
2744 		 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2745 		 * we will end up having incorrect stats. Cross frees may happen on
2746 		 * macOS due to allocation from an unprocessed view and free from
2747 		 * a processed view or vice versa.
2748 		 */
2749 		zstats  = kt_view->kt_stats;
2750 	} else {
2751 		kt_view = NULL;
2752 		kheap   = kheap_or_kt_view;
2753 	}
2754 
2755 	if (!zstats) {
2756 		zstats = kheap->kh_stats;
2757 	}
2758 
2759 	zsflags = zone_security_config(z);
2760 	if (kheap == KHEAP_DATA_BUFFERS) {
2761 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2762 			kfree_heap_confusion_panic(kheap, data, size, z);
2763 		}
2764 	} else {
2765 		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2766 		    (zsflags.z_kheap_id != KHEAP_ID_EARLY)) {
2767 			kfree_heap_confusion_panic(kheap, data, size, z);
2768 		}
2769 	}
2770 
2771 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2772 
2773 	/* needs to be __nosan because the user size might be partial */
2774 	__nosan_bzero(data, zsize);
2775 	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2776 }
2777 
2778 void
kfree_ext(void * kheap_or_kt_view,void * data,vm_size_t size)2779 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2780 {
2781 	vm_size_t bucket_size;
2782 	zone_t z;
2783 
2784 	if (data == NULL) {
2785 		return;
2786 	}
2787 
2788 	if (size > KFREE_ABSURD_SIZE) {
2789 		kfree_size_invalid_panic(data, size);
2790 	}
2791 
2792 	if (size <= KHEAP_MAX_SIZE) {
2793 		vm_size_t oob_offs;
2794 
2795 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2796 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2797 			kfree_size_confusion_panic(z, data,
2798 			    oob_offs, size, bucket_size);
2799 		}
2800 
2801 		data = (char *)data - oob_offs;
2802 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2803 	} else {
2804 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2805 	}
2806 }
2807 
2808 void
kfree_addr_ext(kalloc_heap_t kheap,void * data)2809 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2810 {
2811 	vm_offset_t oob_offs;
2812 	vm_size_t size, usize = 0;
2813 	zone_t z;
2814 
2815 	if (data == NULL) {
2816 		return;
2817 	}
2818 
2819 	size = zone_element_size(data, &z, true, &oob_offs);
2820 	if (size) {
2821 #if KASAN_CLASSIC
2822 		usize = kasan_user_size((vm_offset_t)data);
2823 #endif
2824 		data = (char *)data - oob_offs;
2825 		kfree_zone(kheap, data, usize, z, size);
2826 	} else {
2827 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2828 	}
2829 }
2830 
2831 #if XNU_PLATFORM_MacOSX
2832 void
2833 kfree_external(void *addr, vm_size_t size);
2834 void
kfree_external(void * addr,vm_size_t size)2835 kfree_external(void *addr, vm_size_t size)
2836 {
2837 	kalloc_heap_t kheap = KHEAP_DEFAULT;
2838 
2839 	kfree_ext(kheap, addr, size);
2840 }
2841 #endif /* XNU_PLATFORM_MacOSX */
2842 
2843 void
2844 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2845     vm_size_t min_sz, vm_size_t max_sz)
2846 {
2847 	if (__improbable(addr == NULL)) {
2848 		return;
2849 	}
2850 	kfree_size_require(kheap, addr, min_sz, max_sz);
2851 	kfree_addr_ext(kheap, addr);
2852 }
2853 
2854 void *
kalloc_type_impl_internal(kalloc_type_view_t kt_view,zalloc_flags_t flags)2855 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2856 {
2857 	zone_stats_t zs = kt_view->kt_zv.zv_stats;
2858 	zone_t       z  = kt_view->kt_zv.zv_zone;
2859 	zone_stats_t zs_cpu = zpercpu_get(zs);
2860 
2861 	if ((flags & Z_SET_NOTEARLY) ||
2862 	    os_atomic_load(&zs_cpu->zs_alloc_not_early, relaxed)) {
2863 		return zalloc_ext(z, zs, flags).addr;
2864 	}
2865 
2866 	assert(!zone_is_data_kheap(zone_security_config(z).z_kheap_id));
2867 	return zalloc_ext(kt_view->kt_zearly, zs, flags | Z_SET_NOTEARLY).addr;
2868 }
2869 
2870 void
kfree_type_impl_external(kalloc_type_view_t kt_view,void * ptr)2871 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
2872 {
2873 	/*
2874 	 * If callsite is from a kext that isn't in the BootKC, it wasn't
2875 	 * processed during startup so default to using kheap_alloc
2876 	 *
2877 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2878 	 * NULL as we need to use the vm for the allocation/free
2879 	 */
2880 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
2881 		kalloc_heap_t kheap;
2882 		vm_size_t size;
2883 
2884 		size  = kalloc_type_get_size(kt_view->kt_size);
2885 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
2886 		return kheap_free(kheap, ptr, size);
2887 	}
2888 	return kfree_type_impl(kt_view, ptr);
2889 }
2890 
2891 void
2892 kfree_type_var_impl_external(
2893 	kalloc_type_var_view_t  kt_view,
2894 	void                   *ptr,
2895 	vm_size_t               size);
2896 void
kfree_type_var_impl_external(kalloc_type_var_view_t kt_view,void * ptr,vm_size_t size)2897 kfree_type_var_impl_external(
2898 	kalloc_type_var_view_t  kt_view,
2899 	void                   *ptr,
2900 	vm_size_t               size)
2901 {
2902 	return kfree_type_var_impl(kt_view, ptr, size);
2903 }
2904 
2905 void
2906 kfree_data_external(void *ptr, vm_size_t size);
2907 void
kfree_data_external(void * ptr,vm_size_t size)2908 kfree_data_external(void *ptr, vm_size_t size)
2909 {
2910 	return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
2911 }
2912 
2913 void
2914 kfree_data_addr_external(void *ptr);
2915 void
kfree_data_addr_external(void * ptr)2916 kfree_data_addr_external(void *ptr)
2917 {
2918 	return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
2919 }
2920 
2921 void
2922 kfree_shared_data_external(void *ptr, vm_size_t size);
2923 void
kfree_shared_data_external(void * ptr,vm_size_t size)2924 kfree_shared_data_external(void *ptr, vm_size_t size)
2925 {
2926 	return kheap_free(KHEAP_DATA_SHARED, ptr, size);
2927 }
2928 
2929 void
2930 kfree_shared_data_addr_external(void *ptr);
2931 void
kfree_shared_data_addr_external(void * ptr)2932 kfree_shared_data_addr_external(void *ptr)
2933 {
2934 	return kheap_free_addr(KHEAP_DATA_SHARED, ptr);
2935 }
2936 
2937 #pragma mark krealloc
2938 
/*
 * krealloc_size_invalid_panic:
 *
 * Panic helper used by krealloc_ext() when the caller-provided old size
 * exceeds KFREE_ABSURD_SIZE. Does not return (marked __abortlike).
 *
 * @param data  address that was being reallocated.
 * @param size  the rejected size. It is printed with %zd, so very large
 *              values show up as negative numbers (presumably on
 *              purpose, to make underflowed sizes stand out).
 */
__abortlike
static void
krealloc_size_invalid_panic(void *data, size_t size)
{
	panic("krealloc: addr %p trying to free with nonsensical size %zd",
	    data, size);
}
2946 
2947 
/*
 * krealloc_large:
 *
 * Resize an allocation through kmem_realloc_guard() on kernel_map.
 * krealloc_ext() calls this when neither the old nor the new size is
 * served by a zone.
 *
 * @param kheap     heap the allocation belongs to; selects KMR_DATA /
 *                  KMR_DATA_SHARED and supplies the fallback VM tag.
 * @param addr      current allocation address.
 * @param old_size  current requested size.
 * @param new_size  new requested size.
 * @param flags     zalloc flags, translated into kmr_flags below.
 * @param kt_hash   type hash folded into the kmem guard.
 * @param owner     passed to kalloc_guard(); marked __unused, presumably
 *                  because kalloc_guard() ignores it in some
 *                  configurations (TODO confirm).
 *
 * @returns  { .addr, .size = new_size } on success (the address is
 *           encoded when Z_KALLOC_ARRAY is set), or a zeroed result when
 *           Z_NOWAIT is set, the size is not representable, or the
 *           kmem reallocation fails.
 *
 * Panics when called with Z_NOFAIL.
 */
__attribute__((noinline))
static struct kalloc_result
krealloc_large(
	kalloc_heap_t         kheap,
	vm_offset_t           addr,
	vm_size_t             old_size,
	vm_size_t             new_size,
	zalloc_flags_t        flags,
	uint16_t              kt_hash,
	void                 *owner __unused)
{
	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_KASAN_GUARD;
	/* keep the caller's exact sizes: old_size/new_size get page-rounded later */
	vm_size_t new_req_size = new_size;
	vm_size_t old_req_size = old_size;
	uint64_t delta;
	kmem_return_t kmr;
	vm_tag_t tag;

	if (flags & Z_NOFAIL) {
		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
		    (size_t)new_req_size);
	}

	/*
	 * kmem_alloc could block so we return if noblock
	 *
	 * also, quickly reject sizes larger than our address space,
	 * as kt_size or IOMallocArraySize() expect this.
	 */
	if ((flags & Z_NOWAIT) ||
	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
		return (struct kalloc_result){ };
	}

	/*
	 * (73465472) on Intel we didn't use to pass this flag,
	 * which in turn allowed kalloc_large() memory to be shared
	 * with user directly.
	 *
	 * We're bound by this unfortunate ABI.
	 */
	if ((flags & Z_MAY_COPYINMAP) == 0) {
#ifndef __x86_64__
		kmr_flags |= KMR_KOBJECT;
#endif
	} else {
		assert(kheap == KHEAP_DATA_BUFFERS);
	}
	/* straight translation of zalloc flags into their kmem_realloc equivalents */
	if (flags & Z_NOPAGEWAIT) {
		kmr_flags |= KMR_NOPAGEWAIT;
	}
	if (flags & Z_ZERO) {
		kmr_flags |= KMR_ZERO;
	}
	if (kheap == KHEAP_DATA_BUFFERS) {
		kmr_flags |= KMR_DATA;
	} else if (kheap == KHEAP_DATA_SHARED) {
		kmr_flags |= KMR_DATA_SHARED;
	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
		kmr_flags |= KMR_SPRAYQTN;
	}
	if (flags & Z_REALLOCF) {
		kmr_flags |= KMR_REALLOCF;
	}

#if ZSECURITY_CONFIG(ZONE_TAGGING)
	krealloc_enforce_large_tagging_policy(&kmr_flags, kheap);
#endif /* ZSECURITY_CONFIG(ZONE_TAGGING) */

	/*
	 * VM tag precedence: backtrace-derived tag (when requested and
	 * non-zero), then the tag carried in flags, then the heap's tag.
	 */
	tag = zalloc_flags_get_tag(flags);
	if (flags & Z_VM_TAG_BT_BIT) {
		tag = vm_tag_bt() ?: tag;
	}
	if (tag == VM_KERN_MEMORY_NONE) {
		tag = kheap->kh_tag;
	}

	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
	    kmr_flags, kalloc_guard(tag, kt_hash, owner));

	/* accounting below is done in page-rounded sizes */
	new_size = round_page(new_req_size);
	old_size = round_page(old_req_size);

	if (kmr.kmr_address != 0) {
		/* may wrap when shrinking; the counters are updated with the wrapped value */
		delta = (uint64_t)(new_size - old_size);
	} else if (flags & Z_REALLOCF) {
		/* failure with Z_REALLOCF: account for the old allocation going away */
		counter_dec(&kalloc_large_count);
		delta = (uint64_t)(-old_size);
	} else {
		delta = 0;
	}

	counter_add(&kalloc_large_total, delta);
	KALLOC_ZINFO_SALLOC(delta);

	if (addr != 0 || (flags & Z_REALLOCF)) {
		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
		    void*, addr);
	}
	if (__improbable(kmr.kmr_address == 0)) {
		return (struct kalloc_result){ };
	}

	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
	    void*, kmr.kmr_address);

	if (flags & Z_KALLOC_ARRAY) {
		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
		    new_req_size);
	}
	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
}
3060 
3061 #undef krealloc_ext
3062 
/*
 * krealloc_ext:
 *
 * Resize, allocate or free memory belonging to a kalloc heap or to a
 * variable-size kalloc_type view.
 *
 * @param kheap_or_kt_view  either a var view (as recognized by
 *                          kt_is_var_view()) or a kalloc_heap_t.
 * @param addr              current allocation, or NULL to allocate only.
 * @param old_size          size the caller believes @c addr has. Panics
 *                          when above KFREE_ABSURD_SIZE, or when it does
 *                          not fit the zone element holding @c addr.
 * @param new_size          requested size (0 requests no new memory).
 * @param flags             zalloc flags; Z_FULLSIZE, Z_REALLOCF and
 *                          Z_KALLOC_ARRAY are interpreted directly here.
 * @param owner             forwarded to the large (VM-backed) paths.
 *
 * @returns  the resulting address and size. A zeroed result is returned
 *           when @c addr is NULL and @c new_size is 0. When a new
 *           allocation fails, the old one is released only if
 *           Z_REALLOCF is set. With Z_KALLOC_ARRAY the returned address
 *           is encoded by the __kalloc_array_encode_*() helpers.
 */
struct kalloc_result
krealloc_ext(
	void                 *kheap_or_kt_view __unsafe_indexable,
	void                 *addr,
	vm_size_t             old_size,
	vm_size_t             new_size,
	zalloc_flags_t        flags,
	void                 *owner)
{
	vm_size_t old_bucket_size, new_bucket_size, min_size;
	kalloc_type_var_view_t kt_view;
	kalloc_heap_t kheap;
	zone_stats_t zstats = NULL;
	struct kalloc_result kr;
	vm_offset_t oob_offs = 0;
	zone_t old_z, new_z;
	uint16_t kt_hash = 0;
	zone_id_t zstart;

	if (old_size > KFREE_ABSURD_SIZE) {
		krealloc_size_invalid_panic(addr, old_size);
	}

	if (addr == NULL && new_size == 0) {
		return (struct kalloc_result){ };
	}

	if (kt_is_var_view(kheap_or_kt_view)) {
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
		/*
		 * Similar to kalloc_ext: Use stats from view if present,
		 * else use stats from kheap.
		 *
		 * krealloc_type isn't exposed to kexts, so we don't need to
		 * handle cross frees and can rely on stats from view or kheap.
		 */
		zstats  = kt_view->kt_stats;
		kt_hash = KT_GET_HASH(kt_view->kt_flags);
		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
	} else {
		kt_view = NULL;
		kheap   = kheap_or_kt_view;
		kt_hash = kheap->kh_type_hash;
		zstart  = kheap->kh_zstart;
	}

	if (!zstats) {
		zstats = kheap->kh_stats;
	}
	/*
	 * Find out the size of the bucket in which the new sized allocation
	 * would land. If it matches the bucket of the original allocation,
	 * simply return the same address.
	 */
	if (new_size == 0) {
		new_z = ZONE_NULL;
		new_bucket_size = new_size = 0;
	} else {
		zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
		/* no zone for this size: the allocation is page-granular */
		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
	}
#if !KASAN_CLASSIC
	if (flags & Z_FULLSIZE) {
		new_size = new_bucket_size;
	}
#endif /* !KASAN_CLASSIC */

	/* Work out which bucket (zone element or page-rounded VM) holds addr. */
	if (addr == NULL) {
		old_z = ZONE_NULL;
		old_size = old_bucket_size = 0;
	} else if (kheap_size_from_zone(addr, old_size, flags)) {
		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
			kfree_size_confusion_panic(old_z, addr,
			    oob_offs, old_size, old_bucket_size);
		}
		__builtin_assume(old_z != ZONE_NULL);
	} else {
		old_z = ZONE_NULL;
		old_bucket_size = round_page(old_size);
	}
	/* number of bytes that must survive the reallocation */
	min_size = MIN(old_size, new_size);

	/* Same zone bucket: the element is reused in place. */
	if (old_bucket_size == new_bucket_size && old_z) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
		    new_size, new_bucket_size);
		if (kr.addr != addr) {
			/* the adjusted start moved: slide the payload and clear the rest */
			memmove(kr.addr, addr, min_size);
			bzero((char *)kr.addr + min_size,
			    kr.size - min_size);
		}
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
#if KASAN
		/*
		 * On KASAN kernels, treat a reallocation effectively as a new
		 * allocation and add a sanity check around the existing one
		 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't
		 * amount to much extra work, on KASAN_TBI, assign a new tag both
		 * to the buffer and to the potential free space.
		 */
#if KASAN_CLASSIC
		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		/*
		 * Validate the current buffer, then generate a new tag,
		 * even if the address is stable, it's a "new" allocation.
		 */
		__asan_loadN((vm_offset_t)addr, old_size);
		kr.addr = vm_memtag_generate_and_store_tag(kr.addr, kr.size);
		kasan_tbi_retag_unused_space(kr.addr, new_bucket_size, kr.size);
#endif /* KASAN_TBI */
#endif /* KASAN */
		goto out_success;
	}

#if !KASAN
	/*
	 * Fallthrough to krealloc_large() for KASAN,
	 * because we can't use kasan_check_alloc()
	 * on kalloc_large() memory.
	 *
	 * kmem_realloc_guard() will perform all the validations,
	 * and re-tagging.
	 */
	if (old_bucket_size == new_bucket_size) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
		goto out_success;
	}
#endif

	/* Neither side is zone backed: hand the whole job to krealloc_large(). */
	if (addr && !old_z && new_size && !new_z) {
		return krealloc_large(kheap, (vm_offset_t)addr,
		           old_size, new_size, flags, kt_hash, owner);
	}

	/*
	 * Remaining cases: allocate the new buffer (if any), copy, then
	 * release the old buffer. Z_KALLOC_ARRAY is masked off here because
	 * the encoding is applied once at out_success.
	 *
	 * The final "else if" always holds when reached: the only other
	 * combination (addr set, no old zone, no new zone, non-zero size)
	 * returned through krealloc_large() above, so kr is always assigned.
	 */
	if (!new_size) {
		kr.addr = NULL;
		kr.size = 0;
	} else if (new_z) {
		kr = kalloc_zone(new_z, zstats,
		    flags & ~Z_KALLOC_ARRAY, new_size);
	} else if (old_z || addr == NULL) {
		kr = kalloc_large(kheap, new_size,
		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
	}

	if (addr && kr.addr) {
		__nosan_memcpy(kr.addr, addr, min_size);
	}

	/*
	 * Release the old buffer when the new allocation succeeded, when the
	 * caller asked for free-on-failure (Z_REALLOCF), or when shrinking
	 * to zero.
	 */
	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
		if (old_z) {
			kfree_zone(kheap_or_kt_view,
			    (char *)addr - oob_offs, old_size,
			    old_z, old_bucket_size);
		} else {
			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
		}
	}

	if (__improbable(kr.addr == NULL)) {
		return kr;
	}

out_success:
	if ((flags & Z_KALLOC_ARRAY) == 0) {
		return kr;
	}

	/* Z_KALLOC_ARRAY: return the encoded form of the address */
	if (new_z) {
		kr.addr = __kalloc_array_encode_zone(new_z,
		    kr.addr, kr.size);
	} else {
		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
		    kr.size);
	}
	return kr;
}
3250 
3251 void *
3252 krealloc_data_external(
3253 	void               *ptr,
3254 	vm_size_t           old_size,
3255 	vm_size_t           new_size,
3256 	zalloc_flags_t      flags);
3257 void *
krealloc_data_external(void * ptr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags)3258 krealloc_data_external(
3259 	void               *ptr,
3260 	vm_size_t           old_size,
3261 	vm_size_t           new_size,
3262 	zalloc_flags_t      flags)
3263 {
3264 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
3265 	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size, flags, NULL).addr;
3266 }
3267 
3268 void *
3269 krealloc_shared_data_external(
3270 	void               *ptr,
3271 	vm_size_t           old_size,
3272 	vm_size_t           new_size,
3273 	zalloc_flags_t      flags);
3274 void *
krealloc_shared_data_external(void * ptr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags)3275 krealloc_shared_data_external(
3276 	void               *ptr,
3277 	vm_size_t           old_size,
3278 	vm_size_t           new_size,
3279 	zalloc_flags_t      flags)
3280 {
3281 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
3282 	return krealloc_ext(KHEAP_DATA_SHARED, ptr, old_size, new_size, flags, NULL).addr;
3283 }
3284 
3285 __startup_func
3286 static void
kheap_init(kalloc_heap_t parent_heap,kalloc_heap_t kheap)3287 kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
3288 {
3289 	kheap->kh_zstart      = parent_heap->kh_zstart;
3290 	kheap->kh_heap_id     = parent_heap->kh_heap_id;
3291 	kheap->kh_tag         = parent_heap->kh_tag;
3292 	kheap->kh_stats       = zalloc_percpu_permanent_type(struct zone_stats);
3293 	zone_view_count += 1;
3294 }
3295 
3296 __startup_func
3297 static void
kheap_init_data(kalloc_heap_t kheap)3298 kheap_init_data(kalloc_heap_t kheap)
3299 {
3300 	kheap_init(KHEAP_DATA_BUFFERS, kheap);
3301 	kheap->kh_views               = KHEAP_DATA_BUFFERS->kh_views;
3302 	KHEAP_DATA_BUFFERS->kh_views  = kheap;
3303 }
3304 
3305 __startup_func
3306 static void
kheap_init_data_shared(kalloc_heap_t kheap)3307 kheap_init_data_shared(kalloc_heap_t kheap)
3308 {
3309 	kheap_init(KHEAP_DATA_SHARED, kheap);
3310 	kheap->kh_views               = KHEAP_DATA_SHARED->kh_views;
3311 	KHEAP_DATA_SHARED->kh_views   = kheap;
3312 }
3313 
3314 __startup_func
3315 static void
kheap_init_var(kalloc_heap_t kheap)3316 kheap_init_var(kalloc_heap_t kheap)
3317 {
3318 	uint16_t idx;
3319 	struct kheap_info *parent_heap;
3320 
3321 	kheap_init(KHEAP_KT_VAR, kheap);
3322 	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
3323 	    KT_VAR__FIRST_FLEXIBLE_HEAP;
3324 	parent_heap = &kalloc_type_heap_array[idx];
3325 	kheap->kh_zstart = parent_heap->kh_zstart;
3326 	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
3327 		(uint32_t) early_random(), 0);
3328 	kheap->kh_views       = parent_heap->kh_views;
3329 	parent_heap->kh_views = kheap;
3330 }
3331 
3332 __startup_func
3333 void
kheap_startup_init(kalloc_heap_t kheap)3334 kheap_startup_init(kalloc_heap_t kheap)
3335 {
3336 	switch (kheap->kh_heap_id) {
3337 	case KHEAP_ID_DATA_BUFFERS:
3338 		kheap_init_data(kheap);
3339 		break;
3340 	case KHEAP_ID_DATA_SHARED:
3341 		kheap_init_data_shared(kheap);
3342 		break;
3343 	case KHEAP_ID_KT_VAR:
3344 		kheap_init_var(kheap);
3345 		break;
3346 	default:
3347 		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
3348 		    kheap->kh_heap_id);
3349 	}
3350 }
3351 
3352 #pragma mark IOKit/libkern helpers
3353 
3354 #if XNU_PLATFORM_MacOSX
3355 
3356 void *
3357 kern_os_malloc_external(size_t size);
3358 void *
kern_os_malloc_external(size_t size)3359 kern_os_malloc_external(size_t size)
3360 {
3361 	if (size == 0) {
3362 		return NULL;
3363 	}
3364 
3365 	return kheap_alloc(KERN_OS_MALLOC, size,
3366 	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
3367 }
3368 
/*
 * kern_os_free_external:
 *
 * free-style release for memory from the KERN_OS_MALLOC heap. No size
 * is supplied by the caller; the free goes through kheap_free_addr().
 *
 * @param addr  allocation to release.
 */
void
kern_os_free_external(void *addr);
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}
3376 
3377 void *
3378 kern_os_realloc_external(void *addr, size_t nsize);
3379 void *
kern_os_realloc_external(void * addr,size_t nsize)3380 kern_os_realloc_external(void *addr, size_t nsize)
3381 {
3382 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
3383 	vm_size_t osize, oob_offs = 0;
3384 
3385 	if (addr == NULL) {
3386 		return kern_os_malloc_external(nsize);
3387 	}
3388 
3389 	osize = zone_element_size(addr, NULL, false, &oob_offs);
3390 	if (osize == 0) {
3391 		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
3392 		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
3393 #if KASAN_CLASSIC
3394 	} else {
3395 		osize = kasan_user_size((vm_offset_t)addr);
3396 #endif
3397 	}
3398 	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3399 }
3400 
3401 #endif /* XNU_PLATFORM_MacOSX */
3402 
3403 void
kern_os_zfree(zone_t zone,void * addr,vm_size_t size)3404 kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3405 {
3406 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3407 #pragma unused(size)
3408 	zfree(zone, addr);
3409 #else
3410 	if (zone_owns(zone, addr)) {
3411 		zfree(zone, addr);
3412 	} else {
3413 		/*
3414 		 * Third party kexts might not know about the operator new
3415 		 * and be allocated from the default heap
3416 		 */
3417 		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3418 		    zone->z_name);
3419 		kheap_free(KHEAP_DEFAULT, addr, size);
3420 	}
3421 #endif
3422 }
3423 
3424 bool
IOMallocType_from_vm(kalloc_type_view_t ktv)3425 IOMallocType_from_vm(kalloc_type_view_t ktv)
3426 {
3427 	return kalloc_type_from_vm(ktv->kt_flags);
3428 }
3429 
3430 void
kern_os_typed_free(kalloc_type_view_t ktv,void * addr,vm_size_t esize)3431 kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3432 {
3433 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3434 #pragma unused(esize)
3435 #else
3436 	/*
3437 	 * For third party kexts that have been compiled with sdk pre macOS 11,
3438 	 * an allocation of an OSObject that is defined in xnu or first pary
3439 	 * kexts, by directly calling new will lead to using the default heap
3440 	 * as it will call OSObject_operator_new_external. If this object
3441 	 * is freed by xnu, it panics as xnu uses the typed free which
3442 	 * requires the object to have been allocated in a kalloc.type zone.
3443 	 * To workaround this issue, detect if the allocation being freed is
3444 	 * from the default heap and allow freeing to it.
3445 	 */
3446 	zone_id_t zid = zone_id_for_element(addr, esize);
3447 	if (__probable(zid < MAX_ZONES)) {
3448 		zone_security_flags_t zsflags = zone_security_array[zid];
3449 		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
3450 			return kheap_free(KHEAP_DEFAULT, addr, esize);
3451 		}
3452 	}
3453 #endif
3454 	kfree_type_impl_external(ktv, addr);
3455 }
3456 
3457 #pragma mark tests
3458 #if DEBUG || DEVELOPMENT
3459 
3460 #include <sys/random.h>
3461 
3462 /*
3463  * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
3464  *
3465  * Note: Presence of zones with name kalloc.type* is used to
3466  * determine if the feature is on.
3467  */
3468 static int
kalloc_type_feature_on(void)3469 kalloc_type_feature_on(void)
3470 {
3471 	boolean_t zone_found = false;
3472 	const char kalloc_type_str[] = "kalloc.type";
3473 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3474 		zone_t z = kalloc_type_zarray[i];
3475 		while (z != NULL) {
3476 			zone_found = true;
3477 			if (strncmp(z->z_name, kalloc_type_str,
3478 			    strlen(kalloc_type_str)) != 0) {
3479 				return 0;
3480 			}
3481 			z = z->z_kt_next;
3482 		}
3483 	}
3484 
3485 	if (!zone_found) {
3486 		return 0;
3487 	}
3488 
3489 	return 1;
3490 }
3491 
3492 /*
3493  * Ensure that the policy uses the zone budget completely
3494  */
3495 static int
kalloc_type_test_policy(int64_t in)3496 kalloc_type_test_policy(int64_t in)
3497 {
3498 	uint16_t zone_budget = (uint16_t) in;
3499 	uint16_t max_bucket_freq = 25;
3500 	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3501 	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3502 	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
3503 	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
3504 	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
3505 	uint16_t wasted_zone_budget = 0, total_types = 0;
3506 	uint16_t n_zones = 0, n_zones_cal = 0;
3507 	int ret = 0;
3508 
3509 	/*
3510 	 * Need a minimum of 2 zones per size class
3511 	 */
3512 	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
3513 		return ret;
3514 	}
3515 	read_random((void *)&random[0], sizeof(random));
3516 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3517 		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
3518 		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;
3519 
3520 		freq_list[i] = r1 > r2 ? r2 : r1;
3521 		freq_total_list[i] = r1 > r2 ? r1 : r2;
3522 	}
3523 	wasted_zone_budget = kalloc_type_apply_policy(
3524 		freq_list, freq_total_list,
3525 		zones_per_sig, zones_per_type, zone_budget);
3526 
3527 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3528 		total_types += freq_total_list[i];
3529 	}
3530 
3531 	n_zones = kmem_get_random16(total_types);
3532 	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
3533 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3534 		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
3535 		    freq_total_list[i], total_types,
3536 		    (i == MAX_K_ZONE(kt_zone_cfg) - 1) ? true : false);
3537 
3538 		n_zones_cal += n_zones_for_type;
3539 
3540 		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
3541 	}
3542 	printf("-----------------------\n%u\t%u\n", total_types,
3543 	    n_zones_cal);
3544 
3545 	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
3546 		ret = 1;
3547 	}
3548 	return ret;
3549 }
3550 
3551 /*
3552  * Ensure that size of adopters of kalloc_type fit in the zone
3553  * they have been assigned.
3554  */
3555 static int
kalloc_type_check_size(zone_t z)3556 kalloc_type_check_size(zone_t z)
3557 {
3558 	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
3559 
3560 	while (kt_cur != NULL) {
3561 		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
3562 			return 0;
3563 		}
3564 		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3565 	}
3566 
3567 	return 1;
3568 }
3569 
/*
 * Minimal type with a single int member, used by
 * kalloc_type_test_data_redirect() to build a kalloc_type view.
 */
struct test_kt_data {
	int a;
};
3573 
3574 static int
kalloc_type_test_data_redirect(void)3575 kalloc_type_test_data_redirect(void)
3576 {
3577 	struct kalloc_type_view ktv_data = {
3578 		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
3579 		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
3580 	};
3581 	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
3582 		printf("%s: data redirect failed\n", __func__);
3583 		return 0;
3584 	}
3585 	return 1;
3586 }
3587 
3588 static int
run_kalloc_type_test(int64_t in,int64_t * out)3589 run_kalloc_type_test(int64_t in, int64_t *out)
3590 {
3591 	*out = 0;
3592 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3593 		zone_t z = kalloc_type_zarray[i];
3594 		while (z != NULL) {
3595 			if (!kalloc_type_check_size(z)) {
3596 				printf("%s: size check failed\n", __func__);
3597 				return 0;
3598 			}
3599 			z = z->z_kt_next;
3600 		}
3601 	}
3602 
3603 	if (!kalloc_type_test_policy(in)) {
3604 		printf("%s: policy check failed\n", __func__);
3605 		return 0;
3606 	}
3607 
3608 	if (!kalloc_type_feature_on()) {
3609 		printf("%s: boot-arg is on but feature isn't\n", __func__);
3610 		return 0;
3611 	}
3612 
3613 	if (!kalloc_type_test_data_redirect()) {
3614 		printf("%s: kalloc_type redirect for all data signature failed\n",
3615 		    __func__);
3616 		return 0;
3617 	}
3618 
3619 	printf("%s: test passed\n", __func__);
3620 
3621 	*out = 1;
3622 	return 0;
3623 }
3624 SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
3625 
3626 static vm_size_t
test_bucket_size(kalloc_heap_t kheap,vm_size_t size)3627 test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
3628 {
3629 	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);
3630 
3631 	return z ? zone_elem_inner_size(z) : round_page(size);
3632 }
3633 
3634 static int
run_kalloc_test_kheap(kalloc_heap_t kheap)3635 run_kalloc_test_kheap(kalloc_heap_t kheap)
3636 {
3637 	uint64_t *data_ptr;
3638 	void *strippedp_old, *strippedp_new;
3639 	size_t alloc_size = 0, old_alloc_size = 0;
3640 	struct kalloc_result kr = {};
3641 
3642 	printf("%s: %s test running\n", __func__, kheap->kh_name);
3643 
3644 	/*
3645 	 * Test size 0: alloc, free, realloc
3646 	 */
3647 	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
3648 	    NULL).addr;
3649 	if (!data_ptr) {
3650 		printf("%s: kalloc 0 returned null\n", __func__);
3651 		return 1;
3652 	}
3653 	kheap_free(kheap, data_ptr, alloc_size);
3654 
3655 	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
3656 	    NULL).addr;
3657 	alloc_size = sizeof(uint64_t) + 1;
3658 	data_ptr = krealloc_ext(kheap, kr.addr, old_alloc_size,
3659 	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
3660 	if (!data_ptr) {
3661 		printf("%s: krealloc -> old size 0 failed\n", __func__);
3662 		return 1;
3663 	}
3664 	*data_ptr = 0;
3665 
3666 	/*
3667 	 * Test krealloc: same sizeclass, different size classes, 2pgs,
3668 	 * VM (with owner)
3669 	 */
3670 	old_alloc_size = alloc_size;
3671 	alloc_size++;
3672 	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
3673 	    Z_WAITOK | Z_NOFAIL, NULL);
3674 
3675 	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
3676 	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);
3677 
3678 	if (!kr.addr || (strippedp_old != strippedp_new) ||
3679 	    (test_bucket_size(kheap, kr.size) !=
3680 	    test_bucket_size(kheap, old_alloc_size))) {
3681 		printf("%s: krealloc -> same size class failed\n", __func__);
3682 		return 1;
3683 	}
3684 	data_ptr = kr.addr;
3685 	*data_ptr = 0;
3686 
3687 	old_alloc_size = alloc_size;
3688 	alloc_size *= 2;
3689 	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
3690 	    Z_WAITOK | Z_NOFAIL, NULL);
3691 
3692 	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
3693 	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);
3694 
3695 	if (!kr.addr || (strippedp_old == strippedp_new) ||
3696 	    (test_bucket_size(kheap, kr.size) ==
3697 	    test_bucket_size(kheap, old_alloc_size))) {
3698 		printf("%s: krealloc -> different size class failed\n", __func__);
3699 		return 1;
3700 	}
3701 	data_ptr = kr.addr;
3702 	*data_ptr = 0;
3703 
3704 	kheap_free(kheap, kr.addr, alloc_size);
3705 
3706 	alloc_size = 3544;
3707 	data_ptr = kalloc_ext(kheap, alloc_size,
3708 	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
3709 	if (!data_ptr) {
3710 		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned not null\n",
3711 		    __func__);
3712 		return 1;
3713 	}
3714 	*data_ptr = 0;
3715 
3716 	data_ptr = krealloc_ext(kheap, data_ptr, alloc_size,
3717 	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3718 	if (!data_ptr) {
3719 		printf("%s: krealloc -> 2pgs returned not null\n", __func__);
3720 		return 1;
3721 	}
3722 	*data_ptr = 0;
3723 
3724 	data_ptr = krealloc_ext(kheap, data_ptr, PAGE_SIZE * 2,
3725 	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3726 	if (!data_ptr) {
3727 		printf("%s: krealloc -> VM1 returned not null\n", __func__);
3728 		return 1;
3729 	}
3730 	*data_ptr = 0;
3731 
3732 	data_ptr = krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 2,
3733 	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3734 	*data_ptr = 0;
3735 	if (!data_ptr) {
3736 		printf("%s: krealloc -> VM2 returned not null\n", __func__);
3737 		return 1;
3738 	}
3739 
3740 	krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 4,
3741 	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);
3742 
3743 	printf("%s: test passed\n", __func__);
3744 	return 0;
3745 }
3746 
3747 static int
run_kalloc_test(int64_t in __unused,int64_t * out)3748 run_kalloc_test(int64_t in __unused, int64_t *out)
3749 {
3750 	*out = 1;
3751 
3752 	if (run_kalloc_test_kheap(KHEAP_DATA_BUFFERS) != 0 ||
3753 	    run_kalloc_test_kheap(KHEAP_DATA_SHARED) != 0) {
3754 		*out = 0;
3755 	}
3756 
3757 	return 0;
3758 }
3759 SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
3760 
3761 #endif /* DEBUG || DEVELOPMENT */
3762