xref: /xnu-8020.140.41/osfmk/kern/kalloc.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/kalloc.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	General kernel memory allocator.  This allocator is designed
64  *	to be used by the kernel to manage dynamic memory fast.
65  */
66 
67 #include <mach/boolean.h>
68 #include <mach/sdt.h>
69 #include <mach/machine/vm_types.h>
70 #include <mach/vm_param.h>
71 #include <kern/misc_protos.h>
72 #include <kern/counter.h>
73 #include <kern/zalloc_internal.h>
74 #include <kern/kalloc.h>
75 #include <kern/ledger.h>
76 #include <kern/backtrace.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_map.h>
80 #include <sys/kdebug.h>
81 
82 #include <os/hash.h>
83 #include <san/kasan.h>
84 #include <libkern/section_keywords.h>
85 #include <libkern/prelink.h>
86 
/*
 * Scalable counters for the kalloc "large" path.
 * NOTE(review): presumably the number and the total size of allocations
 * that are served by the VM rather than by a zone -- confirm against the
 * code that updates these counters.
 */
SCALABLE_COUNTER_DEFINE(kalloc_large_count);
SCALABLE_COUNTER_DEFINE(kalloc_large_total);
89 
90 #pragma mark initialization
91 
92 /*
93  * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
94  * sized zone.  This allocator is built on top of the zone allocator.  A zone
95  * is created for each potential size that we are willing to get in small
96  * blocks.
97  *
 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
99  */
100 
101 /*
102  * The k_zone_cfg table defines the configuration of zones on various platforms.
103  * The currently defined list of zones and their per-CPU caching behavior are as
104  * follows
105  *
106  *     X:zone not present
107  *     N:zone present no cpu-caching
108  *     Y:zone present with cpu-caching
109  *
110  * Size       macOS(64-bit)       embedded(32-bit)    embedded(64-bit)
111  *--------    ----------------    ----------------    ----------------
112  *
113  * 8          X                    Y                   X
114  * 16         Y                    Y                   Y
115  * 24         X                    Y                   X
116  * 32         Y                    Y                   Y
117  * 40         X                    Y                   X
118  * 48         Y                    Y                   Y
119  * 64         Y                    Y                   Y
120  * 72         X                    Y                   X
121  * 80         Y                    X                   Y
122  * 88         X                    Y                   X
123  * 96         Y                    X                   Y
124  * 112        X                    Y                   X
125  * 128        Y                    Y                   Y
126  * 160        Y                    X                   Y
127  * 192        Y                    Y                   Y
128  * 224        Y                    X                   Y
129  * 256        Y                    Y                   Y
130  * 288        Y                    Y                   Y
131  * 368        Y                    X                   Y
132  * 384        X                    Y                   X
133  * 400        Y                    X                   Y
134  * 440        X                    Y                   X
135  * 512        Y                    Y                   Y
136  * 576        Y                    N                   N
137  * 768        Y                    N                   N
138  * 1024       Y                    Y                   Y
139  * 1152       N                    N                   N
140  * 1280       N                    N                   N
141  * 1536       X                    N                   X
142  * 1664       N                    X                   N
143  * 2048       Y                    N                   N
144  * 2128       X                    N                   X
145  * 3072       X                    N                   X
146  * 4096       Y                    N                   N
147  * 6144       N                    N                   N
148  * 8192       Y                    N                   N
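 * 12288      N [1]                X                   X
 * 16384      N                    X [2]               N
 * 32768      X [3]                X [2]               N
 *
 * [1] only on x86_64
 * [2] present without cpu-caching when ARM_PGBYTES > 4096
 * [3] present without cpu-caching on arm64
 *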
153  */
/*
 * Static description of one kalloc size class.
 */
struct kalloc_zone_cfg {
	/* when true, kalloc_zones_init() creates the zone with ZC_CACHING */
	bool kzc_caching;
	/* element size handed to zone_create_ext() for this class */
	uint32_t kzc_size;
	/* zone name handed to zone_create_ext() */
	char kzc_name[MAX_ZONE_NAME];
};

/*
 * Build one kalloc_zone_cfg initializer. SIZE is stringified into the
 * zone name, so the name is "kalloc." followed by exactly the text that
 * was written as SIZE.
 */
#define KZC_ENTRY(SIZE, caching) { \
	.kzc_caching = (caching), \
	.kzc_size = (SIZE), \
	.kzc_name = "kalloc." #SIZE \
}
/*
 * Size classes of the default kalloc heap (used as kalloc_zones_default.cfg).
 *
 * Exactly one of the three lists below is compiled in, selected by
 * XNU_TARGET_OS_OSX and by the KALLOC_MINSIZE / KALLOC_LOG2_MINALIGN pair.
 *
 * Every list is written in increasing size order: kalloc_zones_init() and
 * kalloc_idx_for_size() scan forward for the first entry that is large
 * enough, and kalloc_zones_init() asserts that the last entry is
 * KHEAP_MAX_SIZE.
 */
static SECURITY_READ_ONLY_LATE(struct kalloc_zone_cfg) k_zone_cfg[] = {
#if !defined(XNU_TARGET_OS_OSX)

#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
	/* Zone config for embedded 64-bit platforms */
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(80, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(224, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(400, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, false),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	KZC_ENTRY(16384, false),
	KZC_ENTRY(32768, false),

#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
	/* Zone config for embedded 32-bit platforms */
	KZC_ENTRY(8, true),
	KZC_ENTRY(16, true),
	KZC_ENTRY(24, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(40, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(72, true),
	KZC_ENTRY(88, true),
	KZC_ENTRY(112, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(384, true),
	KZC_ENTRY(440, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, false),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1536, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(2128, false),
	KZC_ENTRY(3072, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	/* To limit internal fragmentation, only add the following zones if the
	 * page size is greater than 4K.
	 * Note that we use ARM_PGBYTES here (instead of one of the VM macros)
	 * since it's guaranteed to be a compile time constant.
	 */
#if ARM_PGBYTES > 4096
	KZC_ENTRY(16384, false),
	KZC_ENTRY(32768, false),
#endif /* ARM_PGBYTES > 4096 */

#else
#error missing or invalid zone size parameters for kalloc
#endif

#else /* !defined(XNU_TARGET_OS_OSX) */

	/* Zone config for macOS 64-bit platforms */
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(80, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(224, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(288, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(400, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(576, true),
	KZC_ENTRY(768, true),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1280, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, true),
	KZC_ENTRY(4096, true),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, true),
	/* the 12288 class only exists on x86_64 */
#if __x86_64__
	KZC_ENTRY(12288, false),
#endif /* __x86_64__ */
	KZC_ENTRY(16384, false),
	/* the 32768 class only exists on arm64 */
#if __arm64__
	KZC_ENTRY(32768, false),
#endif
#endif /* !defined(XNU_TARGET_OS_OSX) */
};
280 
281 
/*
 * Size classes of the data buffers heap (used as kalloc_zones_data.cfg).
 *
 * Unlike k_zone_cfg, this list is the same on every platform except for
 * the 32768 class, which is only present on arm64. Entries are written in
 * increasing size order, as the lookup code scans forward for the first
 * class that is large enough.
 */
static SECURITY_READ_ONLY_LATE(struct kalloc_zone_cfg) k_zone_cfg_data[] = {
	KZC_ENTRY(16, true),
	KZC_ENTRY(32, true),
	KZC_ENTRY(48, true),
	KZC_ENTRY(64, true),
	KZC_ENTRY(96, true),
	KZC_ENTRY(128, true),
	KZC_ENTRY(160, true),
	KZC_ENTRY(192, true),
	KZC_ENTRY(256, true),
	KZC_ENTRY(368, true),
	KZC_ENTRY(512, true),
	KZC_ENTRY(768, false),
	KZC_ENTRY(1024, true),
	KZC_ENTRY(1152, false),
	KZC_ENTRY(1664, false),
	KZC_ENTRY(2048, false),
	KZC_ENTRY(4096, false),
	KZC_ENTRY(6144, false),
	KZC_ENTRY(8192, false),
	KZC_ENTRY(16384, false),
#if __arm64__
	KZC_ENTRY(32768, false),
#endif
};
/* KZC_ENTRY is only meant for the two tables above */
#undef KZC_ENTRY
308 
/*
 * Number of entries of a size class table, as a uint32_t.
 *
 * kzc must be an actual array (not a pointer). The argument and the whole
 * expansion are parenthesized so that the macro can be used as an operand
 * of any operator (for instance `sizeof MAX_K_ZONE(x)`).
 */
#define MAX_K_ZONE(kzc) ((uint32_t)(sizeof(kzc) / sizeof((kzc)[0])))
310 
311 /*
312  * Many kalloc() allocations are for small structures containing a few
313  * pointers and longs - the dlut[] direct lookup table, indexed by
314  * size normalized to the minimum alignment, finds the right zone index
315  * for them in one dereference.
316  */
317 
/* DLUT slot of an allocation size: the size in KALLOC_MINALIGN units, rounded up */
#define INDEX_ZDLUT(sz)    (((sz) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
/* Size that maps to the last DLUT slot (KALLOC_DLUT_SIZE - 1) */
#define MAX_SIZE_ZDLUT     ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
320 
/*
 * Zone of every size class, one array per heap. They are reached through
 * kheap_zones.k_zone and filled in by kalloc_zones_init().
 */
static SECURITY_READ_ONLY_LATE(zone_t) k_zone_default[MAX_K_ZONE(k_zone_cfg)];
static SECURITY_READ_ONLY_LATE(zone_t) k_zone_data[MAX_K_ZONE(k_zone_cfg_data)];

#if VM_TAG_SIZECLASSES
/* VM_TAG_SIZECLASSES must cover every size class of the default heap */
static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(k_zone_cfg));
#endif

/*
 * String associated with each heap id; "default." and "data." match the
 * kh_name of KHEAP_DEFAULT and KHEAP_DATA_BUFFERS.
 */
const char * const kalloc_heap_names[] = {
	[KHEAP_ID_NONE]          = "",
	[KHEAP_ID_DEFAULT]       = "default.",
	[KHEAP_ID_DATA_BUFFERS]  = "data.",
	[KHEAP_ID_KT_VAR]        = "",
};
334 
/*
 * Default kalloc heap configuration
 *
 * kalloc_zones_default ties the k_zone_cfg size classes to the
 * k_zone_default zone array; KHEAP_DEFAULT is the heap built on top of it.
 */
static SECURITY_READ_ONLY_LATE(struct kheap_zones) kalloc_zones_default = {
	.cfg         = k_zone_cfg,
	.heap_id     = KHEAP_ID_DEFAULT,
	.k_zone      = k_zone_default,
	.max_k_zone  = MAX_K_ZONE(k_zone_cfg)
};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DEFAULT[1] = {
	{
		.kh_zones    = &kalloc_zones_default,
		.kh_name     = "default.",
		.kh_heap_id  = KHEAP_ID_DEFAULT,
		.kh_tag      = VM_KERN_MEMORY_KALLOC
	}
};


/*
 * Bag of bytes heap configuration
 *
 * Same layout as the default heap, but based on k_zone_cfg_data and
 * k_zone_data. When ZSECURITY_CONFIG(SUBMAP_USER_DATA) is off,
 * kalloc_init() overwrites KHEAP_DATA_BUFFERS with a copy of KHEAP_DEFAULT.
 */
static SECURITY_READ_ONLY_LATE(struct kheap_zones) kalloc_zones_data = {
	.cfg         = k_zone_cfg_data,
	.heap_id     = KHEAP_ID_DATA_BUFFERS,
	.k_zone      = k_zone_data,
	.max_k_zone  = MAX_K_ZONE(k_zone_cfg_data)
};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
	{
		.kh_zones    = &kalloc_zones_data,
		.kh_name     = "data.",
		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
	}
};

/*
 * Configuration of variable kalloc type heaps
 *
 * kalloc_type_heap_array starts out empty; kalloc_type_assign_zone_var()
 * reads kh_zstart from an entry and links views onto its `views` list.
 * KHEAP_KT_VAR has no kh_zones set here.
 */
SECURITY_READ_ONLY_LATE(struct kt_heap_zones)
kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
	{
		.kh_name     = "kalloc.type.var",
		.kh_heap_id  = KHEAP_ID_KT_VAR,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
	}
};
384 
385 /*
386  * Initialize kalloc heap: Create zones, generate direct lookup table and
387  * do a quick test on lookups
388  */
389 __startup_func
390 static void
kalloc_zones_init(struct kalloc_heap * kheap)391 kalloc_zones_init(struct kalloc_heap *kheap)
392 {
393 	struct kheap_zones *zones = kheap->kh_zones;
394 	struct kalloc_zone_cfg *cfg = zones->cfg;
395 	zone_t *k_zone = zones->k_zone;
396 	vm_size_t size;
397 
398 	/*
399 	 * Allocate a zone for each size we are going to handle.
400 	 */
401 	assert(zones->cfg[zones->max_k_zone - 1].kzc_size == KHEAP_MAX_SIZE);
402 
403 	for (uint32_t i = 0; i < zones->max_k_zone &&
404 	    (size = cfg[i].kzc_size) <= KHEAP_MAX_SIZE; i++) {
405 		zone_create_flags_t flags = ZC_KASAN_NOREDZONE |
406 		    ZC_KASAN_NOQUARANTINE | ZC_KALLOC_HEAP | ZC_PGZ_USE_GUARDS;
407 		if (cfg[i].kzc_caching) {
408 			flags |= ZC_CACHING;
409 		}
410 
411 		k_zone[i] = zone_create_ext(cfg[i].kzc_name, size, flags,
412 		    ZONE_ID_ANY, ^(zone_t z){
413 			zone_security_array[zone_index(z)].z_kheap_id = (uint8_t)zones->heap_id;
414 		});
415 	}
416 
417 	/*
418 	 * Count all the "raw" views for zones in the heap.
419 	 */
420 	zone_view_count += zones->max_k_zone;
421 
422 	/*
423 	 * Build the Direct LookUp Table for small allocations
424 	 * As k_zone_cfg is shared between the heaps the
425 	 * Direct LookUp Table is also shared and doesn't need to
426 	 * be rebuilt per heap.
427 	 */
428 	size = 0;
429 	for (int i = 0; i <= KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
430 		uint8_t zindex = 0;
431 
432 		while ((vm_size_t)(cfg[zindex].kzc_size) < size) {
433 			zindex++;
434 		}
435 
436 		if (i == KALLOC_DLUT_SIZE) {
437 			zones->k_zindex_start = zindex;
438 			break;
439 		}
440 		zones->dlut[i] = zindex;
441 	}
442 }
443 
444 __startup_func
445 static void
kalloc_init(void)446 kalloc_init(void)
447 {
448 	/* Initialize kalloc default heap */
449 	kalloc_zones_init(KHEAP_DEFAULT);
450 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
451 
452 #if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
453 	/* Initialize kalloc data buffers heap */
454 	kalloc_zones_init(KHEAP_DATA_BUFFERS);
455 #else
456 	*KHEAP_DATA_BUFFERS = *KHEAP_DEFAULT;
457 #endif
458 }
459 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
460 
/* Alignment constants for kexts: 1 << 6 = 64 bytes, and the matching mask */
#define KEXT_ALIGN_SHIFT           6
#define KEXT_ALIGN_BYTES           (1 << KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES - 1)
/* 256 << 10 = 256KB */
#define kt_scratch_size            (256ul << 10)
/*
 * Name of the section holding the kalloc_type views of the given variant.
 * The argument is parenthesized (like in kalloc_type_var() and
 * kalloc_type_func()) so that any expression can be passed as `type`.
 */
#define KALLOC_TYPE_SECTION(type) \
	((type) == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")
467 
/*
 * Enum to specify the kalloc_type variant being used.
 *
 * KTV_FIXED selects the *_fixed helpers and the "__kalloc_type" section,
 * anything else the *_var helpers and the "__kalloc_var" section (see
 * kalloc_type_func(), kalloc_type_var() and KALLOC_TYPE_SECTION()).
 */
__options_decl(kalloc_type_variant_t, uint16_t, {
	KTV_FIXED     = 0x0001,
	KTV_VAR       = 0x0002,
});
475 
/*
 * Variant dispatch helpers.
 *
 * kalloc_type_var(variant, sym) evaluates to kalloc_type_<sym>_fixed or
 * kalloc_type_<sym>_var, cast to vm_offset_t.
 * kalloc_type_func(variant, fn, args...) calls kalloc_type_<fn>_fixed or
 * kalloc_type_<fn>_var with the given arguments.
 *
 * KTV_FIXED picks the _fixed flavor, any other value the _var one.
 */
#define kalloc_type_var(variant, sym)                   \
	((variant) == KTV_FIXED ?                           \
	 (vm_offset_t) kalloc_type_##sym##_fixed :          \
	 (vm_offset_t) kalloc_type_##sym##_var)
#define kalloc_type_func(variant, fn, ...)              \
	((variant) == KTV_FIXED ?                           \
	 kalloc_type_##fn##_fixed(__VA_ARGS__) :            \
	 kalloc_type_##fn##_var(__VA_ARGS__))
488 
/*
 * Fields of kalloc_type views that are required to make a redirection
 * decision i.e VM or data-only
 *
 * Filled in by kalloc_type_get_atom_fixed() / kalloc_type_get_atom_var()
 * so that the same checks can run on both view flavors.
 */
struct kalloc_type_atom {
	/* kt_flags of the view, as read before KT_SLID gets set on it */
	kalloc_type_flags_t  kt_flags;
	/* kt_size for fixed views, kt_size_hdr + kt_size_type for variable ones */
	vm_size_t            kt_size;
	/* header signature; only set for variable views and may be NULL */
	const char          *kt_sig_hdr;
	/* type signature (kt_signature for fixed views) */
	const char          *kt_sig_type;
};
499 
/*
 * kalloc_type tunables.
 * NOTE(review): the string argument looks like the boot-arg name and the
 * last argument like the default value -- confirm against TUNABLE().
 */
TUNABLE(kalloc_type_options_t, kt_options, "kt", KT_OPTIONS_LOOSE_FREE);
TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
    ZSECURITY_CONFIG_KT_VAR_BUDGET);
/*
 * Section start/end for fixed kalloc_type views
 */
extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

/*
 * Section start/end for variable kalloc_type views
 */
extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

/*
 * One slot of the scratch buffer: a pointer to either flavor of view.
 */
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;

/* scratch buffer that kalloc_type_view_copy() fills with view pointers */
__startup_data
static kalloc_type_views_t *kt_buffer = NULL;
/* bound that kalloc_type_view_copy() checks before filling kt_buffer */
__startup_data
static uint64_t kt_count;
/* seed of the signature hash computed by kalloc_type_set_type_hash() */
__startup_data
uint32_t kalloc_type_hash_seed;

_Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
    "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");

#if DEBUG || DEVELOPMENT
/*
 * For use by lldb to iterate over kalloc types
 */
uint64_t num_kt_sizeclass = MAX_K_ZONE(k_zone_cfg);
SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(k_zone_cfg)];
#endif
543 
/*
 * Extract the hash stored in the KT_HASH bits (bit 16 and up) of a flags
 * word. The argument and the expansion are parenthesized so that any
 * expression can be passed and the result used in any context.
 */
#define KT_GET_HASH(flags) ((uint16_t)(((flags) & KT_HASH) >> 16))
/*
 * The hash bits of the flags (KT_HASH, shifted down by 16) must be exactly
 * the range id mask, the "hash set" marker and the direction mask combined.
 */
static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
    KMEM_DIRECTION_MASK),
    "Insufficient bits to represent range and dir for VM allocations");
/* The number of default size classes must stay below KALLOC_TYPE_IDX_MASK */
static_assert(MAX_K_ZONE(k_zone_cfg) < KALLOC_TYPE_IDX_MASK,
    "validate idx mask");
/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
553 
554 static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)555 kalloc_type_get_idx(uint32_t kt_size)
556 {
557 	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
558 }
559 
560 static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size,uint16_t idx)561 kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
562 {
563 	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
564 }
565 
566 static uint32_t
kalloc_idx_for_size(kalloc_heap_t kheap,uint32_t size)567 kalloc_idx_for_size(kalloc_heap_t kheap, uint32_t size)
568 {
569 	struct kheap_zones *khz = kheap->kh_zones;
570 	uint16_t idx;
571 
572 	assert(size <= KHEAP_MAX_SIZE);
573 
574 	if (size < MAX_SIZE_ZDLUT) {
575 		idx = khz->dlut[INDEX_ZDLUT(size)];
576 		return kalloc_type_set_idx(size, idx);
577 	}
578 
579 	idx = khz->k_zindex_start;
580 	while (khz->cfg[idx].kzc_size < size) {
581 		idx++;
582 	}
583 	return kalloc_type_set_idx(size, idx);
584 }
585 
586 static zone_t
kalloc_heap_zone_for_idx(kalloc_heap_t kheap,uint16_t zindex)587 kalloc_heap_zone_for_idx(kalloc_heap_t kheap, uint16_t zindex)
588 {
589 	struct kheap_zones *khz = kheap->kh_zones;
590 	return khz->k_zone[zindex];
591 }
592 
593 static void
kalloc_type_assign_zone_fixed(kalloc_type_view_t * cur,kalloc_type_view_t * end,zone_t z)594 kalloc_type_assign_zone_fixed(kalloc_type_view_t *cur, kalloc_type_view_t *end,
595     zone_t z)
596 {
597 	/*
598 	 * Assign the zone created for every kalloc_type_view
599 	 * of the same unique signature
600 	 */
601 	bool need_raw_view = false;
602 	while (cur < end) {
603 		kalloc_type_view_t kt = *cur;
604 		struct zone_view *zv = &kt->kt_zv;
605 		zv->zv_zone = z;
606 		kalloc_type_flags_t kt_flags = kt->kt_flags;
607 
608 		if (kt_flags & KT_SLID) {
609 			kt->kt_signature -= vm_kernel_slide;
610 			kt->kt_zv.zv_name -= vm_kernel_slide;
611 		}
612 
613 		if ((kt_flags & KT_PRIV_ACCT) ||
614 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
615 			zv->zv_stats = zalloc_percpu_permanent_type(
616 				struct zone_stats);
617 			need_raw_view = true;
618 			zone_view_count += 1;
619 		} else {
620 			zv->zv_stats = z->z_stats;
621 		}
622 		zv->zv_next = (zone_view_t) z->z_views;
623 		zv->zv_zone->z_views = (zone_view_t) kt;
624 		cur++;
625 	}
626 	if (need_raw_view) {
627 		zone_view_count += 1;
628 	}
629 }
630 
631 __startup_func
632 static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t * cur,kalloc_type_var_view_t * end,uint32_t heap_idx)633 kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
634     kalloc_type_var_view_t *end, uint32_t heap_idx)
635 {
636 	struct kt_heap_zones *cfg = &kalloc_type_heap_array[heap_idx];
637 	while (cur < end) {
638 		kalloc_type_var_view_t kt = *cur;
639 		zone_id_t zid = cfg->kh_zstart;
640 		kt->kt_heap_start = zid;
641 		kalloc_type_flags_t kt_flags = kt->kt_flags;
642 
643 		if (kt_flags & KT_SLID) {
644 			if (kt->kt_sig_hdr) {
645 				kt->kt_sig_hdr -= vm_kernel_slide;
646 			}
647 			kt->kt_sig_type -= vm_kernel_slide;
648 			kt->kt_name -= vm_kernel_slide;
649 		}
650 
651 		if ((kt_flags & KT_PRIV_ACCT) ||
652 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
653 			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
654 			zone_view_count += 1;
655 		}
656 
657 		kt->kt_next = (zone_view_t) cfg->views;
658 		cfg->views = kt;
659 		cur++;
660 	}
661 }
662 
663 static inline char
kalloc_type_granule_to_char(kt_granule_t granule)664 kalloc_type_granule_to_char(kt_granule_t granule)
665 {
666 	return (char) (granule + '0');
667 }
668 
669 static bool
kalloc_type_sig_check(const char * sig,const kt_granule_t gr)670 kalloc_type_sig_check(const char *sig, const kt_granule_t gr)
671 {
672 	while (*sig == kalloc_type_granule_to_char(gr & KT_GRANULE_PADDING) ||
673 	    *sig == kalloc_type_granule_to_char(gr & KT_GRANULE_POINTER) ||
674 	    *sig == kalloc_type_granule_to_char(gr & KT_GRANULE_DATA) ||
675 	    *sig == kalloc_type_granule_to_char(gr & KT_GRANULE_PAC)) {
676 		sig++;
677 	}
678 	return *sig == '\0';
679 }
680 
681 /*
682  * Check if signature of type is made up of only the specified granules
683  */
684 static bool
kalloc_type_check(struct kalloc_type_atom kt_atom,kalloc_type_flags_t change_flag,kalloc_type_flags_t check_flag,const kt_granule_t check_gr)685 kalloc_type_check(struct kalloc_type_atom kt_atom,
686     kalloc_type_flags_t change_flag, kalloc_type_flags_t check_flag,
687     const kt_granule_t check_gr)
688 {
689 	kalloc_type_flags_t flags = kt_atom.kt_flags;
690 	if (flags & change_flag) {
691 		return flags & check_flag;
692 	} else {
693 		bool kt_hdr_check = kt_atom.kt_sig_hdr?
694 		    kalloc_type_sig_check(kt_atom.kt_sig_hdr, check_gr): true;
695 		bool kt_type_check = kalloc_type_sig_check(kt_atom.kt_sig_type, check_gr);
696 		return kt_hdr_check && kt_type_check;
697 	}
698 }
699 
700 /*
701  * Check if signature of type is made up of only data and padding
702  */
703 static bool
kalloc_type_is_data(struct kalloc_type_atom kt_atom)704 kalloc_type_is_data(struct kalloc_type_atom kt_atom)
705 {
706 	return kalloc_type_check(kt_atom, KT_CHANGED, KT_DATA_ONLY,
707 	           KT_GRANULE_DATA);
708 }
709 
710 /*
711  * Use this version after early boot as sites from kexts that haven't yet
712  * been recompiled with the latest kalloc type defintions, have been fixed
713  * up.
714  */
715 static bool
kalloc_type_is_data_fast(struct kalloc_type_atom kt_atom)716 kalloc_type_is_data_fast(struct kalloc_type_atom kt_atom)
717 {
718 	return kt_atom.kt_flags & KT_DATA_ONLY;
719 }
720 
721 /*
722  * Check if signature of type is made up of only pointers
723  */
724 static bool
kalloc_type_is_ptr_array(struct kalloc_type_atom kt_atom)725 kalloc_type_is_ptr_array(struct kalloc_type_atom kt_atom)
726 {
727 	return kalloc_type_check(kt_atom, KT_CHANGED2, KT_PTR_ARRAY,
728 	           KT_GRANULE_POINTER | KT_GRANULE_PAC);
729 }
730 
731 static bool
kalloc_type_from_vm(struct kalloc_type_atom kt_atom)732 kalloc_type_from_vm(struct kalloc_type_atom kt_atom)
733 {
734 	kalloc_type_flags_t flags = kt_atom.kt_flags;
735 	if (flags & KT_CHANGED) {
736 		return flags & KT_VM;
737 	} else {
738 		return kt_atom.kt_size > KHEAP_MAX_SIZE;
739 	}
740 }
741 
742 __startup_func
743 static inline vm_size_t
kalloc_type_view_sz_fixed(void)744 kalloc_type_view_sz_fixed(void)
745 {
746 	return sizeof(struct kalloc_type_view);
747 }
748 
749 __startup_func
750 static inline vm_size_t
kalloc_type_view_sz_var(void)751 kalloc_type_view_sz_var(void)
752 {
753 	return sizeof(struct kalloc_type_var_view);
754 }
755 
756 __startup_func
757 static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end)758 kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
759     vm_offset_t end)
760 {
761 	return (end - start) / kalloc_type_func(type, view_sz);
762 }
763 
764 static inline struct kalloc_type_atom
kalloc_type_get_atom_fixed(vm_offset_t addr,bool slide)765 kalloc_type_get_atom_fixed(vm_offset_t addr, bool slide)
766 {
767 	struct kalloc_type_atom kt_atom = {};
768 	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
769 	kt_atom.kt_flags = ktv->kt_flags;
770 	kt_atom.kt_size = ktv->kt_size;
771 	if (slide) {
772 		ktv->kt_signature += vm_kernel_slide;
773 		ktv->kt_zv.zv_name += vm_kernel_slide;
774 		ktv->kt_flags |= KT_SLID;
775 	}
776 	kt_atom.kt_sig_type = ktv->kt_signature;
777 	return kt_atom;
778 }
779 
780 static inline struct kalloc_type_atom
kalloc_type_get_atom_var(vm_offset_t addr,bool slide)781 kalloc_type_get_atom_var(vm_offset_t addr, bool slide)
782 {
783 	struct kalloc_type_atom kt_atom = {};
784 	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
785 	kt_atom.kt_flags = ktv->kt_flags;
786 	kt_atom.kt_size = ktv->kt_size_hdr + ktv->kt_size_type;
787 	if (slide) {
788 		if (ktv->kt_sig_hdr) {
789 			ktv->kt_sig_hdr += vm_kernel_slide;
790 		}
791 		ktv->kt_sig_type += vm_kernel_slide;
792 		ktv->kt_name += vm_kernel_slide;
793 		ktv->kt_flags |= KT_SLID;
794 	}
795 	kt_atom.kt_sig_hdr = ktv->kt_sig_hdr;
796 	kt_atom.kt_sig_type = ktv->kt_sig_type;
797 	return kt_atom;
798 }
799 
800 __startup_func
801 static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t * buffer,vm_offset_t ktv)802 kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
803 {
804 	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
805 }
806 
807 __startup_func
808 static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t * buffer,vm_offset_t ktv)809 kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
810 {
811 	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
812 }
813 
814 __startup_func
815 static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)816 kalloc_type_handle_data_view_fixed(vm_offset_t addr)
817 {
818 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
819 	cur_data_view->kt_size = kalloc_idx_for_size(KHEAP_DATA_BUFFERS,
820 	    cur_data_view->kt_size);
821 	uint16_t kt_idx = kalloc_type_get_idx(cur_data_view->kt_size);
822 	zone_t z = kalloc_heap_zone_for_idx(KHEAP_DATA_BUFFERS, kt_idx);
823 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z);
824 }
825 
826 __startup_func
827 static void
kalloc_type_handle_data_view_var(vm_offset_t addr)828 kalloc_type_handle_data_view_var(vm_offset_t addr)
829 {
830 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
831 	kalloc_type_flags_t kt_flags = ktv->kt_flags;
832 
833 	/*
834 	 * To avoid having to recompute this until rdar://85182551 lands
835 	 * in the build and kexts are rebuilt.
836 	 */
837 	if (!(kt_flags & KT_CHANGED)) {
838 		ktv->kt_flags |= (KT_CHANGED | KT_DATA_ONLY);
839 	}
840 
841 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
842 }
843 
844 __startup_func
845 static void
kalloc_type_handle_parray_var(vm_offset_t addr)846 kalloc_type_handle_parray_var(vm_offset_t addr)
847 {
848 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
849 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_PTR_HEAP);
850 }
851 
852 __startup_func
853 static uint32_t
kalloc_hash_adjust(uint32_t hash,uint32_t shift)854 kalloc_hash_adjust(uint32_t hash, uint32_t shift)
855 {
856 	/*
857 	 * Limit range_id to ptr ranges
858 	 */
859 	uint32_t range_id = kmem_adjust_range_id(hash);
860 	uint32_t direction = hash & 0x8000;
861 	return (range_id | KMEM_HASH_SET | direction) << shift;
862 }
863 
864 __startup_func
865 static void
kalloc_type_set_type_hash(const char * sig_ty,const char * sig_hdr,kalloc_type_flags_t * kt_flags)866 kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
867     kalloc_type_flags_t *kt_flags)
868 {
869 	uint32_t hash = 0;
870 
871 	assert(sig_ty != NULL);
872 	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
873 	    kalloc_type_hash_seed);
874 	if (sig_hdr) {
875 		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
876 	}
877 	os_hash_jenkins_finish(hash);
878 	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
879 
880 	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
881 }
882 
883 __startup_func
884 static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)885 kalloc_type_set_type_hash_fixed(vm_offset_t addr)
886 {
887 	/*
888 	 * Use backtraces on fixed as we don't have signatures for types that go
889 	 * to the VM due to rdar://85182551.
890 	 */
891 	(void) addr;
892 }
893 
894 __startup_func
895 static void
kalloc_type_set_type_hash_var(vm_offset_t addr)896 kalloc_type_set_type_hash_var(vm_offset_t addr)
897 {
898 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
899 	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
900 	    &ktv->kt_flags);
901 }
902 
903 __startup_func
904 static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)905 kalloc_type_mark_processed_fixed(vm_offset_t addr)
906 {
907 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
908 	ktv->kt_flags |= KT_PROCESSED;
909 }
910 
911 __startup_func
912 static void
kalloc_type_mark_processed_var(vm_offset_t addr)913 kalloc_type_mark_processed_var(vm_offset_t addr)
914 {
915 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
916 	ktv->kt_flags |= KT_PROCESSED;
917 }
918 
919 __startup_func
920 static void
kalloc_type_update_view_fixed(vm_offset_t addr)921 kalloc_type_update_view_fixed(vm_offset_t addr)
922 {
923 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
924 	ktv->kt_size = kalloc_idx_for_size(KHEAP_DEFAULT, ktv->kt_size);
925 }
926 
927 __startup_func
928 static void
kalloc_type_update_view_var(vm_offset_t addr)929 kalloc_type_update_view_var(vm_offset_t addr)
930 {
931 	(void) addr;
932 }
933 
934 __startup_func
935 static void
kalloc_type_view_copy(const kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end,uint64_t * cur_count,bool slide)936 kalloc_type_view_copy(const kalloc_type_variant_t type, vm_offset_t start,
937     vm_offset_t end, uint64_t *cur_count, bool slide)
938 {
939 	uint64_t count = kalloc_type_view_count(type, start, end);
940 	if (count + *cur_count >= kt_count) {
941 		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
942 	}
943 	vm_offset_t cur = start;
944 	while (cur < end) {
945 		struct kalloc_type_atom kt_atom = kalloc_type_func(type, get_atom, cur,
946 		    slide);
947 		kalloc_type_func(type, mark_processed, cur);
948 		/*
949 		 * Skip views that go to the VM
950 		 */
951 		if (kalloc_type_from_vm(kt_atom)) {
952 			cur += kalloc_type_func(type, view_sz);
953 			continue;
954 		}
955 
956 		/*
957 		 * If signature indicates that the entire allocation is data move it to
958 		 * KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a fake "data" heap,
959 		 * variable kalloc_type handles the actual redirection in the entry points
960 		 * kalloc/kfree_type_var_impl.
961 		 */
962 		if (kalloc_type_is_data(kt_atom)) {
963 			kalloc_type_func(type, handle_data_view, cur);
964 			cur += kalloc_type_func(type, view_sz);
965 			continue;
966 		}
967 
968 		/*
969 		 * Set type hash that is used by kmem_*_guard
970 		 */
971 		kalloc_type_func(type, set_type_hash, cur);
972 
973 		/*
974 		 * Redirect variable sized pointer arrays to KT_VAR_PTR_HEAP
975 		 */
976 		if (type == KTV_VAR && kalloc_type_is_ptr_array(kt_atom)) {
977 			kalloc_type_handle_parray_var(cur);
978 			cur += kalloc_type_func(type, view_sz);
979 			continue;
980 		}
981 
982 		kalloc_type_func(type, update_view, cur);
983 		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
984 		cur += kalloc_type_func(type, view_sz);
985 		*cur_count = *cur_count + 1;
986 	}
987 }
988 
989 __startup_func
990 static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)991 kalloc_type_view_parse(const kalloc_type_variant_t type)
992 {
993 	kc_format_t kc_format;
994 	uint64_t cur_count = 0;
995 
996 	if (!PE_get_primary_kc_format(&kc_format)) {
997 		panic("kalloc_type_view_parse: wasn't able to determine kc format");
998 	}
999 
1000 	if (kc_format == KCFormatStatic) {
1001 		/*
1002 		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
1003 		 * xnu are coalesced.
1004 		 */
1005 		kalloc_type_view_copy(type,
1006 		    kalloc_type_var(type, sec_start),
1007 		    kalloc_type_var(type, sec_end),
1008 		    &cur_count, 0);
1009 	} else if (kc_format == KCFormatFileset) {
1010 		/*
1011 		 * If kc uses filesets, traverse __kalloc_type section for each
1012 		 * macho in the BootKC.
1013 		 */
1014 		kernel_mach_header_t *kc_mh = NULL;
1015 		kernel_mach_header_t *kext_mh = NULL;
1016 
1017 		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
1018 		struct load_command *lc =
1019 		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
1020 		for (uint32_t i = 0; i < kc_mh->ncmds;
1021 		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1022 			if (lc->cmd != LC_FILESET_ENTRY) {
1023 				continue;
1024 			}
1025 			struct fileset_entry_command *fse =
1026 			    (struct fileset_entry_command *)(vm_offset_t)lc;
1027 			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
1028 			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
1029 				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1030 			if (sect != NULL) {
1031 				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1032 				    &cur_count, false);
1033 			}
1034 		}
1035 	} else if (kc_format == KCFormatKCGEN) {
1036 		/*
1037 		 * Parse __kalloc_type section from xnu
1038 		 */
1039 		kalloc_type_view_copy(type,
1040 		    kalloc_type_var(type, sec_start),
1041 		    kalloc_type_var(type, sec_end), &cur_count, false);
1042 
1043 #if defined(__LP64__)
1044 		/*
1045 		 * Parse __kalloc_type section for kexts
1046 		 *
1047 		 * Note: We don't process the kalloc_type_views for kexts on armv7
1048 		 * as this platform has insufficient memory for type based
1049 		 * segregation. kalloc_type_impl_external will direct callsites
1050 		 * based on their size.
1051 		 */
1052 		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1053 		vm_offset_t cur = 0;
1054 		vm_offset_t end = 0;
1055 
1056 		/*
1057 		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1058 		 * and traverse it.
1059 		 */
1060 		kernel_section_t *prelink_sect = getsectbynamefromheader(
1061 			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1062 		assert(prelink_sect);
1063 		cur = prelink_sect->addr;
1064 		end = prelink_sect->addr + prelink_sect->size;
1065 
1066 		while (cur < end) {
1067 			uint64_t kext_text_sz = 0;
1068 			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1069 
1070 			if (kext_mh->magic == 0) {
1071 				/*
1072 				 * Assert that we have processed all kexts and all that is left
1073 				 * is padding
1074 				 */
1075 				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1076 				break;
1077 			} else if (kext_mh->magic != MH_MAGIC_64 &&
1078 			    kext_mh->magic != MH_CIGAM_64) {
1079 				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1080 				    cur);
1081 			}
1082 
1083 			/*
1084 			 * Kext macho found, iterate through its segments
1085 			 */
1086 			struct load_command *lc =
1087 			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1088 			bool isSplitKext = false;
1089 
1090 			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1091 			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1092 				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1093 					isSplitKext = true;
1094 					continue;
1095 				} else if (lc->cmd != LC_SEGMENT_64) {
1096 					continue;
1097 				}
1098 
1099 				kernel_segment_command_t *seg_cmd =
1100 				    (struct segment_command_64 *)(vm_offset_t)lc;
1101 				/*
1102 				 * Parse kalloc_type section
1103 				 */
1104 				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1105 					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1106 					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1107 					if (kt_sect) {
1108 						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1109 						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1110 						    true);
1111 					}
1112 				}
1113 				/*
1114 				 * If the kext has a __TEXT segment, that is the only thing that
1115 				 * will be in the special __PRELINK_TEXT KC segment, so the next
1116 				 * macho is right after.
1117 				 */
1118 				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1119 					kext_text_sz = seg_cmd->filesize;
1120 				}
1121 			}
1122 			/*
1123 			 * If the kext did not have a __TEXT segment (special xnu kexts with
1124 			 * only a __LINKEDIT segment) then the next macho will be after all the
1125 			 * header commands.
1126 			 */
1127 			if (!kext_text_sz) {
1128 				kext_text_sz = kext_mh->sizeofcmds;
1129 			} else if (!isSplitKext) {
1130 				panic("kalloc_type_view_parse: No support for non-split seg KCs");
1131 				break;
1132 			}
1133 
1134 			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1135 		}
1136 
1137 #endif
1138 	} else {
1139 		/*
1140 		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1141 		 * parsing kalloc_type_view structs during startup.
1142 		 */
1143 		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1144 		    " for kc_format = %d\n", kc_format);
1145 	}
1146 	return cur_count;
1147 }
1148 
1149 __startup_func
1150 static int
kalloc_type_cmp_fixed(const void * a,const void * b)1151 kalloc_type_cmp_fixed(const void *a, const void *b)
1152 {
1153 	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1154 	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1155 
1156 	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1157 	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1158 	/*
1159 	 * If the kalloc_type_views are in the same kalloc bucket, sort by
1160 	 * signature else sort by size
1161 	 */
1162 	if (idxA == idxB) {
1163 		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1164 		/*
1165 		 * If the kalloc_type_views have the same signature sort by site
1166 		 * name
1167 		 */
1168 		if (result == 0) {
1169 			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1170 		}
1171 		return result;
1172 	}
1173 	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1174 	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1175 	return (int)(sizeA - sizeB);
1176 }
1177 
1178 __startup_func
1179 static int
kalloc_type_cmp_var(const void * a,const void * b)1180 kalloc_type_cmp_var(const void *a, const void *b)
1181 {
1182 	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1183 	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1184 
1185 	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1186 	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1187 
1188 	int result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1189 	if (result == 0) {
1190 		return strcmp(ktA_hdr, ktB_hdr);
1191 	}
1192 	return result;
1193 }
1194 
1195 __startup_func
1196 static uint16_t *
kalloc_type_create_iterators_fixed(uint16_t * kt_skip_list_start,uint16_t * kt_freq_list,uint16_t * kt_freq_list_total,uint64_t count)1197 kalloc_type_create_iterators_fixed(uint16_t *kt_skip_list_start,
1198     uint16_t *kt_freq_list, uint16_t *kt_freq_list_total, uint64_t count)
1199 {
1200 	uint16_t *kt_skip_list = kt_skip_list_start;
1201 	/*
1202 	 * cur and prev kalloc size bucket
1203 	 */
1204 	uint16_t p_idx = 0;
1205 	uint16_t c_idx = 0;
1206 
1207 	/*
1208 	 * Init values
1209 	 */
1210 	uint16_t unique_sig = 1;
1211 	uint16_t total_sig = 0;
1212 	kt_skip_list++;
1213 	const char *p_sig = "";
1214 	const char *p_name = "";
1215 
1216 	/*
1217 	 * Walk over each kalloc_type_view
1218 	 */
1219 	for (uint16_t i = 0; i < count; i++) {
1220 		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1221 		c_idx = kalloc_type_get_idx(kt->kt_size);
1222 		/*
1223 		 * When current kalloc_type_view is in a different kalloc size
1224 		 * bucket than the previous, it means we have processed all in
1225 		 * the previous size bucket, so store the accumulated values
1226 		 * and advance the indices.
1227 		 */
1228 		if (c_idx != p_idx) {
1229 			/*
1230 			 * Updates for frequency lists
1231 			 */
1232 			kt_freq_list[p_idx] = unique_sig;
1233 			unique_sig = 1;
1234 			kt_freq_list_total[p_idx] = total_sig;
1235 			total_sig = 1;
1236 			p_idx = c_idx;
1237 
1238 			/*
1239 			 * Updates to signature skip list
1240 			 */
1241 			*kt_skip_list = i;
1242 			kt_skip_list++;
1243 			p_sig = kt->kt_signature;
1244 			continue;
1245 		}
1246 
1247 		/*
1248 		 * When current kalloc_type_views is in the kalloc size bucket as
1249 		 * previous, analyze the siganture to see if it is unique.
1250 		 *
1251 		 * Signatures are collapsible if one is a substring of the next.
1252 		 */
1253 		const char *c_sig = kt->kt_signature;
1254 		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1255 			/*
1256 			 * Unique signature detected. Update counts and advance index
1257 			 */
1258 			unique_sig++;
1259 			*kt_skip_list = i;
1260 			kt_skip_list++;
1261 		}
1262 
1263 		/*
1264 		 * Check if current kalloc_type_view corresponds to a new type
1265 		 */
1266 		const char *c_name = kt->kt_zv.zv_name;
1267 		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1268 			total_sig++;
1269 		}
1270 		p_name = c_name;
1271 		p_sig = c_sig;
1272 	}
1273 	/*
1274 	 * Final update
1275 	 */
1276 	assert(c_idx == p_idx);
1277 	assert(kt_freq_list[c_idx] == 0);
1278 	kt_freq_list[c_idx] = unique_sig;
1279 	kt_freq_list_total[c_idx] = (uint16_t) total_sig;
1280 	*kt_skip_list = (uint16_t) count;
1281 	return ++kt_skip_list;
1282 }
1283 
1284 #if ZSECURITY_CONFIG(KALLOC_TYPE)
1285 __startup_func
1286 static uint32_t
kalloc_type_create_iterators_var(uint32_t * kt_skip_list_start)1287 kalloc_type_create_iterators_var(uint32_t *kt_skip_list_start)
1288 {
1289 	uint32_t *kt_skip_list = kt_skip_list_start;
1290 	uint32_t n = 0;
1291 	kt_skip_list[n] = 0;
1292 	assert(kt_count > 1);
1293 	for (uint32_t i = 1; i < kt_count; i++) {
1294 		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1295 		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1296 		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1297 		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1298 		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1299 		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1300 			n++;
1301 			kt_skip_list[n] = i;
1302 		}
1303 	}
1304 	/*
1305 	 * Final update
1306 	 */
1307 	n++;
1308 	kt_skip_list[n] = (uint32_t) kt_count;
1309 	return n;
1310 }
1311 
1312 __startup_func
1313 static uint16_t
kalloc_type_apply_policy(uint16_t * kt_freq_list,uint16_t * kt_zones,uint16_t zone_budget)1314 kalloc_type_apply_policy(uint16_t *kt_freq_list, uint16_t *kt_zones,
1315     uint16_t zone_budget)
1316 {
1317 	uint16_t total_sig = 0;
1318 	uint16_t min_sig = 0;
1319 	uint16_t assigned_zones = 0;
1320 	uint16_t remaining_zones = zone_budget;
1321 	uint16_t min_zones_per_size = 2;
1322 
1323 #if DEBUG || DEVELOPMENT
1324 	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1325 		uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1326 		assert(zone_budget + current_zones <= MAX_ZONES);
1327 	}
1328 #endif
1329 
1330 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1331 		uint16_t sig_freq = kt_freq_list[i];
1332 		uint16_t min_zones = min_zones_per_size;
1333 		if (sig_freq < min_zones_per_size) {
1334 			min_zones = sig_freq;
1335 		}
1336 		total_sig += sig_freq;
1337 		kt_zones[i] = min_zones;
1338 		min_sig += min_zones;
1339 	}
1340 	if (remaining_zones > total_sig) {
1341 		remaining_zones = total_sig;
1342 	}
1343 	assert(remaining_zones >= min_sig);
1344 	remaining_zones -= min_sig;
1345 	total_sig -= min_sig;
1346 	assigned_zones += min_sig;
1347 	uint16_t modulo = 0;
1348 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1349 		uint16_t freq = kt_freq_list[i];
1350 		if (freq < min_zones_per_size) {
1351 			continue;
1352 		}
1353 		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1354 		uint16_t n_zones = (uint16_t) numer / total_sig;
1355 
1356 		/*
1357 		 * Accumulate remainder and increment n_zones when it goes above
1358 		 * denominator
1359 		 */
1360 		modulo += numer % total_sig;
1361 		if (modulo >= total_sig) {
1362 			n_zones++;
1363 			modulo -= total_sig;
1364 		}
1365 
1366 		/*
1367 		 * Cap the total number of zones to the unique signatures
1368 		 */
1369 		if ((n_zones + min_zones_per_size) > freq) {
1370 			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1371 			modulo += (extra_zones * total_sig);
1372 			n_zones -= extra_zones;
1373 		}
1374 		kt_zones[i] += n_zones;
1375 		assigned_zones += n_zones;
1376 	}
1377 
1378 	if (kt_options & KT_OPTIONS_DEBUG) {
1379 		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1380 		    assigned_zones, remaining_zones + min_sig - assigned_zones);
1381 	}
1382 	return remaining_zones + min_sig - assigned_zones;
1383 }
1384 
1385 __startup_func
1386 static void
kalloc_type_create_zone_for_size(zone_t * kt_zones_for_size,uint16_t kt_zones,vm_size_t z_size)1387 kalloc_type_create_zone_for_size(zone_t *kt_zones_for_size,
1388     uint16_t kt_zones, vm_size_t z_size)
1389 {
1390 	zone_t p_zone = NULL;
1391 
1392 	for (uint16_t i = 0; i < kt_zones; i++) {
1393 		char *z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1394 		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1395 		    (size_t) z_size);
1396 		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1397 #if DEBUG || DEVELOPMENT
1398 		if (i != 0) {
1399 			p_zone->z_kt_next = z;
1400 		}
1401 #endif
1402 		p_zone = z;
1403 		kt_zones_for_size[i] = z;
1404 	}
1405 }
1406 #endif /* ZSECURITY_CONFIG(KALLOC_TYPE) */
1407 
1408 __startup_func
1409 static void
kalloc_type_create_zones_fixed(uint16_t * kt_skip_list_start,uint16_t * kt_freq_list,uint16_t * kt_freq_list_total,uint16_t * kt_shuffle_buf)1410 kalloc_type_create_zones_fixed(uint16_t *kt_skip_list_start,
1411     uint16_t *kt_freq_list, uint16_t *kt_freq_list_total,
1412     uint16_t *kt_shuffle_buf)
1413 {
1414 	uint16_t *kt_skip_list = kt_skip_list_start;
1415 	uint16_t p_j = 0;
1416 
1417 	uint16_t kt_zones[MAX_K_ZONE(k_zone_cfg)] = {};
1418 
1419 #if DEBUG || DEVELOPMENT
1420 	uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1421 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1422 #endif
1423 	/*
1424 	 * Apply policy to determine how many zones to create for each size
1425 	 * class.
1426 	 */
1427 #if ZSECURITY_CONFIG(KALLOC_TYPE)
1428 	kalloc_type_apply_policy(kt_freq_list, kt_zones,
1429 	    ZSECURITY_CONFIG_KT_BUDGET);
1430 	/*
1431 	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1432 	 */
1433 	if (kt_options & KT_OPTIONS_DEBUG) {
1434 		printf("Size\ttotal_sig\tunique_signatures\tzones\n");
1435 		for (uint32_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1436 			printf("%u\t%u\t%u\t%u\n", k_zone_cfg[i].kzc_size,
1437 			    kt_freq_list_total[i], kt_freq_list[i], kt_zones[i]);
1438 		}
1439 	}
1440 #else /* ZSECURITY_CONFIG(KALLOC_TYPE) */
1441 #pragma unused(kt_freq_list_total)
1442 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
1443 
1444 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
1445 		uint16_t n_unique_sig = kt_freq_list[i];
1446 		vm_size_t z_size = k_zone_cfg[i].kzc_size;
1447 		uint16_t n_zones = kt_zones[i];
1448 
1449 		if (n_unique_sig == 0) {
1450 			continue;
1451 		}
1452 
1453 		assert(n_zones <= 20);
1454 		zone_t kt_zones_for_size[20] = {};
1455 #if ZSECURITY_CONFIG(KALLOC_TYPE)
1456 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1457 		    n_zones, z_size);
1458 #else /* ZSECURITY_CONFIG(KALLOC_TYPE) */
1459 		/*
1460 		 * Default to using KHEAP_DEFAULT if this feature is off
1461 		 */
1462 		n_zones = 1;
1463 		kt_zones_for_size[0] = kalloc_heap_zone_for_size(
1464 			KHEAP_DEFAULT, z_size);
1465 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
1466 
1467 #if DEBUG || DEVELOPMENT
1468 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1469 		/*
1470 		 * Ensure that there is enough space to shuffle n_unique_sig
1471 		 * indices
1472 		 */
1473 		assert(n_unique_sig < kt_shuffle_count);
1474 #endif
1475 
1476 		/*
1477 		 * Get a shuffled set of signature indices
1478 		 */
1479 		*kt_shuffle_buf = 0;
1480 		if (n_unique_sig > 1) {
1481 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1482 		}
1483 
1484 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1485 			/*
1486 			 * For every size that has unique types
1487 			 */
1488 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1489 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1490 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1491 			zone_t zone = kt_zones_for_size[j % n_zones];
1492 			kalloc_type_assign_zone_fixed(&kt_buffer[cur].ktv_fixed,
1493 			    &kt_buffer[end].ktv_fixed, zone);
1494 		}
1495 		p_j += n_unique_sig;
1496 	}
1497 }
1498 
1499 #if ZSECURITY_CONFIG(KALLOC_TYPE)
1500 __startup_func
1501 static void
kalloc_type_create_zones_var(void)1502 kalloc_type_create_zones_var(void)
1503 {
1504 	size_t kheap_zsize[KHEAP_NUM_ZONES] = {};
1505 	size_t step = KHEAP_STEP_START;
1506 	uint32_t start = 0;
1507 	/*
1508 	 * Manually initialize extra initial zones
1509 	 */
1510 #if !__LP64__
1511 	kheap_zsize[start] = 8;
1512 	start++;
1513 #endif
1514 	kheap_zsize[start] = 16;
1515 	kheap_zsize[start + 1] = KHEAP_START_SIZE;
1516 
1517 	/*
1518 	 * Compute sizes for remaining zones
1519 	 */
1520 	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
1521 		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
1522 		kheap_zsize[step_idx] = kheap_zsize[step_idx - 1] + step;
1523 		kheap_zsize[step_idx + 1] = kheap_zsize[step_idx] + step;
1524 		step *= 2;
1525 	}
1526 
1527 	/*
1528 	 * Create zones
1529 	 */
1530 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1531 	for (uint32_t i = KT_VAR_PTR_HEAP; i < kt_var_heaps + 1; i++) {
1532 		for (uint32_t j = 0; j < KHEAP_NUM_ZONES; j++) {
1533 			char *z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1534 			snprintf(z_name, MAX_ZONE_NAME, "%s%u.%zu", KHEAP_KT_VAR->kh_name, i,
1535 			    kheap_zsize[j]);
1536 			zone_create_flags_t flags = ZC_KASAN_NOREDZONE |
1537 			    ZC_KASAN_NOQUARANTINE | ZC_KALLOC_TYPE |
1538 			    ZC_PGZ_USE_GUARDS;
1539 
1540 			zone_t z_ptr = zone_create_ext(z_name, kheap_zsize[j], flags,
1541 			    ZONE_ID_ANY, ^(zone_t z){
1542 				zone_security_array[zone_index(z)].z_kheap_id = KHEAP_ID_KT_VAR;
1543 			});
1544 			if (j == 0) {
1545 				kalloc_type_heap_array[i].kh_zstart = zone_index(z_ptr);
1546 			}
1547 		}
1548 	}
1549 
1550 	/*
1551 	 * All variable kalloc type allocations are collapsed into a single
1552 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1553 	 */
1554 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1555 	zone_view_count += 1;
1556 }
1557 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
1558 
1559 
1560 __startup_func
1561 static void
kalloc_type_view_init_fixed(void)1562 kalloc_type_view_init_fixed(void)
1563 {
1564 	kalloc_type_hash_seed = (uint32_t) early_random();
1565 	/*
1566 	 * Parse __kalloc_type sections and build array of pointers to
1567 	 * all kalloc type views in kt_buffer.
1568 	 */
1569 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1570 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1571 
1572 #if DEBUG || DEVELOPMENT
1573 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1574 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1575 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1576 #endif
1577 
1578 	/*
1579 	 * Sort based on size class and signature
1580 	 */
1581 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1582 	    kalloc_type_cmp_fixed);
1583 
1584 	/*
1585 	 * Build a skip list that holds starts of unique signatures and a
1586 	 * frequency list of number of unique and total signatures per kalloc
1587 	 * size class
1588 	 */
1589 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1590 	uint16_t kt_freq_list[MAX_K_ZONE(k_zone_cfg)] = { 0 };
1591 	uint16_t kt_freq_list_total[MAX_K_ZONE(k_zone_cfg)] = { 0 };
1592 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1593 		kt_skip_list_start, kt_freq_list, kt_freq_list_total, kt_count);
1594 
1595 	/*
1596 	 * Create zones based on signatures
1597 	 */
1598 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_freq_list,
1599 	    kt_freq_list_total, kt_shuffle_buf);
1600 }
1601 
1602 #if ZSECURITY_CONFIG(KALLOC_TYPE)
1603 __startup_func
1604 static void
kalloc_type_view_init_var(void)1605 kalloc_type_view_init_var(void)
1606 {
1607 	/*
1608 	 * Zones are created prior to parsing the views as zone budget is fixed
1609 	 * per sizeclass and special types identified while parsing are redirected
1610 	 * as they are discovered.
1611 	 */
1612 	kalloc_type_create_zones_var();
1613 
1614 	/*
1615 	 * Parse __kalloc_var sections and build array of pointers to views that
1616 	 * aren't rediected in kt_buffer.
1617 	 */
1618 	kt_count = kalloc_type_view_parse(KTV_VAR);
1619 	assert(kt_count < UINT32_MAX);
1620 
1621 #if DEBUG || DEVELOPMENT
1622 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
1623 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
1624 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1625 #endif
1626 
1627 	/*
1628 	 * Sort based on size class and signature
1629 	 */
1630 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
1631 	    kalloc_type_cmp_var);
1632 
1633 	/*
1634 	 * Build a skip list that holds starts of unique signatures
1635 	 */
1636 	uint32_t *kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
1637 	uint32_t unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start);
1638 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP;
1639 	/*
1640 	 * If we have only one heap then other elements share heap with pointer
1641 	 * arrays
1642 	 */
1643 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
1644 		fixed_heaps = KT_VAR_PTR_HEAP;
1645 	}
1646 
1647 	for (uint32_t i = 1; i <= unique_sig; i++) {
1648 		uint32_t heap_id = kmem_get_random16(kt_var_heaps - fixed_heaps) +
1649 		    fixed_heaps;
1650 		uint32_t start = kt_skip_list_start[i - 1];
1651 		uint32_t end = kt_skip_list_start[i];
1652 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1653 		    &kt_buffer[end].ktv_var, heap_id);
1654 	}
1655 }
1656 #else /* ZSECURITY_CONFIG(KALLOC_TYPE) */
1657 __startup_func
1658 static void
kalloc_type_view_init_var(void)1659 kalloc_type_view_init_var(void)
1660 {
1661 	*KHEAP_KT_VAR = *KHEAP_DEFAULT;
1662 }
1663 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
1664 
1665 __startup_func
1666 static void
kalloc_type_views_init(void)1667 kalloc_type_views_init(void)
1668 {
1669 	/*
1670 	 * Allocate scratch space to parse kalloc_type_views and create
1671 	 * other structures necessary to process them.
1672 	 */
1673 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
1674 
1675 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
1676 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_KALLOC);
1677 
1678 	/*
1679 	 * Handle fixed size views
1680 	 */
1681 	kalloc_type_view_init_fixed();
1682 
1683 	/*
1684 	 * Reset
1685 	 */
1686 	bzero(kt_buffer, kt_scratch_size);
1687 	kt_count = max_count;
1688 
1689 	/*
1690 	 * Handle variable size views
1691 	 */
1692 	kalloc_type_view_init_var();
1693 
1694 	/*
1695 	 * Free resources used
1696 	 */
1697 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
1698 }
1699 STARTUP(ZALLOC, STARTUP_RANK_FOURTH, kalloc_type_views_init);
1700 
1701 #pragma mark accessors
1702 
1703 #define KFREE_ABSURD_SIZE \
1704 	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
1705 
1706 /*
1707  * This is used to sanitize size for kasan or OOB adjustments.
1708  * It assumes that size is [0, KFREE_ABSURD_SIZE) valued.
1709  */
1710 __header_always_inline vm_size_t
K_SANE_SIZE(vm_size_t size)1711 K_SANE_SIZE(vm_size_t size)
1712 {
1713 	return MAX(size, 1);
1714 }
1715 
1716 static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)1717 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
1718 {
1719 	thread_t thr = current_thread();
1720 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
1721 }
1722 
1723 static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)1724 KALLOC_ZINFO_SFREE(vm_size_t bytes)
1725 {
1726 	thread_t thr = current_thread();
1727 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
1728 }
1729 
1730 zone_t
kalloc_heap_zone_for_size(kalloc_heap_t kheap,vm_size_t size)1731 kalloc_heap_zone_for_size(kalloc_heap_t kheap, vm_size_t size)
1732 {
1733 	struct kheap_zones *khz = kheap->kh_zones;
1734 	zone_t z = ZONE_NULL;
1735 
1736 	if (size < MAX_SIZE_ZDLUT) {
1737 		z = khz->k_zone[khz->dlut[INDEX_ZDLUT(size)]];
1738 		__builtin_assume(z != ZONE_NULL);
1739 	} else if (size <= KHEAP_MAX_SIZE) {
1740 		uint32_t zindex = khz->k_zindex_start;
1741 		while (khz->cfg[zindex].kzc_size < size) {
1742 			zindex++;
1743 		}
1744 		z = khz->k_zone[zindex];
1745 		__builtin_assume(z != ZONE_NULL);
1746 	}
1747 
1748 	return z;
1749 }
1750 
1751 static zone_t
kalloc_type_zone_for_size(kalloc_type_var_view_t kt_view,size_t size)1752 kalloc_type_zone_for_size(kalloc_type_var_view_t kt_view, size_t size)
1753 {
1754 	uint32_t zid = kt_view->kt_heap_start, idx;
1755 
1756 	if (size <= KHEAP_START_SIZE) {
1757 #if !__LP64__
1758 		zid  += (size > 8);
1759 #endif
1760 		zid  += (size > 16);
1761 	} else if (size <= KHEAP_MAX_SIZE) {
1762 		/*
1763 		 * . log2down(size - 1) is log2up(size) - 1
1764 		 * . (size - 1) >> (log2down(size - 1) - 1) is either 0x2 or 0x3
1765 		 */
1766 		size -= 1;
1767 		idx   = kmem_log2down((uint32_t)size);
1768 		zid  += KHEAP_EXTRA_ZONES +
1769 		    2 * (idx - KHEAP_START_IDX) +
1770 		    ((uint32_t)size >> (idx - 1)) - 2;
1771 	} else {
1772 		return ZONE_NULL;
1773 	}
1774 
1775 	return zone_array + zid;
1776 }
1777 
1778 static zone_t
kalloc_zone_for_size(kalloc_heap_t kheap,kalloc_type_var_view_t kt_view,vm_size_t size,bool forcopyin)1779 kalloc_zone_for_size(
1780 	kalloc_heap_t           kheap,
1781 	kalloc_type_var_view_t  kt_view,
1782 	vm_size_t               size,
1783 	bool                    forcopyin)
1784 {
1785 	vm_size_t max_size = KHEAP_MAX_SIZE;
1786 
1787 	if (kt_view && kheap->kh_heap_id == KHEAP_ID_KT_VAR) {
1788 		return kalloc_type_zone_for_size(kt_view, size);
1789 	}
1790 
1791 	if (forcopyin) {
1792 #if __x86_64__
1793 		/*
1794 		 * On Intel, the OSData() ABI used to allocate
1795 		 * from the kernel map starting at PAGE_SIZE.
1796 		 *
1797 		 * If only vm_map_copyin() or a wrapper is used,
1798 		 * then everything will work fine because vm_map_copy_t
1799 		 * will perform an actual copy if the data is smaller
1800 		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
1801 		 *
1802 		 * However, if anyone is trying to call mach_vm_remap(),
1803 		 * then bad things (TM) happen.
1804 		 *
1805 		 * Avoid this by preserving the ABI and moving
1806 		 * to kalloc_large() earlier.
1807 		 *
1808 		 * Any recent code really ought to use IOMemoryDescriptor
1809 		 * for this purpose however.
1810 		 */
1811 		max_size = PAGE_SIZE - 1;
1812 #endif
1813 	}
1814 
1815 	if (size <= max_size) {
1816 		return kalloc_heap_zone_for_size(kheap, size);
1817 	}
1818 
1819 	return ZONE_NULL;
1820 }
1821 
1822 static inline bool
kheap_size_from_zone(void * addr,vm_size_t size,bool forcopyin)1823 kheap_size_from_zone(void *addr, vm_size_t size, bool forcopyin)
1824 {
1825 	vm_size_t max_size = KHEAP_MAX_SIZE;
1826 
1827 #if __x86_64__
1828 	/*
1829 	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size()
1830 	 * behavior, then the element could have a PAGE_SIZE reported size,
1831 	 * yet still be from a zone for Z_MAY_COPYINMAP.
1832 	 */
1833 	if (forcopyin) {
1834 		if (size == PAGE_SIZE &&
1835 		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
1836 			return true;
1837 		}
1838 
1839 		max_size = PAGE_SIZE - 1;
1840 	}
1841 #else
1842 #pragma unused(addr, forcopyin)
1843 #endif
1844 
1845 	return size <= max_size;
1846 }
1847 
1848 
1849 static kmem_guard_t
kalloc_guard(vm_tag_t tag,uint16_t type_hash,const void * owner)1850 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
1851 {
1852 	kmem_guard_t guard = {
1853 		.kmg_atomic      = true,
1854 		.kmg_tag         = tag,
1855 		.kmg_type_hash   = type_hash,
1856 		.kmg_context     = os_hash_kernel_pointer(owner),
1857 	};
1858 
1859 	/*
1860 	 * TODO: this use is really not sufficiently smart.
1861 	 */
1862 
1863 	return guard;
1864 }
1865 
1866 #pragma mark kalloc
1867 
1868 static inline kalloc_heap_t
kalloc_type_get_heap(kalloc_type_var_view_t kt_view,bool kt_free __unused)1869 kalloc_type_get_heap(kalloc_type_var_view_t kt_view, bool kt_free __unused)
1870 {
1871 	kalloc_heap_t fallback = KHEAP_DEFAULT;
1872 
1873 	/*
1874 	 * Redirect data-only views
1875 	 */
1876 	if (kalloc_type_is_data_fast(kalloc_type_func(KTV_VAR, get_atom,
1877 	    (vm_offset_t) kt_view, false))) {
1878 		return KHEAP_DATA_BUFFERS;
1879 	}
1880 
1881 	if (kt_view->kt_flags & KT_PROCESSED) {
1882 		return KHEAP_KT_VAR;
1883 	}
1884 
1885 	/*
1886 	 * Views from kexts not in BootKC on macOS
1887 	 */
1888 #if !ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
1889 	if (kt_free) {
1890 		fallback = KHEAP_ANY;
1891 	}
1892 #endif
1893 
1894 	return fallback;
1895 }
1896 
1897 __attribute__((noinline))
1898 static struct kalloc_result
kalloc_large(kalloc_heap_t kheap,vm_size_t req_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)1899 kalloc_large(
1900 	kalloc_heap_t         kheap,
1901 	vm_size_t             req_size,
1902 	zalloc_flags_t        flags,
1903 	uint16_t              kt_hash,
1904 	void                 *owner __unused)
1905 {
1906 	int kma_flags = KMA_NONE;
1907 	vm_tag_t tag;
1908 	vm_offset_t addr, size;
1909 
1910 	if (flags & Z_NOFAIL) {
1911 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
1912 		    (size_t)req_size);
1913 	}
1914 
1915 	/*
1916 	 * kmem_alloc could block so we return if noblock
1917 	 *
1918 	 * also, reject sizes larger than our address space is quickly,
1919 	 * as kt_size or IOMallocArraySize() expect this.
1920 	 */
1921 	if ((flags & Z_NOWAIT) ||
1922 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
1923 		return (struct kalloc_result){ };
1924 	}
1925 
1926 #ifndef __x86_64__
1927 	/*
1928 	 * (73465472) on Intel we didn't use to pass this flag,
1929 	 * which in turned allowed kalloc_large() memory to be shared
1930 	 * with user directly.
1931 	 *
1932 	 * We're bound by this unfortunate ABI.
1933 	 */
1934 	if ((flags & Z_MAY_COPYINMAP) == 0) {
1935 		kma_flags |= KMA_KOBJECT;
1936 	} else {
1937 		assert(kheap == KHEAP_DATA_BUFFERS);
1938 	}
1939 #endif
1940 	if (flags & Z_NOPAGEWAIT) {
1941 		kma_flags |= KMA_NOPAGEWAIT;
1942 	}
1943 	if (flags & Z_ZERO) {
1944 		kma_flags |= KMA_ZERO;
1945 	}
1946 	if (kheap == KHEAP_DATA_BUFFERS) {
1947 		kma_flags |= KMA_DATA;
1948 	}
1949 
1950 	tag = zalloc_flags_get_tag(flags);
1951 	if (flags & Z_VM_TAG_BT_BIT) {
1952 		tag = vm_tag_bt() ?: tag;
1953 	}
1954 	if (tag == VM_KERN_MEMORY_NONE) {
1955 		tag = kheap->kh_tag;
1956 	}
1957 
1958 #if KASAN_KALLOC
1959 	/* large allocation - use guard pages instead of small redzones */
1960 	size = round_page(kasan_alloc_resize(req_size) + ptoa(2));
1961 #else
1962 	size = round_page(req_size);
1963 	if (flags & Z_FULLSIZE) {
1964 		req_size = size;
1965 	}
1966 #endif
1967 
1968 	addr = kmem_alloc_guard(kernel_map, size, 0,
1969 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
1970 
1971 	if (addr != 0) {
1972 		counter_inc(&kalloc_large_count);
1973 		counter_add(&kalloc_large_total, size);
1974 		KALLOC_ZINFO_SALLOC(size);
1975 #if KASAN_KALLOC
1976 		/* fixup the return address to skip the redzone */
1977 		addr = kasan_alloc(addr, size, req_size, PAGE_SIZE);
1978 #else
1979 #endif
1980 	} else {
1981 		addr = 0;
1982 	}
1983 
1984 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
1985 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
1986 }
1987 
1988 static inline struct kalloc_result
kalloc_zone(zone_t z,zone_stats_t zstats,zalloc_flags_t flags,vm_size_t req_size)1989 kalloc_zone(
1990 	zone_t                  z,
1991 	zone_stats_t            zstats,
1992 	zalloc_flags_t          flags,
1993 	vm_size_t               req_size)
1994 {
1995 	struct kalloc_result kr;
1996 	vm_size_t esize;
1997 
1998 	esize   = zone_elem_size(z);
1999 	kr.addr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN, esize);
2000 	kr.size = req_size;
2001 
2002 	if (__probable(kr.addr)) {
2003 #if !KASAN_KALLOC
2004 		if (flags & Z_FULLSIZE) {
2005 			kr.size = req_size = esize;
2006 		}
2007 #endif /* !KASAN_KALLOC */
2008 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2009 		kr.addr = zone_element_pgz_oob_adjust(kr, esize);
2010 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2011 #if KASAN_KALLOC
2012 		kr.addr = (void *)kasan_alloc((vm_offset_t)kr.addr, esize,
2013 		    kr.size, KASAN_GUARD_SIZE);
2014 		__nosan_bzero((char *)kr.addr, kr.size);
2015 #endif /* KASAN_KALLOC */
2016 #if CONFIG_KERNEL_TBI && KASAN_TBI
2017 		/*
2018 		 * Kasan-TBI at least needs to tag one byte so that
2019 		 * we can prove the allocation was live at kfree_ext()
2020 		 * time by doing a manual __asan_loadN check.
2021 		 */
2022 		kr.addr = (void *)kasan_tbi_tag_zalloc((vm_offset_t)kr.addr,
2023 		    esize, K_SANE_SIZE(kr.size), false);
2024 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
2025 	}
2026 
2027 	DTRACE_VM3(kalloc, vm_size_t, kr.size, vm_size_t, req_size, void*, kr.addr);
2028 	return kr;
2029 }
2030 
2031 struct kalloc_result
kalloc_ext(void * kheap_or_kt_view,vm_size_t req_size,zalloc_flags_t flags,void * owner)2032 kalloc_ext(
2033 	void                   *kheap_or_kt_view,
2034 	vm_size_t               req_size,
2035 	zalloc_flags_t          flags,
2036 	void                   *owner)
2037 {
2038 	kalloc_type_var_view_t kt_view;
2039 	kalloc_heap_t kheap;
2040 	zone_stats_t zstats = NULL;
2041 	vm_size_t size;
2042 	zone_t z;
2043 	uint16_t kt_hash;
2044 
2045 	if (kt_is_var_view(kheap_or_kt_view)) {
2046 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2047 		kheap   = kalloc_type_get_heap(kt_view, false);
2048 		/*
2049 		 * Use stats from view if present, else use stats from kheap.
2050 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2051 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2052 		 * use stats from the respective zones.
2053 		 */
2054 		zstats  = kt_view->kt_stats;
2055 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2056 	} else {
2057 		kt_view = NULL;
2058 		kheap   = kheap_or_kt_view;
2059 		kt_hash = kheap->kh_type_hash;
2060 	}
2061 
2062 	if (!zstats) {
2063 		zstats = kheap->kh_stats;
2064 	}
2065 
2066 #if KASAN_KALLOC
2067 	/*
2068 	 * Kasan for kalloc heaps will put the redzones *inside*
2069 	 * the allocation, and hence augment its size.
2070 	 *
2071 	 * kalloc heaps do not use zone_t::z_kasan_redzone.
2072 	 */
2073 	size = kasan_alloc_resize(req_size);
2074 #else
2075 	size = req_size;
2076 #endif
2077 
2078 	z = kalloc_zone_for_size(kheap, kt_view, size, flags & Z_MAY_COPYINMAP);
2079 	if (z) {
2080 		return kalloc_zone(z, zstats, flags, req_size);
2081 	} else {
2082 		return kalloc_large(kheap, req_size, flags, kt_hash, owner);
2083 	}
2084 }
2085 
2086 void *
2087 kalloc_external(vm_size_t size);
2088 void *
kalloc_external(vm_size_t size)2089 kalloc_external(vm_size_t size)
2090 {
2091 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2092 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2093 }
2094 
2095 void *
2096 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2097 void *
kalloc_data_external(vm_size_t size,zalloc_flags_t flags)2098 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2099 {
2100 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2101 	return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags);
2102 }
2103 
2104 #if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
2105 
2106 __abortlike
2107 static void
kalloc_data_require_panic(void * addr,vm_size_t size)2108 kalloc_data_require_panic(void *addr, vm_size_t size)
2109 {
2110 	zone_id_t zid = zone_id_for_element(addr, size);
2111 
2112 	if (zid != ZONE_ID_INVALID) {
2113 		zone_t z = &zone_array[zid];
2114 		zone_security_flags_t zsflags = zone_security_array[zid];
2115 
2116 		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
2117 			panic("kalloc_data_require failed: address %p in [%s%s]",
2118 			    addr, zone_heap_name(z), zone_name(z));
2119 		}
2120 
2121 		panic("kalloc_data_require failed: address %p in [%s%s], "
2122 		    "size too large %zd > %zd", addr,
2123 		    zone_heap_name(z), zone_name(z),
2124 		    (size_t)size, (size_t)zone_elem_size(z));
2125 	} else {
2126 		panic("kalloc_data_require failed: address %p not in zone native map",
2127 		    addr);
2128 	}
2129 }
2130 
2131 __abortlike
2132 static void
kalloc_non_data_require_panic(void * addr,vm_size_t size)2133 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2134 {
2135 	zone_id_t zid = zone_id_for_element(addr, size);
2136 
2137 	if (zid != ZONE_ID_INVALID) {
2138 		zone_t z = &zone_array[zid];
2139 		zone_security_flags_t zsflags = zone_security_array[zid];
2140 
2141 		switch (zsflags.z_kheap_id) {
2142 		case KHEAP_ID_NONE:
2143 		case KHEAP_ID_DATA_BUFFERS:
2144 		case KHEAP_ID_KT_VAR:
2145 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2146 			    addr, zone_heap_name(z), zone_name(z));
2147 		default:
2148 			break;
2149 		}
2150 
2151 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2152 		    "size too large %zd > %zd", addr,
2153 		    zone_heap_name(z), zone_name(z),
2154 		    (size_t)size, (size_t)zone_elem_size(z));
2155 	} else {
2156 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2157 		    addr);
2158 	}
2159 }
2160 
2161 #endif /* ZSECURITY_CONFIG(SUBMAP_USER_DATA) */
2162 
2163 void
kalloc_data_require(void * addr,vm_size_t size)2164 kalloc_data_require(void *addr, vm_size_t size)
2165 {
2166 #if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
2167 	zone_id_t zid = zone_id_for_element(addr, size);
2168 
2169 	if (zid != ZONE_ID_INVALID) {
2170 		zone_t z = &zone_array[zid];
2171 		zone_security_flags_t zsflags = zone_security_array[zid];
2172 		if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS &&
2173 		    size <= zone_elem_size(z)) {
2174 			return;
2175 		}
2176 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2177 	    (vm_address_t)pgz_decode(addr, size), size)) {
2178 		return;
2179 	}
2180 
2181 	kalloc_data_require_panic(addr, size);
2182 #else
2183 #pragma unused(addr, size)
2184 #endif
2185 }
2186 
2187 void
kalloc_non_data_require(void * addr,vm_size_t size)2188 kalloc_non_data_require(void *addr, vm_size_t size)
2189 {
2190 #if ZSECURITY_CONFIG(SUBMAP_USER_DATA)
2191 	zone_id_t zid = zone_id_for_element(addr, size);
2192 
2193 	if (zid != ZONE_ID_INVALID) {
2194 		zone_t z = &zone_array[zid];
2195 		zone_security_flags_t zsflags = zone_security_array[zid];
2196 		switch (zsflags.z_kheap_id) {
2197 		case KHEAP_ID_NONE:
2198 			if (!zsflags.z_kalloc_type) {
2199 				break;
2200 			}
2201 			OS_FALLTHROUGH;
2202 		case KHEAP_ID_DEFAULT:
2203 		case KHEAP_ID_KT_VAR:
2204 			if (size < zone_elem_size(z)) {
2205 				return;
2206 			}
2207 			break;
2208 		default:
2209 			break;
2210 		}
2211 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2212 	    (vm_address_t)pgz_decode(addr, size), size)) {
2213 		return;
2214 	}
2215 
2216 	kalloc_non_data_require_panic(addr, size);
2217 #else
2218 #pragma unused(addr, size)
2219 #endif
2220 }
2221 
2222 void *
kalloc_type_impl_external(kalloc_type_view_t kt_view,zalloc_flags_t flags)2223 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2224 {
2225 	/*
2226 	 * Callsites from a kext that aren't in the BootKC on macOS or
2227 	 * any callsites on armv7 are not processed during startup,
2228 	 * default to using kheap_alloc
2229 	 *
2230 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2231 	 * NULL as we need to use the vm for the allocation
2232 	 *
2233 	 */
2234 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2235 		vm_size_t size = kalloc_type_get_size(kt_view->kt_size);
2236 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2237 		return kalloc_ext(KHEAP_DEFAULT, size, flags, NULL).addr;
2238 	}
2239 
2240 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2241 	return zalloc_flags(kt_view, flags);
2242 }
2243 
2244 void *
2245 kalloc_type_var_impl_external(
2246 	kalloc_type_var_view_t  kt_view,
2247 	vm_size_t               size,
2248 	zalloc_flags_t          flags,
2249 	void                   *owner);
2250 void *
kalloc_type_var_impl_external(kalloc_type_var_view_t kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2251 kalloc_type_var_impl_external(
2252 	kalloc_type_var_view_t  kt_view,
2253 	vm_size_t               size,
2254 	zalloc_flags_t          flags,
2255 	void                   *owner)
2256 {
2257 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2258 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2259 }
2260 
2261 #pragma mark kfree
2262 
2263 __abortlike
2264 static void
kfree_heap_confusion_panic(kalloc_heap_t kheap,void * data,size_t size,zone_t z)2265 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2266 {
2267 	zone_security_flags_t zsflags = zone_security_config(z);
2268 	const char *kheap_name = "";
2269 
2270 	if (kheap == KHEAP_ANY) {
2271 		kheap_name = "KHEAP_ANY (default/kalloc type var/data)";
2272 	} else {
2273 		kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2274 	}
2275 
2276 	if (zsflags.z_kalloc_type) {
2277 		panic_include_kalloc_types = true;
2278 		kalloc_type_src_zone = z;
2279 		panic("kfree: addr %p found in kalloc type zone '%s'"
2280 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2281 	}
2282 
2283 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2284 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2285 		    data, size, zone_heap_name(z), z->z_name);
2286 	} else {
2287 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2288 		    data, size, zone_heap_name(z), kheap_name);
2289 	}
2290 }
2291 
2292 __abortlike
2293 static void
kfree_size_confusion_panic(zone_t z,void * data,size_t oob_offs,size_t size,size_t zsize)2294 kfree_size_confusion_panic(zone_t z, void *data,
2295     size_t oob_offs, size_t size, size_t zsize)
2296 {
2297 	if (z) {
2298 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2299 		    "with elem_size %zd",
2300 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2301 	} else {
2302 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2303 		    data, size, oob_offs);
2304 	}
2305 }
2306 
2307 __abortlike
2308 static void
kfree_size_invalid_panic(void * data,size_t size)2309 kfree_size_invalid_panic(void *data, size_t size)
2310 {
2311 	panic("kfree: addr %p trying to free with nonsensical size %zd",
2312 	    data, size);
2313 }
2314 
2315 __abortlike
2316 static void
kfree_size_require_panic(void * data,size_t size,size_t min_size,size_t max_size)2317 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2318     size_t max_size)
2319 {
2320 	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2321 	    data, size, min_size, max_size);
2322 }
2323 
2324 static void
kfree_size_require(kalloc_heap_t kheap,void * addr,vm_size_t min_size,vm_size_t max_size)2325 kfree_size_require(
2326 	kalloc_heap_t kheap,
2327 	void *addr,
2328 	vm_size_t min_size,
2329 	vm_size_t max_size)
2330 {
2331 	assert3u(min_size, <=, max_size);
2332 #if KASAN_KALLOC
2333 	max_size = kasan_alloc_resize(max_size);
2334 #endif
2335 	zone_t max_zone = kalloc_heap_zone_for_size(kheap, max_size);
2336 	vm_size_t max_zone_size = max_zone->z_elem_size;
2337 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2338 	if (elem_size > max_zone_size || elem_size < min_size) {
2339 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2340 	}
2341 }
2342 
2343 static void
kfree_large(vm_offset_t addr,vm_size_t size,kmf_flags_t flags,void * owner)2344 kfree_large(
2345 	vm_offset_t             addr,
2346 	vm_size_t               size,
2347 	kmf_flags_t             flags,
2348 	void                   *owner)
2349 {
2350 #if KASAN_KALLOC
2351 	/* TODO: quarantine for kasan large that works with guards */
2352 	kasan_poison_range(addr, size, ASAN_VALID);
2353 #endif
2354 
2355 	size = kmem_free_guard(kernel_map, addr, size, flags,
2356 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2357 
2358 	counter_dec(&kalloc_large_count);
2359 	counter_add(&kalloc_large_total, -(uint64_t)size);
2360 	KALLOC_ZINFO_SFREE(size);
2361 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2362 }
2363 
2364 static void
kfree_zone(void * kheap_or_kt_view __unsafe_indexable,void * data,vm_size_t size,zone_t z,vm_size_t zsize)2365 kfree_zone(
2366 	void                   *kheap_or_kt_view __unsafe_indexable,
2367 	void                   *data,
2368 	vm_size_t               size,
2369 	zone_t                  z,
2370 	vm_size_t               zsize)
2371 {
2372 	zone_security_flags_t zsflags = zone_security_config(z);
2373 	kalloc_type_var_view_t kt_view;
2374 	kalloc_heap_t kheap;
2375 	zone_stats_t zstats;
2376 
2377 	if (kt_is_var_view(kheap_or_kt_view)) {
2378 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2379 		kheap   = kalloc_type_get_heap(kt_view, true);
2380 		zstats  = kt_view->kt_stats;
2381 	} else {
2382 		kt_view = NULL;
2383 		kheap   = kheap_or_kt_view;
2384 		zstats  = kheap ? kheap->kh_stats : NULL;
2385 	}
2386 
2387 	zsflags = zone_security_config(z);
2388 	if (kheap != KHEAP_ANY && kheap != KHEAP_KT_VAR) {
2389 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2390 			kfree_heap_confusion_panic(kheap, data, size, z);
2391 		}
2392 	} else if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
2393 		/*
2394 		 * Allocations from kalloc.type.var zones use stats from KHEAP_KT_VAR
2395 		 * if they don't have private accounting. We need to use the kheap_id here
2396 		 * as we allow cross frees between default and KT_VAR and we may use
2397 		 * KHEAP_ANY which will skew the stats if we relie on stats from kheap.
2398 		 */
2399 		if (zstats == NULL) {
2400 			zstats = KHEAP_KT_VAR->kh_stats;
2401 		}
2402 	} else if (zsflags.z_kheap_id != KHEAP_ID_DEFAULT &&
2403 	    (kt_options & KT_OPTIONS_LOOSE_FREE &&
2404 	    zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS)) {
2405 		kfree_heap_confusion_panic(kheap, data, size, z);
2406 	}
2407 
2408 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2409 
2410 #if KASAN_KALLOC
2411 	/*
2412 	 * Hand the allocation to the quarantine, data might become NULL.
2413 	 * FIXME: this breaks accounting because we do not remember `zstats`
2414 	 */
2415 	kasan_free(&data, &zsize, KASAN_HEAP_KALLOC, &z, size);
2416 	if (data == NULL) {
2417 		return;
2418 	}
2419 #else
2420 	/* needs to be __nosan because the user size might be partial */
2421 	__nosan_bzero(data, zsize);
2422 #endif /* !KASAN_ZALLOC */
2423 	zfree_ext(z, zstats ?: z->z_stats, data, zsize);
2424 }
2425 
2426 void
kfree_ext(void * kheap_or_kt_view,void * data,vm_size_t size)2427 kfree_ext(
2428 	void                   *kheap_or_kt_view,
2429 	void                   *data,
2430 	vm_size_t               size)
2431 {
2432 	vm_size_t adj_size, bucket_size;
2433 	zone_t z;
2434 
2435 	if (data == NULL) {
2436 		return;
2437 	}
2438 
2439 	if (size > KFREE_ABSURD_SIZE) {
2440 		kfree_size_invalid_panic(data, size);
2441 	}
2442 
2443 #if KASAN_KALLOC
2444 	adj_size = kasan_alloc_resize(size);
2445 	kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC);
2446 #else
2447 	adj_size = size;
2448 #if CONFIG_KERNEL_TBI && KASAN_TBI
2449 	__asan_loadN((vm_offset_t)data, K_SANE_SIZE(size));
2450 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
2451 #endif /* KASAN_KALLOC */
2452 
2453 	if (adj_size <= KHEAP_MAX_SIZE) {
2454 		vm_size_t oob_offs;
2455 
2456 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2457 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2458 			kfree_size_confusion_panic(z, data,
2459 			    oob_offs, size, bucket_size);
2460 		}
2461 
2462 		data = (char *)data - oob_offs;
2463 		__builtin_assume(z != ZONE_NULL);
2464 	} else {
2465 		z = ZONE_NULL;
2466 	}
2467 
2468 #if KASAN_KALLOC
2469 	/*
2470 	 * Resize back to the real allocation size.
2471 	 */
2472 	vm_size_t real_size = size;
2473 	data = (void *)kasan_dealloc((vm_address_t)data, &real_size);
2474 	if (z) {
2475 		bucket_size = real_size;
2476 	} else {
2477 		size = real_size;
2478 	}
2479 #endif /* KASAN_KALLOC */
2480 
2481 	if (z) {
2482 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2483 	} else {
2484 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2485 	}
2486 }
2487 
2488 void
kfree_addr_ext(kalloc_heap_t kheap,void * data)2489 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2490 {
2491 	if (data == NULL) {
2492 		return;
2493 	}
2494 
2495 #if KASAN_KALLOC
2496 	kfree_ext(kheap, data, kasan_user_size((vm_offset_t)data));
2497 #else
2498 	vm_offset_t oob_offs;
2499 	vm_size_t size;
2500 	zone_t z;
2501 
2502 #if CONFIG_KERNEL_TBI && KASAN_TBI
2503 	__asan_loadN((vm_offset_t)data, KALLOC_MINSIZE);
2504 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
2505 
2506 	size = zone_element_size(data, &z, true, &oob_offs);
2507 	if (size) {
2508 		data = (char *)data - oob_offs;
2509 		kfree_zone(kheap, data, size - oob_offs, z, size);
2510 	} else {
2511 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2512 	}
2513 #endif /* !KASAN_KALLOC */
2514 }
2515 
2516 void
2517 kfree_external(void *addr, vm_size_t size);
2518 void
kfree_external(void * addr,vm_size_t size)2519 kfree_external(void *addr, vm_size_t size)
2520 {
2521 	kfree_ext(KHEAP_ANY, addr, size);
2522 }
2523 
2524 void
2525 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2526     vm_size_t min_sz, vm_size_t max_sz)
2527 {
2528 	if (__improbable(addr == NULL)) {
2529 		return;
2530 	}
2531 	kfree_size_require(kheap, addr, min_sz, max_sz);
2532 	kfree_addr_ext(kheap, addr);
2533 }
2534 
2535 void
kfree_type_impl_external(kalloc_type_view_t kt_view,void * ptr)2536 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
2537 {
2538 	/*
2539 	 * If callsite is from a kext that isn't in the BootKC, it wasn't
2540 	 * processed during startup so default to using kheap_alloc
2541 	 *
2542 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2543 	 * NULL as we need to use the vm for the allocation/free
2544 	 */
2545 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
2546 		return kheap_free(KHEAP_DEFAULT, ptr,
2547 		           kalloc_type_get_size(kt_view->kt_size));
2548 	}
2549 	if (__improbable(ptr == NULL)) {
2550 		return;
2551 	}
2552 	return zfree(kt_view, ptr);
2553 }
2554 
2555 void
2556 kfree_type_var_impl_external(
2557 	kalloc_type_var_view_t  kt_view,
2558 	void                   *ptr,
2559 	vm_size_t               size);
2560 void
kfree_type_var_impl_external(kalloc_type_var_view_t kt_view,void * ptr,vm_size_t size)2561 kfree_type_var_impl_external(
2562 	kalloc_type_var_view_t  kt_view,
2563 	void                   *ptr,
2564 	vm_size_t               size)
2565 {
2566 	return kfree_type_var_impl(kt_view, ptr, size);
2567 }
2568 
2569 void
2570 kfree_data_external(void *ptr, vm_size_t size);
2571 void
kfree_data_external(void * ptr,vm_size_t size)2572 kfree_data_external(void *ptr, vm_size_t size)
2573 {
2574 	return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
2575 }
2576 
2577 void
2578 kfree_data_addr_external(void *ptr);
2579 void
kfree_data_addr_external(void * ptr)2580 kfree_data_addr_external(void *ptr)
2581 {
2582 	return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
2583 }
2584 
2585 #pragma mark krealloc
2586 
2587 __abortlike
2588 static void
krealloc_size_invalid_panic(void * data,size_t size)2589 krealloc_size_invalid_panic(void *data, size_t size)
2590 {
2591 	panic("krealloc: addr %p trying to free with nonsensical size %zd",
2592 	    data, size);
2593 }
2594 
2595 __attribute__((noinline))
2596 static struct kalloc_result
krealloc_large(kalloc_heap_t kheap,vm_offset_t addr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2597 krealloc_large(
2598 	kalloc_heap_t         kheap,
2599 	vm_offset_t           addr,
2600 	vm_size_t             old_size,
2601 	vm_size_t             new_size,
2602 	zalloc_flags_t        flags,
2603 	uint16_t              kt_hash,
2604 	void                 *owner __unused)
2605 {
2606 	int kmr_flags = KMR_FREEOLD;
2607 	vm_size_t new_req_size = new_size;
2608 	vm_size_t old_req_size = old_size;
2609 	uint64_t delta;
2610 	kmem_return_t kmr;
2611 	vm_tag_t tag;
2612 
2613 	if (flags & Z_NOFAIL) {
2614 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2615 		    (size_t)new_req_size);
2616 	}
2617 
2618 	/*
2619 	 * kmem_alloc could block so we return if noblock
2620 	 *
2621 	 * also, reject sizes larger than our address space is quickly,
2622 	 * as kt_size or IOMallocArraySize() expect this.
2623 	 */
2624 	if ((flags & Z_NOWAIT) ||
2625 	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2626 		return (struct kalloc_result){ };
2627 	}
2628 
2629 #ifndef __x86_64__
2630 	/*
2631 	 * (73465472) on Intel we didn't use to pass this flag,
2632 	 * which in turned allowed kalloc_large() memory to be shared
2633 	 * with user directly.
2634 	 *
2635 	 * We're bound by this unfortunate ABI.
2636 	 */
2637 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2638 		kmr_flags |= KMR_KOBJECT;
2639 	} else {
2640 		assert(kheap == KHEAP_DATA_BUFFERS);
2641 	}
2642 #endif
2643 	if (flags & Z_NOPAGEWAIT) {
2644 		kmr_flags |= KMR_NOPAGEWAIT;
2645 	}
2646 	if (flags & Z_ZERO) {
2647 		kmr_flags |= KMR_ZERO;
2648 	}
2649 	if (kheap == KHEAP_DATA_BUFFERS) {
2650 		kmr_flags |= KMR_DATA;
2651 	}
2652 	if (flags & Z_REALLOCF) {
2653 		kmr_flags |= KMR_REALLOCF;
2654 	}
2655 
2656 	tag = zalloc_flags_get_tag(flags);
2657 	if (flags & Z_VM_TAG_BT_BIT) {
2658 		tag = vm_tag_bt() ?: tag;
2659 	}
2660 	if (tag == VM_KERN_MEMORY_NONE) {
2661 		tag = kheap->kh_tag;
2662 	}
2663 
2664 #if KASAN_KALLOC
2665 	/* large allocation - use guard pages instead of small redzones */
2666 	if (addr) {
2667 		addr = kasan_dealloc(addr, &old_size);
2668 		kasan_poison_range(addr, old_size, ASAN_VALID);
2669 	}
2670 	new_size = round_page(kasan_alloc_resize(new_req_size) + ptoa(2));
2671 #else
2672 	new_size = round_page(new_req_size);
2673 	old_size = round_page(old_req_size);
2674 #endif
2675 
2676 	kmr = kmem_realloc_guard(kernel_map, addr, old_size, new_size,
2677 	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
2678 
2679 	if (kmr.kmr_address != 0) {
2680 		delta = (uint64_t)(new_size - old_size);
2681 	} else if (flags & Z_REALLOCF) {
2682 		counter_dec(&kalloc_large_count);
2683 		delta = (uint64_t)(-old_size);
2684 	} else {
2685 		delta = 0;
2686 	}
2687 
2688 	counter_add(&kalloc_large_total, delta);
2689 	KALLOC_ZINFO_SALLOC((vm_offset_t)delta);
2690 
2691 	if (addr != 0) {
2692 		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
2693 		    void*, addr);
2694 	}
2695 	if (__improbable(kmr.kmr_address == 0)) {
2696 		return (struct kalloc_result){ };
2697 	}
2698 
2699 #if KASAN_KALLOC
2700 	/* fixup the return address to skip the redzone */
2701 	kmr.kmr_address = kasan_alloc(kmr.kmr_address,
2702 	    new_size, new_req_size, PAGE_SIZE);
2703 
2704 	/*
2705 	 * kasan stores backtraces in the page in the right red-zone,
2706 	 * if we realloced, kmem dutifully preserved it, so we need
2707 	 * to clear it.
2708 	 */
2709 	if ((flags & Z_ZERO) && old_req_size < new_req_size) {
2710 		vm_size_t needs_zero_end = PAGE_SIZE + round_page(old_req_size);
2711 
2712 		__nosan_bzero((char *)kmr.kmr_address + old_req_size,
2713 		    MIN(needs_zero_end, new_req_size) - old_req_size);
2714 	}
2715 #endif
2716 
2717 	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
2718 	    void*, kmr.kmr_address);
2719 	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
2720 }
2721 
2722 struct kalloc_result
krealloc_ext(void * kheap_or_kt_view __unsafe_indexable,void * addr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags,void * owner)2723 krealloc_ext(
2724 	void                   *kheap_or_kt_view __unsafe_indexable,
2725 	void                   *addr,
2726 	vm_size_t               old_size,
2727 	vm_size_t               new_size,
2728 	zalloc_flags_t          flags,
2729 	void                   *owner)
2730 {
2731 	vm_size_t old_bucket_size, new_bucket_size, min_size;
2732 	vm_size_t adj_new_size, adj_old_size;
2733 	kalloc_type_var_view_t kt_view;
2734 	kalloc_heap_t kheap;
2735 	zone_stats_t zstats = NULL;
2736 	struct kalloc_result kr;
2737 	vm_offset_t oob_offs = 0;
2738 	bool forcopyin = flags & Z_MAY_COPYINMAP;
2739 	zone_t old_z, new_z;
2740 	uint16_t kt_hash = 0;
2741 
2742 	if (old_size > KFREE_ABSURD_SIZE) {
2743 		krealloc_size_invalid_panic(addr, old_size);
2744 	}
2745 
2746 	if (addr == NULL && new_size == 0) {
2747 		return (struct kalloc_result){ };
2748 	}
2749 
2750 #if KASAN_KALLOC
2751 	/*
2752 	 * Adjust sizes to account for kasan redzones
2753 	 */
2754 	adj_new_size = kasan_alloc_resize(new_size);
2755 	adj_old_size = kasan_alloc_resize(old_size);
2756 	if (addr) {
2757 		kasan_check_free((vm_address_t)addr, old_size, KASAN_HEAP_KALLOC);
2758 	}
2759 #else
2760 	adj_old_size = old_size;
2761 	adj_new_size = new_size;
2762 #if CONFIG_KERNEL_TBI && KASAN_TBI
2763 	if (addr) {
2764 		__asan_loadN((vm_offset_t)addr, K_SANE_SIZE(old_size));
2765 	}
2766 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
2767 #endif /* KASAN_KALLOC */
2768 
2769 	if (kt_is_var_view(kheap_or_kt_view)) {
2770 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2771 		kheap   = kalloc_type_get_heap(kt_view, false);
2772 		/*
2773 		 * Similar to kalloc_ext: Use stats from view if present, else use stats
2774 		 * from kheap. krealloc_type isn't exposed to kexts, so we don't need to
2775 		 * handle cross frees or KHEAP_ANY for typed allocations and can relie on
2776 		 * stats from view or kheap.
2777 		 */
2778 		zstats  = kt_view->kt_stats;
2779 		kt_hash = KT_GET_HASH(kt_view->kt_flags);
2780 	} else {
2781 		kt_view = NULL;
2782 		kheap   = kheap_or_kt_view;
2783 		kt_hash = kheap->kh_type_hash;
2784 	}
2785 
2786 	if (!zstats && kheap) {
2787 		zstats = kheap->kh_stats;
2788 	}
2789 	/*
2790 	 * Find out the size of the bucket in which the new sized allocation
2791 	 * would land. If it matches the bucket of the original allocation,
2792 	 * simply return the same address.
2793 	 */
2794 	if (new_size == 0) {
2795 		new_z = ZONE_NULL;
2796 		new_bucket_size = adj_new_size = 0;
2797 	} else {
2798 		new_z = kalloc_zone_for_size(kheap, kt_view, adj_new_size, forcopyin);
2799 		new_bucket_size = new_z ? zone_elem_size(new_z) : round_page(adj_new_size);
2800 	}
2801 #if !KASAN_KALLOC
2802 	if (flags & Z_FULLSIZE) {
2803 		adj_new_size = new_size = new_bucket_size;
2804 	}
2805 #endif /* !KASAN_KALLOC */
2806 
2807 	if (addr == NULL) {
2808 		old_z = ZONE_NULL;
2809 		old_size = old_bucket_size = adj_old_size = 0;
2810 	} else if (kheap_size_from_zone(addr, adj_old_size, forcopyin)) {
2811 		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
2812 		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
2813 			kfree_size_confusion_panic(old_z, addr,
2814 			    oob_offs, old_size, old_bucket_size);
2815 		}
2816 		__builtin_assume(old_z != ZONE_NULL);
2817 	} else {
2818 		old_z = ZONE_NULL;
2819 		old_bucket_size = round_page(adj_old_size);
2820 	}
2821 	min_size = MIN(old_size, new_size);
2822 
2823 	if (old_bucket_size == new_bucket_size) {
2824 		kr.addr = (char *)addr - oob_offs;
2825 		kr.size = new_size;
2826 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2827 		if (old_z) {
2828 			kr.addr = zone_element_pgz_oob_adjust(kr, new_bucket_size);
2829 			if (kr.addr != addr) {
2830 				memmove(kr.addr, addr, min_size);
2831 				bzero((char *)kr.addr + min_size,
2832 				    kr.size - min_size);
2833 			}
2834 		}
2835 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2836 #if KASAN_KALLOC
2837 		/*
2838 		 * Adjust right redzone in the element and poison it correctly
2839 		 */
2840 		kr.addr = (void *)kasan_realloc((vm_offset_t)addr,
2841 		    new_bucket_size + ptoa(old_z ? 0 : 2), kr.size,
2842 		    old_z ? KASAN_GUARD_SIZE : PAGE_SIZE);
2843 		__nosan_bzero((char *)kr.addr + min_size, kr.size - min_size);
2844 #endif /* KASAN_KALLOC */
2845 #if CONFIG_KERNEL_TBI && KASAN_TBI
2846 		/*
2847 		 * Validate the current buffer, then generate a new tag,
2848 		 * even if the address is stable, it's a "new" allocation.
2849 		 */
2850 		kr.addr = (void *)kasan_tbi_tag_zalloc((vm_offset_t)kr.addr,
2851 		    kr.size, new_bucket_size, false);
2852 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
2853 		return kr;
2854 	}
2855 
2856 	if (addr && !old_z && new_size && !new_z) {
2857 		return krealloc_large(kheap, (vm_offset_t)addr,
2858 		           old_size, new_size, flags, kt_hash, owner);
2859 	}
2860 
2861 	if (!new_size) {
2862 		kr.addr = NULL;
2863 		kr.size = 0;
2864 	} else if (new_z) {
2865 		kr = kalloc_zone(new_z, zstats, flags, new_size);
2866 	} else if (old_z || addr == NULL) {
2867 		kr = kalloc_large(kheap, new_size, flags, kt_hash, owner);
2868 	}
2869 
2870 	if (addr && kr.addr) {
2871 		__nosan_memcpy(kr.addr, addr, min_size);
2872 	}
2873 
2874 	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
2875 #if KASAN_KALLOC
2876 		/*
2877 		 * Resize back to the real allocation size.
2878 		 */
2879 		vm_size_t real_size = old_size;
2880 		addr = (void *)kasan_dealloc((vm_address_t)addr, &real_size);
2881 		if (old_z) {
2882 			old_bucket_size = real_size;
2883 		} else {
2884 			old_size = real_size;
2885 		}
2886 #endif /* KASAN_KALLOC */
2887 
2888 		if (old_z) {
2889 			kfree_zone(kheap_or_kt_view,
2890 			    (char *)addr - oob_offs, old_size,
2891 			    old_z, old_bucket_size);
2892 		} else {
2893 			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
2894 		}
2895 	}
2896 	return kr;
2897 }
2898 
2899 void *
2900 krealloc_data_external(
2901 	void               *ptr,
2902 	vm_size_t           old_size,
2903 	vm_size_t           new_size,
2904 	zalloc_flags_t      flags);
2905 void *
krealloc_data_external(void * ptr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags)2906 krealloc_data_external(
2907 	void               *ptr,
2908 	vm_size_t           old_size,
2909 	vm_size_t           new_size,
2910 	zalloc_flags_t      flags)
2911 {
2912 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2913 	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size, flags, NULL).addr;
2914 }
2915 
2916 __startup_func
2917 void
kheap_startup_init(kalloc_heap_t kheap)2918 kheap_startup_init(kalloc_heap_t kheap)
2919 {
2920 	struct kheap_zones *zones;
2921 	vm_tag_t tag;
2922 
2923 	switch (kheap->kh_heap_id) {
2924 	case KHEAP_ID_DEFAULT:
2925 		zones = KHEAP_DEFAULT->kh_zones;
2926 		tag = KHEAP_DEFAULT->kh_tag;
2927 		break;
2928 	case KHEAP_ID_DATA_BUFFERS:
2929 		zones = KHEAP_DATA_BUFFERS->kh_zones;
2930 		tag = KHEAP_DATA_BUFFERS->kh_tag;
2931 		break;
2932 	default:
2933 		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
2934 		    kheap->kh_heap_id);
2935 	}
2936 
2937 	kheap->kh_heap_id = zones->heap_id;
2938 	kheap->kh_zones = zones;
2939 	kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
2940 	kheap->kh_next = zones->views;
2941 	zones->views = kheap;
2942 	kheap->kh_tag = tag;
2943 	zone_view_count += 1;
2944 
2945 #if PLATFORM_MacOSX
2946 	if (kheap == KERN_OS_MALLOC) {
2947 		kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
2948 			(uint32_t) early_random(), 0);
2949 	}
2950 #endif
2951 }
2952 
2953 #pragma mark IOKit/libkern helpers
2954 
2955 #if PLATFORM_MacOSX
2956 
2957 void *
2958 kern_os_malloc_external(size_t size);
2959 void *
kern_os_malloc_external(size_t size)2960 kern_os_malloc_external(size_t size)
2961 {
2962 	if (size == 0) {
2963 		return NULL;
2964 	}
2965 
2966 	return kheap_alloc(KERN_OS_MALLOC, size,
2967 	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
2968 }
2969 
2970 void
2971 kern_os_free_external(void *addr);
2972 void
kern_os_free_external(void * addr)2973 kern_os_free_external(void *addr)
2974 {
2975 	kheap_free_addr(KERN_OS_MALLOC, addr);
2976 }
2977 
2978 void *
2979 kern_os_realloc_external(void *addr, size_t nsize);
2980 void *
kern_os_realloc_external(void * addr,size_t nsize)2981 kern_os_realloc_external(void *addr, size_t nsize)
2982 {
2983 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
2984 	vm_size_t osize, oob_offs = 0;
2985 
2986 	if (addr == NULL) {
2987 		return kern_os_malloc_external(nsize);
2988 	}
2989 
2990 #if KASAN_KALLOC
2991 	osize = kasan_user_size((vm_offset_t)addr);
2992 #else
2993 	osize = zone_element_size(addr, NULL, false, &oob_offs);
2994 	if (osize == 0) {
2995 		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
2996 		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
2997 	}
2998 #endif
2999 	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3000 }
3001 
3002 #endif /* PLATFORM_MacOSX */
3003 
3004 void
kern_os_zfree(zone_t zone,void * addr,vm_size_t size)3005 kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3006 {
3007 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3008 #pragma unused(size)
3009 	zfree(zone, addr);
3010 #else
3011 	if (zone_owns(zone, addr)) {
3012 		zfree(zone, addr);
3013 	} else {
3014 		/*
3015 		 * Third party kexts might not know about the operator new
3016 		 * and be allocated from the default heap
3017 		 */
3018 		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3019 		    zone->z_name);
3020 		kheap_free(KHEAP_DEFAULT, addr, size);
3021 	}
3022 #endif
3023 }
3024 
3025 bool
IOMallocType_from_vm(kalloc_type_view_t ktv)3026 IOMallocType_from_vm(kalloc_type_view_t ktv)
3027 {
3028 	struct kalloc_type_atom kt_atom = kalloc_type_func(KTV_FIXED, get_atom,
3029 	    (vm_offset_t)ktv, false);
3030 	return kalloc_type_from_vm(kt_atom);
3031 }
3032 
3033 void
kern_os_typed_free(kalloc_type_view_t ktv,void * addr,vm_size_t esize)3034 kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3035 {
3036 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE) || !ZSECURITY_CONFIG(KALLOC_TYPE)
3037 #pragma unused(esize)
3038 #else
3039 	/*
3040 	 * For third party kexts that have been compiled with sdk pre macOS 11,
3041 	 * an allocation of an OSObject that is defined in xnu or first pary
3042 	 * kexts, by directly calling new will lead to using the default heap
3043 	 * as it will call OSObject_operator_new_external. If this object
3044 	 * is freed by xnu, it panics as xnu uses the typed free which
3045 	 * requires the object to have been allocated in a kalloc.type zone.
3046 	 * To workaround this issue, detect if the allocation being freed is
3047 	 * from the default heap and allow freeing to it.
3048 	 */
3049 	zone_id_t zid = zone_id_for_element(addr, esize);
3050 	if (__probable(zid < MAX_ZONES)) {
3051 		zone_security_flags_t zsflags = zone_security_array[zid];
3052 		if (zsflags.z_kheap_id == KHEAP_ID_DEFAULT) {
3053 			return kheap_free(KHEAP_DEFAULT, addr, esize);
3054 		}
3055 	}
3056 #endif
3057 	kfree_type_impl_external(ktv, addr);
3058 }
3059 
3060 #pragma mark tests
3061 #if DEBUG || DEVELOPMENT
3062 
3063 #include <sys/random.h>
3064 /*
3065  * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
3066  *
3067  * Note: Presence of zones with name kalloc.type* is used to
3068  * determine if the feature is on.
3069  */
3070 static int
kalloc_type_feature_on(void)3071 kalloc_type_feature_on(void)
3072 {
3073 	/*
3074 	 * ZSECURITY_CONFIG not present
3075 	 */
3076 #if !ZSECURITY_CONFIG(KALLOC_TYPE)
3077 	return 1;
3078 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
3079 
3080 	boolean_t zone_found = false;
3081 	const char kalloc_type_str[] = "kalloc.type";
3082 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
3083 		zone_t z = kalloc_type_zarray[i];
3084 		while (z != NULL) {
3085 			zone_found = true;
3086 			if (strncmp(z->z_name, kalloc_type_str,
3087 			    strlen(kalloc_type_str)) != 0) {
3088 				return 0;
3089 			}
3090 			z = z->z_kt_next;
3091 		}
3092 	}
3093 
3094 	if (!zone_found) {
3095 		return 0;
3096 	}
3097 
3098 	return 1;
3099 }
3100 
3101 /*
3102  * Ensure that the policy uses the zone budget completely
3103  */
3104 #if ZSECURITY_CONFIG(KALLOC_TYPE)
3105 static int
kalloc_type_test_policy(int64_t in)3106 kalloc_type_test_policy(int64_t in)
3107 {
3108 	uint16_t zone_budget = (uint16_t) in;
3109 	uint16_t max_bucket_freq = 25;
3110 	uint16_t freq_list[MAX_K_ZONE(k_zone_cfg)] = {};
3111 	uint16_t zones_per_bucket[MAX_K_ZONE(k_zone_cfg)] = {};
3112 	uint16_t random[MAX_K_ZONE(k_zone_cfg)];
3113 	int ret = 0;
3114 
3115 	/*
3116 	 * Need a minimum of 2 zones per size class
3117 	 */
3118 	if (zone_budget < MAX_K_ZONE(k_zone_cfg) * 2) {
3119 		return ret;
3120 	}
3121 	read_random((void *)&random[0], sizeof(random));
3122 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
3123 		freq_list[i] = random[i] % max_bucket_freq;
3124 	}
3125 	uint16_t wasted_zone_budget = kalloc_type_apply_policy(freq_list,
3126 	    zones_per_bucket, zone_budget);
3127 	if (wasted_zone_budget == 0) {
3128 		ret = 1;
3129 	}
3130 	return ret;
3131 }
3132 #else /* ZSECURITY_CONFIG(KALLOC_TYPE) */
3133 static int
kalloc_type_test_policy(int64_t in)3134 kalloc_type_test_policy(int64_t in)
3135 {
3136 #pragma unused(in)
3137 	return 1;
3138 }
3139 #endif /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
3140 
3141 /*
3142  * Ensure that size of adopters of kalloc_type fit in the zone
3143  * they have been assigned.
3144  */
3145 static int
kalloc_type_check_size(zone_t z)3146 kalloc_type_check_size(zone_t z)
3147 {
3148 	uint16_t elem_size = z->z_elem_size;
3149 	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
3150 	const char site_str[] = "site.";
3151 	const size_t site_str_len = strlen(site_str);
3152 	while (kt_cur != NULL) {
3153 		/*
3154 		 * Process only kalloc_type_views and skip the zone_views when
3155 		 * feature is off.
3156 		 */
3157 #if !ZSECURITY_CONFIG(KALLOC_TYPE)
3158 		if (strncmp(kt_cur->kt_zv.zv_name, site_str, site_str_len) != 0) {
3159 			kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3160 			continue;
3161 		}
3162 #else /* !ZSECURITY_CONFIG(KALLOC_TYPE) */
3163 #pragma unused(site_str, site_str_len)
3164 #endif /* ZSECURITY_CONFIG(KALLOC_TYPE) */
3165 		if (kalloc_type_get_size(kt_cur->kt_size) > elem_size) {
3166 			return 0;
3167 		}
3168 		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3169 	}
3170 	return 1;
3171 }
3172 
3173 struct test_kt_data {
3174 	int a;
3175 };
3176 
3177 static int
kalloc_type_test_data_redirect()3178 kalloc_type_test_data_redirect()
3179 {
3180 	struct kalloc_type_view ktv_data = {
3181 		.kt_signature = __builtin_xnu_type_signature(struct test_kt_data)
3182 	};
3183 	if (!kalloc_type_is_data(kalloc_type_func(KTV_FIXED, get_atom,
3184 	    (vm_offset_t)&ktv_data, false))) {
3185 		printf("%s: data redirect failed\n", __func__);
3186 		return 0;
3187 	}
3188 	return 1;
3189 }
3190 
3191 static int
run_kalloc_type_test(int64_t in,int64_t * out)3192 run_kalloc_type_test(int64_t in, int64_t *out)
3193 {
3194 	*out = 0;
3195 	for (uint16_t i = 0; i < MAX_K_ZONE(k_zone_cfg); i++) {
3196 		zone_t z = kalloc_type_zarray[i];
3197 		while (z != NULL) {
3198 			if (!kalloc_type_check_size(z)) {
3199 				printf("%s: size check failed\n", __func__);
3200 				return 0;
3201 			}
3202 			z = z->z_kt_next;
3203 		}
3204 	}
3205 
3206 	if (!kalloc_type_test_policy(in)) {
3207 		printf("%s: policy check failed\n", __func__);
3208 		return 0;
3209 	}
3210 
3211 	if (!kalloc_type_feature_on()) {
3212 		printf("%s: boot-arg is on but feature isn't\n", __func__);
3213 		return 0;
3214 	}
3215 
3216 	if (!kalloc_type_test_data_redirect()) {
3217 		printf("%s: kalloc_type redirect for all data signature failed\n",
3218 		    __func__);
3219 		return 0;
3220 	}
3221 
3222 	printf("%s: test passed\n", __func__);
3223 
3224 	*out = 1;
3225 	return 0;
3226 }
3227 SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
3228 
3229 static vm_size_t
test_bucket_size(kalloc_heap_t kheap,vm_size_t size)3230 test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
3231 {
3232 	zone_t z = kalloc_heap_zone_for_size(kheap, size);
3233 
3234 	return z ? zone_elem_size(z) : round_page(size);
3235 }
3236 
3237 static int
run_kalloc_test(int64_t in __unused,int64_t * out)3238 run_kalloc_test(int64_t in __unused, int64_t *out)
3239 {
3240 	*out = 0;
3241 	uint64_t * data_ptr;
3242 	size_t alloc_size, old_alloc_size;
3243 
3244 	printf("%s: test running\n", __func__);
3245 
3246 	alloc_size = 3544;
3247 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size,
3248 	    Z_WAITOK, &data_ptr).addr;
3249 	if (!data_ptr) {
3250 		printf("%s: kalloc 3544 returned not null\n", __func__);
3251 		return 0;
3252 	}
3253 
3254 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, alloc_size,
3255 	    PAGE_SIZE, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3256 	*data_ptr = 0;
3257 	if (!data_ptr) {
3258 		printf("%s: krealoc -> 2pgs returned not null\n", __func__);
3259 		return 0;
3260 	}
3261 
3262 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, PAGE_SIZE,
3263 	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3264 	if (!data_ptr) {
3265 		printf("%s: krealoc -> VM1 returned not null\n", __func__);
3266 		return 0;
3267 	}
3268 	*data_ptr = 0;
3269 
3270 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 2,
3271 	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3272 	*data_ptr = 0;
3273 	if (!data_ptr) {
3274 		printf("%s: krealoc -> VM2 returned not null\n", __func__);
3275 		return 0;
3276 	}
3277 
3278 	krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 4,
3279 	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);
3280 
3281 	alloc_size = sizeof(uint64_t) + 1;
3282 	data_ptr = kalloc_ext(KHEAP_DEFAULT, alloc_size, Z_WAITOK, NULL).addr;
3283 	if (!data_ptr) {
3284 		printf("%s: kalloc sizeof(uint64_t) returned null\n", __func__);
3285 		return 0;
3286 	}
3287 
3288 	struct kalloc_result kr = {};
3289 	old_alloc_size = alloc_size;
3290 	alloc_size++;
3291 	kr = krealloc_ext(KHEAP_DEFAULT, data_ptr, old_alloc_size, alloc_size,
3292 	    Z_WAITOK | Z_NOFAIL, NULL);
3293 	if (!kr.addr || kr.addr != data_ptr ||
3294 	    test_bucket_size(KHEAP_DEFAULT, kr.size) !=
3295 	    test_bucket_size(KHEAP_DEFAULT, old_alloc_size)) {
3296 		printf("%s: same size class realloc failed\n", __func__);
3297 		return 0;
3298 	}
3299 
3300 	old_alloc_size = alloc_size;
3301 	alloc_size *= 2;
3302 	kr = krealloc_ext(KHEAP_DEFAULT, kr.addr, old_alloc_size, alloc_size,
3303 	    Z_WAITOK | Z_NOFAIL, NULL);
3304 	if (!kr.addr || test_bucket_size(KHEAP_DEFAULT, kr.size) ==
3305 	    test_bucket_size(KHEAP_DEFAULT, old_alloc_size)) {
3306 		printf("%s: new size class realloc failed\n", __func__);
3307 		return 0;
3308 	}
3309 
3310 	old_alloc_size = alloc_size;
3311 	alloc_size *= 2;
3312 	data_ptr = krealloc_ext(KHEAP_DEFAULT, kr.addr, old_alloc_size,
3313 	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
3314 	if (!data_ptr) {
3315 		printf("%s: realloc without old size returned null\n", __func__);
3316 		return 0;
3317 	}
3318 	kheap_free(KHEAP_DEFAULT, data_ptr, alloc_size);
3319 
3320 	printf("%s: test passed\n", __func__);
3321 	*out = 1;
3322 	return 0;
3323 }
3324 SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
3325 
3326 #endif
3327