xref: /xnu-8796.141.3/osfmk/kern/kalloc.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/kalloc.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	General kernel memory allocator.  This allocator is designed
64  *	to be used by the kernel to manage dynamic memory fast.
65  */
66 
67 #include "mach/vm_types.h"
68 #include <mach/boolean.h>
69 #include <mach/sdt.h>
70 #include <mach/machine/vm_types.h>
71 #include <mach/vm_param.h>
72 #include <kern/misc_protos.h>
73 #include <kern/counter.h>
74 #include <kern/zalloc_internal.h>
75 #include <kern/kalloc.h>
76 #include <kern/ledger.h>
77 #include <kern/backtrace.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_map.h>
81 #include <sys/kdebug.h>
82 
83 #include <os/hash.h>
84 #include <san/kasan.h>
85 #include <libkern/section_keywords.h>
86 #include <libkern/prelink.h>
87 
88 SCALABLE_COUNTER_DEFINE(kalloc_large_count);
89 SCALABLE_COUNTER_DEFINE(kalloc_large_total);
90 
91 #pragma mark initialization
92 
93 /*
94  * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
95  * sized zone.  This allocator is built on top of the zone allocator.  A zone
96  * is created for each potential size that we are willing to get in small
97  * blocks.
98  *
99  * Allocations of size greater than KHEAP_MAX_SIZE, are allocated from the VM.
100  */
101 
102 /*
103  * The kt_zone_cfg table defines the configuration of zones on various
104  * platforms for kalloc_type fixed size allocations.
105  */
106 
/*
 * K_SIZE_CLASS(size) maps a nominal size class to the value stored in the
 * size tables of this file.
 *
 * With KASAN_CLASSIC, a size that is a multiple of the page size or is at
 * most 1024 is kept as is; any other size is reduced by KASAN_GUARD_SIZE.
 * Without KASAN_CLASSIC the macro is the identity.
 *
 * Note: the argument is evaluated more than once in the KASAN_CLASSIC form.
 */
107 #if KASAN_CLASSIC
108 #define K_SIZE_CLASS(size)    \
109 	(((size) & PAGE_MASK) == 0 ? (size) : \
110 	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
111 #else
112 #define K_SIZE_CLASS(size)    (size)
113 #endif
/* KHEAP_MAX_SIZE must come out of K_SIZE_CLASS() unchanged. */
114 static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
115 
116 static const uint16_t kt_zone_cfg[] = {
117 	K_SIZE_CLASS(16),
118 	K_SIZE_CLASS(32),
119 	K_SIZE_CLASS(48),
120 	K_SIZE_CLASS(64),
121 	K_SIZE_CLASS(80),
122 	K_SIZE_CLASS(96),
123 	K_SIZE_CLASS(128),
124 	K_SIZE_CLASS(160),
125 	K_SIZE_CLASS(192),
126 	K_SIZE_CLASS(224),
127 	K_SIZE_CLASS(256),
128 	K_SIZE_CLASS(288),
129 	K_SIZE_CLASS(368),
130 	K_SIZE_CLASS(400),
131 	K_SIZE_CLASS(512),
132 	K_SIZE_CLASS(576),
133 	K_SIZE_CLASS(768),
134 	K_SIZE_CLASS(1024),
135 	K_SIZE_CLASS(1152),
136 	K_SIZE_CLASS(1280),
137 	K_SIZE_CLASS(1664),
138 	K_SIZE_CLASS(2048),
139 	K_SIZE_CLASS(4096),
140 	K_SIZE_CLASS(6144),
141 	K_SIZE_CLASS(8192),
142 	K_SIZE_CLASS(12288),
143 	K_SIZE_CLASS(16384),
144 #if __arm64__
145 	K_SIZE_CLASS(24576),
146 	K_SIZE_CLASS(32768),
147 #endif /* __arm64__ */
148 };
149 
150 #define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))
151 
152 /*
153  * kalloc_type callsites are assigned a zone during early boot. They
154  * use the dlut[] (direct lookup table), indexed by size normalized
155  * to the minimum alignment to find the right zone index quickly.
156  */
/*
 * INDEX_ZDLUT(size):  size divided by KALLOC_MINALIGN, rounded up.
 * KALLOC_DLUT_SIZE:   number of entries in kalloc_type_dlut[].
 * MAX_SIZE_ZDLUT:     the size that corresponds to the last entry,
 *                     i.e. index (KALLOC_DLUT_SIZE - 1).
 */
157 #define INDEX_ZDLUT(size)       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
158 #define KALLOC_DLUT_SIZE        (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
159 #define MAX_SIZE_ZDLUT          ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
/* Direct lookup table: one kt_zone_cfg[] index per KALLOC_MINALIGN step. */
160 static __startup_data uint8_t   kalloc_type_dlut[KALLOC_DLUT_SIZE];
/* Element size of each of the KHEAP_NUM_ZONES zones of a kalloc heap. */
161 static __startup_data uint32_t  kheap_zsize[KHEAP_NUM_ZONES];
162 
163 #if VM_TAG_SIZECLASSES
164 static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
165 #endif
166 
167 const char * const kalloc_heap_names[] = {
168 	[KHEAP_ID_NONE]          = "",
169 	[KHEAP_ID_SHARED]        = "shared.",
170 	[KHEAP_ID_DATA_BUFFERS]  = "data.",
171 	[KHEAP_ID_KT_VAR]        = "",
172 };
173 
174 /*
175  * Shared heap configuration
176  */
177 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_SHARED[1] = {
178 	{
179 		.kh_name     = "shared.kalloc",
180 		.kh_heap_id  = KHEAP_ID_SHARED,
181 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE,
182 	}
183 };
184 
185 /*
186  * Bag of bytes heap configuration
187  */
188 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
189 	{
190 		.kh_name     = "data.kalloc",
191 		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
192 		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
193 	}
194 };
195 
196 /*
197  * Configuration of variable kalloc type heaps
198  */
199 SECURITY_READ_ONLY_LATE(struct kheap_info)
200 kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
201 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
202 	{
203 		.kh_name     = "kalloc.type.var",
204 		.kh_heap_id  = KHEAP_ID_KT_VAR,
205 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
206 	}
207 };
208 
209 KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
210 
211 __startup_func
212 static void
kalloc_zsize_compute(void)213 kalloc_zsize_compute(void)
214 {
215 	uint32_t step = KHEAP_STEP_START;
216 	uint32_t size = KHEAP_START_SIZE;
217 
218 	/*
219 	 * Manually initialize extra initial zones
220 	 */
221 	kheap_zsize[0] = size / 2;
222 	kheap_zsize[1] = size;
223 	static_assert(KHEAP_EXTRA_ZONES == 2);
224 
225 	/*
226 	 * Compute sizes for remaining zones
227 	 */
228 	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
229 		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
230 
231 		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
232 		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);
233 
234 		step *= 2;
235 		size += step;
236 	}
237 }
238 
239 static zone_t
kalloc_zone_for_size_with_flags(zone_id_t zid,vm_size_t size,zalloc_flags_t flags)240 kalloc_zone_for_size_with_flags(
241 	zone_id_t               zid,
242 	vm_size_t               size,
243 	zalloc_flags_t          flags)
244 {
245 	vm_size_t max_size = KHEAP_MAX_SIZE;
246 	bool forcopyin = flags & Z_MAY_COPYINMAP;
247 	zone_t zone;
248 
249 	if (flags & Z_KALLOC_ARRAY) {
250 		size = roundup(size, KALLOC_ARRAY_GRANULE);
251 	}
252 
253 	if (forcopyin) {
254 #if __x86_64__
255 		/*
256 		 * On Intel, the OSData() ABI used to allocate
257 		 * from the kernel map starting at PAGE_SIZE.
258 		 *
259 		 * If only vm_map_copyin() or a wrapper is used,
260 		 * then everything will work fine because vm_map_copy_t
261 		 * will perform an actual copy if the data is smaller
262 		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
263 		 *
264 		 * However, if anyone is trying to call mach_vm_remap(),
265 		 * then bad things (TM) happen.
266 		 *
267 		 * Avoid this by preserving the ABI and moving
268 		 * to kalloc_large() earlier.
269 		 *
270 		 * Any recent code really ought to use IOMemoryDescriptor
271 		 * for this purpose however.
272 		 */
273 		max_size = PAGE_SIZE - 1;
274 #endif
275 	}
276 
277 	if (size <= max_size) {
278 		uint32_t idx;
279 
280 		if (size <= KHEAP_START_SIZE) {
281 			zid  += (size > 16);
282 		} else {
283 			/*
284 			 * . log2down(size - 1) is log2up(size) - 1
285 			 * . (size - 1) >> (log2down(size - 1) - 1)
286 			 *   is either 0x2 or 0x3
287 			 */
288 			idx   = kalloc_log2down((uint32_t)(size - 1));
289 			zid  += KHEAP_EXTRA_ZONES +
290 			    2 * (idx - KHEAP_START_IDX) +
291 			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
292 		}
293 
294 		zone = zone_by_id(zid);
295 #if KASAN_CLASSIC
296 		/*
297 		 * Under kasan classic, certain size classes are a redzone
298 		 * away from the mathematical formula above, and we need
299 		 * to "go to the next zone".
300 		 *
301 		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
302 		 * this will never go to an "invalid" zone that doesn't
303 		 * belong to the kheap.
304 		 */
305 		if (size > zone_elem_inner_size(zone)) {
306 			zone++;
307 		}
308 #endif
309 		return zone;
310 	}
311 
312 	return ZONE_NULL;
313 }
314 
315 zone_t
kalloc_zone_for_size(zone_id_t zid,size_t size)316 kalloc_zone_for_size(zone_id_t zid, size_t size)
317 {
318 	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
319 }
320 
321 static inline bool
kheap_size_from_zone(void * addr,vm_size_t size,zalloc_flags_t flags)322 kheap_size_from_zone(
323 	void                   *addr,
324 	vm_size_t               size,
325 	zalloc_flags_t          flags)
326 {
327 	vm_size_t max_size = KHEAP_MAX_SIZE;
328 	bool forcopyin = flags & Z_MAY_COPYINMAP;
329 
330 #if __x86_64__
331 	/*
332 	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
333 	 * behavior, then the element could have a PAGE_SIZE reported size,
334 	 * yet still be from a zone for Z_MAY_COPYINMAP.
335 	 */
336 	if (forcopyin) {
337 		if (size == PAGE_SIZE &&
338 		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
339 			return true;
340 		}
341 
342 		max_size = PAGE_SIZE - 1;
343 	}
344 #else
345 #pragma unused(addr, forcopyin)
346 #endif
347 
348 	return size <= max_size;
349 }
350 
351 /*
352  * All data zones shouldn't use shared zone. Therefore set the no share
353  * bit right after creation.
354  */
355 __startup_func
356 static void
kalloc_set_no_share_for_data(zone_kheap_id_t kheap_id,zone_stats_t zstats)357 kalloc_set_no_share_for_data(
358 	zone_kheap_id_t       kheap_id,
359 	zone_stats_t          zstats)
360 {
361 	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
362 		zpercpu_foreach(zs, zstats) {
363 			os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
364 		}
365 	}
366 }
367 
368 __startup_func
369 static void
kalloc_zone_init(const char * kheap_name,zone_kheap_id_t kheap_id,zone_id_t * kheap_zstart,zone_create_flags_t zc_flags)370 kalloc_zone_init(
371 	const char           *kheap_name,
372 	zone_kheap_id_t       kheap_id,
373 	zone_id_t            *kheap_zstart,
374 	zone_create_flags_t   zc_flags)
375 {
376 	zc_flags |= ZC_PGZ_USE_GUARDS;
377 
378 	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
379 		uint32_t size = kheap_zsize[i];
380 		char buf[MAX_ZONE_NAME], *z_name;
381 		int len;
382 
383 		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
384 		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
385 		strlcpy(z_name, buf, len + 1);
386 
387 		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
388 #if __arm64e__ || KASAN_TBI
389 			uint32_t scale = kalloc_log2down(size / 32);
390 
391 			if (size == 32 << scale) {
392 			        z->z_array_size_class = scale;
393 			} else {
394 			        z->z_array_size_class = scale | 0x10;
395 			}
396 #endif
397 			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
398 			if (i == 0) {
399 			        *kheap_zstart = zone_index(z);
400 			}
401 			kalloc_set_no_share_for_data(kheap_id, z->z_stats);
402 		});
403 	}
404 }
405 
406 __startup_func
407 static void
kalloc_heap_init(struct kalloc_heap * kheap)408 kalloc_heap_init(struct kalloc_heap *kheap)
409 {
410 	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
411 	    ZC_NONE);
412 	/*
413 	 * Count all the "raw" views for zones in the heap.
414 	 */
415 	zone_view_count += KHEAP_NUM_ZONES;
416 }
417 
418 #define KEXT_ALIGN_SHIFT           6
419 #define KEXT_ALIGN_BYTES           (1<< KEXT_ALIGN_SHIFT)
420 #define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES-1)
421 #define kt_scratch_size            (256ul << 10)
422 #define KALLOC_TYPE_SECTION(type) \
423 	(type == KTV_FIXED? "__kalloc_type": "__kalloc_var")
424 
425 /*
426  * Enum to specify the kalloc_type variant being used.
427  */
428 __options_decl(kalloc_type_variant_t, uint16_t, {
429 	KTV_FIXED     = 0x0001,
430 	KTV_VAR       = 0x0002,
431 });
432 
433 /*
434  * Macros that generate the appropriate kalloc_type variant (i.e fixed or
435  * variable) of the desired variable/function.
436  */
/*
 * kalloc_type_var(type, var) evaluates to kalloc_type_<var>_fixed when
 * type == KTV_FIXED and to kalloc_type_<var>_var otherwise, cast to
 * vm_offset_t.
 */
437 #define kalloc_type_var(type, var)              \
438 	((type) == KTV_FIXED?                       \
439 	(vm_offset_t) kalloc_type_##var##_fixed:    \
440 	(vm_offset_t) kalloc_type_##var##_var)
/*
 * kalloc_type_func(type, func, args...) calls kalloc_type_<func>_fixed(args)
 * when type == KTV_FIXED and kalloc_type_<func>_var(args) otherwise.
 */
441 #define kalloc_type_func(type, func, ...)       \
442 	((type) == KTV_FIXED?                       \
443 	kalloc_type_##func##_fixed(__VA_ARGS__):    \
444 	kalloc_type_##func##_var(__VA_ARGS__))
445 
446 TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
447 TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
448     ZSECURITY_CONFIG_KT_VAR_BUDGET);
449 TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
450     ZSECURITY_CONFIG_KT_BUDGET);
451 TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
452 static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
453 
454 /*
455  * Section start/end for fixed kalloc_type views
456  */
457 extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
458 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
459 
460 extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
461 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
462 
463 /*
464  * Section start/end for variable kalloc_type views
465  */
466 extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
467 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
468 
469 extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
470 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
471 
472 __startup_data
473 static kalloc_type_views_t *kt_buffer = NULL;
474 __startup_data
475 static uint64_t kt_count;
476 __startup_data
477 uint32_t kalloc_type_hash_seed;
478 
479 __startup_data
480 static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
481 __startup_data
482 static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];
483 
484 struct nzones_with_idx {
485 	uint16_t nzones;
486 	uint16_t idx;
487 };
488 int16_t zone_carry = 0;
489 
490 _Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
491     "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");
492 
493 /*
494  * For use by lldb to iterate over kalloc types
495  */
496 SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
497 SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
498 SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];
499 
500 #define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
501 static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
502     KMEM_DIRECTION_MASK),
503     "Insufficient bits to represent range and dir for VM allocations");
504 static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
505     "validate idx mask");
506 /* qsort routines */
507 typedef int (*cmpfunc_t)(const void *a, const void *b);
508 extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
509 
510 static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)511 kalloc_type_get_idx(uint32_t kt_size)
512 {
513 	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
514 }
515 
516 static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size,uint16_t idx)517 kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
518 {
519 	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
520 }
521 
522 static void
kalloc_type_build_dlut(void)523 kalloc_type_build_dlut(void)
524 {
525 	vm_size_t size = 0;
526 	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
527 		uint8_t zindex = 0;
528 		while (kt_zone_cfg[zindex] < size) {
529 			zindex++;
530 		}
531 		kalloc_type_dlut[i] = zindex;
532 	}
533 }
534 
535 static uint32_t
kalloc_type_idx_for_size(uint32_t size)536 kalloc_type_idx_for_size(uint32_t size)
537 {
538 	assert(size <= KHEAP_MAX_SIZE);
539 	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
540 	return kalloc_type_set_idx(size, idx);
541 }
542 
543 static void
kalloc_type_assign_zone_fixed(kalloc_type_view_t * cur,kalloc_type_view_t * end,zone_t z,zone_t sig_zone,zone_t shared_zone)544 kalloc_type_assign_zone_fixed(
545 	kalloc_type_view_t     *cur,
546 	kalloc_type_view_t     *end,
547 	zone_t                  z,
548 	zone_t                  sig_zone,
549 	zone_t                  shared_zone)
550 {
551 	/*
552 	 * Assign the zone created for every kalloc_type_view
553 	 * of the same unique signature
554 	 */
555 	bool need_raw_view = false;
556 
557 	while (cur < end) {
558 		kalloc_type_view_t kt = *cur;
559 		struct zone_view *zv = &kt->kt_zv;
560 		zv->zv_zone = z;
561 		kalloc_type_flags_t kt_flags = kt->kt_flags;
562 		zone_security_flags_t zsflags = zone_security_config(z);
563 
564 		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
565 		if (!shared_zone) {
566 			assert(zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS);
567 		}
568 
569 		if (kt_flags & KT_SLID) {
570 			kt->kt_signature -= vm_kernel_slide;
571 			kt->kt_zv.zv_name -= vm_kernel_slide;
572 		}
573 
574 		if ((kt_flags & KT_PRIV_ACCT) ||
575 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
576 			zv->zv_stats = zalloc_percpu_permanent_type(
577 				struct zone_stats);
578 			need_raw_view = true;
579 			zone_view_count += 1;
580 		} else {
581 			zv->zv_stats = z->z_stats;
582 		}
583 
584 		if ((kt_flags & KT_NOSHARED) || !shared_zone) {
585 			if ((kt_flags & KT_NOSHARED) && !(kt_flags & KT_PRIV_ACCT)) {
586 				panic("KT_NOSHARED used w/o private accounting for view %s",
587 				    zv->zv_name);
588 			}
589 
590 			zpercpu_foreach(zs, zv->zv_stats) {
591 				os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
592 			}
593 		}
594 
595 		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
596 			kt->kt_zshared = shared_zone;
597 			kt->kt_zsig = sig_zone;
598 			/*
599 			 * If we haven't yet set the signature equivalance then set it
600 			 * otherwise validate that the zone has the same signature equivalance
601 			 * as the sig_zone provided
602 			 */
603 			if (!zone_get_sig_eq(z)) {
604 				zone_set_sig_eq(z, zone_index(sig_zone));
605 			} else {
606 				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
607 			}
608 		}
609 		zv->zv_next = (zone_view_t) z->z_views;
610 		zv->zv_zone->z_views = (zone_view_t) kt;
611 		cur++;
612 	}
613 	if (need_raw_view) {
614 		zone_view_count += 1;
615 	}
616 }
617 
618 __startup_func
619 static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t * cur,kalloc_type_var_view_t * end,uint32_t heap_idx)620 kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
621     kalloc_type_var_view_t *end, uint32_t heap_idx)
622 {
623 	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
624 	while (cur < end) {
625 		kalloc_type_var_view_t kt = *cur;
626 		kt->kt_heap_start = cfg->kh_zstart;
627 		kalloc_type_flags_t kt_flags = kt->kt_flags;
628 
629 		if (kt_flags & KT_SLID) {
630 			if (kt->kt_sig_hdr) {
631 				kt->kt_sig_hdr -= vm_kernel_slide;
632 			}
633 			kt->kt_sig_type -= vm_kernel_slide;
634 			kt->kt_name -= vm_kernel_slide;
635 		}
636 
637 		if ((kt_flags & KT_PRIV_ACCT) ||
638 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
639 			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
640 			zone_view_count += 1;
641 		}
642 
643 		kt->kt_next = (zone_view_t) cfg->kt_views;
644 		cfg->kt_views = kt;
645 		cur++;
646 	}
647 }
648 
649 __startup_func
650 static inline void
kalloc_type_slide_fixed(vm_offset_t addr)651 kalloc_type_slide_fixed(vm_offset_t addr)
652 {
653 	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
654 	ktv->kt_signature += vm_kernel_slide;
655 	ktv->kt_zv.zv_name += vm_kernel_slide;
656 	ktv->kt_flags |= KT_SLID;
657 }
658 
659 __startup_func
660 static inline void
kalloc_type_slide_var(vm_offset_t addr)661 kalloc_type_slide_var(vm_offset_t addr)
662 {
663 	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
664 	if (ktv->kt_sig_hdr) {
665 		ktv->kt_sig_hdr += vm_kernel_slide;
666 	}
667 	ktv->kt_sig_type += vm_kernel_slide;
668 	ktv->kt_name += vm_kernel_slide;
669 	ktv->kt_flags |= KT_SLID;
670 }
671 
672 __startup_func
673 static void
kalloc_type_validate_flags(kalloc_type_flags_t kt_flags,const char * kt_name,uuid_string_t kext_uuid)674 kalloc_type_validate_flags(
675 	kalloc_type_flags_t   kt_flags,
676 	const char           *kt_name,
677 	uuid_string_t         kext_uuid)
678 {
679 	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
680 		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
681 		    "required xnu headers", kt_name, kext_uuid);
682 	}
683 }
684 
685 static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr,uuid_string_t kext_uuid)686 kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
687 {
688 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
689 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
690 	return ktv->kt_flags;
691 }
692 
693 static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr,uuid_string_t kext_uuid)694 kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
695 {
696 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
697 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
698 	return ktv->kt_flags;
699 }
700 
701 /*
702  * Check if signature of type is made up of only data and padding
703  */
704 static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)705 kalloc_type_is_data(kalloc_type_flags_t kt_flags)
706 {
707 	assert(kt_flags & KT_CHANGED);
708 	return kt_flags & KT_DATA_ONLY;
709 }
710 
711 /*
712  * Check if signature of type is made up of only pointers
713  */
714 static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)715 kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
716 {
717 	assert(kt_flags & KT_CHANGED2);
718 	return kt_flags & KT_PTR_ARRAY;
719 }
720 
721 static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)722 kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
723 {
724 	assert(kt_flags & KT_CHANGED);
725 	return kt_flags & KT_VM;
726 }
727 
728 __startup_func
729 static inline vm_size_t
kalloc_type_view_sz_fixed(void)730 kalloc_type_view_sz_fixed(void)
731 {
732 	return sizeof(struct kalloc_type_view);
733 }
734 
735 __startup_func
736 static inline vm_size_t
kalloc_type_view_sz_var(void)737 kalloc_type_view_sz_var(void)
738 {
739 	return sizeof(struct kalloc_type_var_view);
740 }
741 
742 __startup_func
743 static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end)744 kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
745     vm_offset_t end)
746 {
747 	return (end - start) / kalloc_type_func(type, view_sz);
748 }
749 
750 __startup_func
751 static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t * buffer,vm_offset_t ktv)752 kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
753 {
754 	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
755 }
756 
757 __startup_func
758 static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t * buffer,vm_offset_t ktv)759 kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
760 {
761 	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
762 }
763 
764 __startup_func
765 static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)766 kalloc_type_handle_data_view_fixed(vm_offset_t addr)
767 {
768 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
769 	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
770 	    cur_data_view->kt_size);
771 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
772 	    NULL);
773 }
774 
775 __startup_func
776 static void
kalloc_type_handle_data_view_var(vm_offset_t addr)777 kalloc_type_handle_data_view_var(vm_offset_t addr)
778 {
779 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
780 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
781 }
782 
783 __startup_func
784 static uint32_t
kalloc_type_handle_parray_var(void)785 kalloc_type_handle_parray_var(void)
786 {
787 	uint32_t i = 0;
788 	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
789 	const char *p_name = kt->kt_name;
790 
791 	/*
792 	 * The sorted list of variable kalloc_type_view has pointer arrays at the
793 	 * beginning. Walk through them and assign a random pointer heap to each
794 	 * type detected by typename.
795 	 */
796 	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
797 		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
798 		const char *c_name = kt->kt_name;
799 		uint32_t p_i = i;
800 
801 		while (strcmp(c_name, p_name) == 0) {
802 			i++;
803 			kt = kt_buffer[i].ktv_var;
804 			c_name = kt->kt_name;
805 		}
806 		p_name = c_name;
807 		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
808 		    &kt_buffer[i].ktv_var, heap_id);
809 	}
810 
811 	/*
812 	 * Returns the the index of the first view that isn't a pointer array
813 	 */
814 	return i;
815 }
816 
817 __startup_func
818 static uint32_t
kalloc_hash_adjust(uint32_t hash,uint32_t shift)819 kalloc_hash_adjust(uint32_t hash, uint32_t shift)
820 {
821 	/*
822 	 * Limit range_id to ptr ranges
823 	 */
824 	uint32_t range_id = kmem_adjust_range_id(hash);
825 	uint32_t direction = hash & 0x8000;
826 	return (range_id | KMEM_HASH_SET | direction) << shift;
827 }
828 
829 __startup_func
830 static void
kalloc_type_set_type_hash(const char * sig_ty,const char * sig_hdr,kalloc_type_flags_t * kt_flags)831 kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
832     kalloc_type_flags_t *kt_flags)
833 {
834 	uint32_t hash = 0;
835 
836 	assert(sig_ty != NULL);
837 	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
838 	    kalloc_type_hash_seed);
839 	if (sig_hdr) {
840 		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
841 	}
842 	os_hash_jenkins_finish(hash);
843 	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
844 
845 	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
846 }
847 
848 __startup_func
849 static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)850 kalloc_type_set_type_hash_fixed(vm_offset_t addr)
851 {
852 	/*
853 	 * Use backtraces on fixed as we don't have signatures for types that go
854 	 * to the VM due to rdar://85182551.
855 	 */
856 	(void) addr;
857 }
858 
859 __startup_func
860 static void
kalloc_type_set_type_hash_var(vm_offset_t addr)861 kalloc_type_set_type_hash_var(vm_offset_t addr)
862 {
863 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
864 	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
865 	    &ktv->kt_flags);
866 }
867 
868 __startup_func
869 static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)870 kalloc_type_mark_processed_fixed(vm_offset_t addr)
871 {
872 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
873 	ktv->kt_flags |= KT_PROCESSED;
874 }
875 
876 __startup_func
877 static void
kalloc_type_mark_processed_var(vm_offset_t addr)878 kalloc_type_mark_processed_var(vm_offset_t addr)
879 {
880 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
881 	ktv->kt_flags |= KT_PROCESSED;
882 }
883 
884 __startup_func
885 static void
kalloc_type_update_view_fixed(vm_offset_t addr)886 kalloc_type_update_view_fixed(vm_offset_t addr)
887 {
888 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
889 	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
890 }
891 
892 __startup_func
893 static void
kalloc_type_update_view_var(vm_offset_t addr)894 kalloc_type_update_view_var(vm_offset_t addr)
895 {
896 	(void) addr;
897 }
898 
899 __startup_func
900 static void
kalloc_type_view_copy(const kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end,uint64_t * cur_count,bool slide,uuid_string_t kext_uuid)901 kalloc_type_view_copy(
902 	const kalloc_type_variant_t   type,
903 	vm_offset_t                   start,
904 	vm_offset_t                   end,
905 	uint64_t                     *cur_count,
906 	bool                          slide,
907 	uuid_string_t                 kext_uuid)
908 {
909 	uint64_t count = kalloc_type_view_count(type, start, end);
910 	if (count + *cur_count >= kt_count) {
911 		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
912 	}
913 	vm_offset_t cur = start;
914 	while (cur < end) {
915 		if (slide) {
916 			kalloc_type_func(type, slide, cur);
917 		}
918 		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
919 		    kext_uuid);
920 		kalloc_type_func(type, mark_processed, cur);
921 		/*
922 		 * Skip views that go to the VM
923 		 */
924 		if (kalloc_type_from_vm(kt_flags)) {
925 			cur += kalloc_type_func(type, view_sz);
926 			continue;
927 		}
928 
929 		/*
930 		 * If signature indicates that the entire allocation is data move it to
931 		 * KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a fake "data" heap,
932 		 * variable kalloc_type handles the actual redirection in the entry points
933 		 * kalloc/kfree_type_var_impl.
934 		 */
935 		if (kalloc_type_is_data(kt_flags)) {
936 			kalloc_type_func(type, handle_data_view, cur);
937 			cur += kalloc_type_func(type, view_sz);
938 			continue;
939 		}
940 
941 		/*
942 		 * Set type hash that is used by kmem_*_guard
943 		 */
944 		kalloc_type_func(type, set_type_hash, cur);
945 		kalloc_type_func(type, update_view, cur);
946 		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
947 		cur += kalloc_type_func(type, view_sz);
948 		*cur_count = *cur_count + 1;
949 	}
950 }
951 
952 __startup_func
953 static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)954 kalloc_type_view_parse(const kalloc_type_variant_t type)
955 {
956 	kc_format_t kc_format;
957 	uint64_t cur_count = 0;
958 
959 	if (!PE_get_primary_kc_format(&kc_format)) {
960 		panic("kalloc_type_view_parse: wasn't able to determine kc format");
961 	}
962 
963 	if (kc_format == KCFormatStatic) {
964 		/*
965 		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
966 		 * xnu are coalesced.
967 		 */
968 		kalloc_type_view_copy(type,
969 		    kalloc_type_var(type, sec_start),
970 		    kalloc_type_var(type, sec_end),
971 		    &cur_count, false, NULL);
972 	} else if (kc_format == KCFormatFileset) {
973 		/*
974 		 * If kc uses filesets, traverse __kalloc_type section for each
975 		 * macho in the BootKC.
976 		 */
977 		kernel_mach_header_t *kc_mh = NULL;
978 		kernel_mach_header_t *kext_mh = NULL;
979 
980 		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
981 		struct load_command *lc =
982 		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
983 		for (uint32_t i = 0; i < kc_mh->ncmds;
984 		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
985 			if (lc->cmd != LC_FILESET_ENTRY) {
986 				continue;
987 			}
988 			struct fileset_entry_command *fse =
989 			    (struct fileset_entry_command *)(vm_offset_t)lc;
990 			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
991 			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
992 				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
993 			if (sect != NULL) {
994 				unsigned long uuidlen = 0;
995 				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
996 				uuid_string_t kext_uuid_str;
997 				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
998 					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
999 				}
1000 				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1001 				    &cur_count, false, kext_uuid_str);
1002 			}
1003 		}
1004 	} else if (kc_format == KCFormatKCGEN) {
1005 		/*
1006 		 * Parse __kalloc_type section from xnu
1007 		 */
1008 		kalloc_type_view_copy(type,
1009 		    kalloc_type_var(type, sec_start),
1010 		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1011 
1012 		/*
1013 		 * Parse __kalloc_type section for kexts
1014 		 *
1015 		 * Note: We don't process the kalloc_type_views for kexts on armv7
1016 		 * as this platform has insufficient memory for type based
1017 		 * segregation. kalloc_type_impl_external will direct callsites
1018 		 * based on their size.
1019 		 */
1020 		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1021 		vm_offset_t cur = 0;
1022 		vm_offset_t end = 0;
1023 
1024 		/*
1025 		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1026 		 * and traverse it.
1027 		 */
1028 		kernel_section_t *prelink_sect = getsectbynamefromheader(
1029 			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1030 		assert(prelink_sect);
1031 		cur = prelink_sect->addr;
1032 		end = prelink_sect->addr + prelink_sect->size;
1033 
1034 		while (cur < end) {
1035 			uint64_t kext_text_sz = 0;
1036 			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1037 
1038 			if (kext_mh->magic == 0) {
1039 				/*
1040 				 * Assert that we have processed all kexts and all that is left
1041 				 * is padding
1042 				 */
1043 				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1044 				break;
1045 			} else if (kext_mh->magic != MH_MAGIC_64 &&
1046 			    kext_mh->magic != MH_CIGAM_64) {
1047 				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1048 				    cur);
1049 			}
1050 
1051 			/*
1052 			 * Kext macho found, iterate through its segments
1053 			 */
1054 			struct load_command *lc =
1055 			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1056 			bool isSplitKext = false;
1057 
1058 			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1059 			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1060 				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1061 					isSplitKext = true;
1062 					continue;
1063 				} else if (lc->cmd != LC_SEGMENT_64) {
1064 					continue;
1065 				}
1066 
1067 				kernel_segment_command_t *seg_cmd =
1068 				    (struct segment_command_64 *)(vm_offset_t)lc;
1069 				/*
1070 				 * Parse kalloc_type section
1071 				 */
1072 				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1073 					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1074 					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1075 					if (kt_sect) {
1076 						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1077 						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1078 						    true, NULL);
1079 					}
1080 				}
1081 				/*
1082 				 * If the kext has a __TEXT segment, that is the only thing that
1083 				 * will be in the special __PRELINK_TEXT KC segment, so the next
1084 				 * macho is right after.
1085 				 */
1086 				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1087 					kext_text_sz = seg_cmd->filesize;
1088 				}
1089 			}
1090 			/*
1091 			 * If the kext did not have a __TEXT segment (special xnu kexts with
1092 			 * only a __LINKEDIT segment) then the next macho will be after all the
1093 			 * header commands.
1094 			 */
1095 			if (!kext_text_sz) {
1096 				kext_text_sz = kext_mh->sizeofcmds;
1097 			} else if (!isSplitKext) {
1098 				panic("kalloc_type_view_parse: No support for non-split seg KCs");
1099 				break;
1100 			}
1101 
1102 			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1103 		}
1104 	} else {
1105 		/*
1106 		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1107 		 * parsing kalloc_type_view structs during startup.
1108 		 */
1109 		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1110 		    " for kc_format = %d\n", kc_format);
1111 	}
1112 	return cur_count;
1113 }
1114 
1115 __startup_func
1116 static int
kalloc_type_cmp_fixed(const void * a,const void * b)1117 kalloc_type_cmp_fixed(const void *a, const void *b)
1118 {
1119 	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1120 	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1121 
1122 	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1123 	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1124 	/*
1125 	 * If the kalloc_type_views are in the same kalloc bucket, sort by
1126 	 * signature else sort by size
1127 	 */
1128 	if (idxA == idxB) {
1129 		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1130 		/*
1131 		 * If the kalloc_type_views have the same signature sort by site
1132 		 * name
1133 		 */
1134 		if (result == 0) {
1135 			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1136 		}
1137 		return result;
1138 	}
1139 	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1140 	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1141 	return (int)(sizeA - sizeB);
1142 }
1143 
1144 __startup_func
1145 static int
kalloc_type_cmp_var(const void * a,const void * b)1146 kalloc_type_cmp_var(const void *a, const void *b)
1147 {
1148 	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1149 	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1150 	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1151 	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1152 	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1153 	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1154 	int result = 0;
1155 
1156 	/*
1157 	 * Switched around (B - A) because we want the pointer arrays to be at the
1158 	 * top
1159 	 */
1160 	result = ktB_ptrArray - ktA_ptrArray;
1161 	if (result == 0) {
1162 		result = strcmp(ktA_hdr, ktB_hdr);
1163 		if (result == 0) {
1164 			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1165 			if (result == 0) {
1166 				result = strcmp(ktA->kt_name, ktB->kt_name);
1167 			}
1168 		}
1169 	}
1170 	return result;
1171 }
1172 
1173 __startup_func
1174 static uint16_t *
kalloc_type_create_iterators_fixed(uint16_t * kt_skip_list_start,uint64_t count)1175 kalloc_type_create_iterators_fixed(
1176 	uint16_t           *kt_skip_list_start,
1177 	uint64_t            count)
1178 {
1179 	uint16_t *kt_skip_list = kt_skip_list_start;
1180 	uint16_t p_idx = UINT16_MAX; /* previous size idx */
1181 	uint16_t c_idx = 0; /* current size idx */
1182 	uint16_t unique_sig = 0;
1183 	uint16_t total_sig = 0;
1184 	const char *p_sig = NULL;
1185 	const char *p_name = "";
1186 	const char *c_sig = NULL;
1187 	const char *c_name = NULL;
1188 
1189 	/*
1190 	 * Walk over each kalloc_type_view
1191 	 */
1192 	for (uint16_t i = 0; i < count; i++) {
1193 		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1194 
1195 		c_idx = kalloc_type_get_idx(kt->kt_size);
1196 		c_sig = kt->kt_signature;
1197 		c_name = kt->kt_zv.zv_name;
1198 		/*
1199 		 * When current kalloc_type_view is in a different kalloc size
1200 		 * bucket than the previous, it means we have processed all in
1201 		 * the previous size bucket, so store the accumulated values
1202 		 * and advance the indices.
1203 		 */
1204 		if (p_idx == UINT16_MAX || c_idx != p_idx) {
1205 			/*
1206 			 * Updates for frequency lists
1207 			 */
1208 			if (p_idx != UINT16_MAX) {
1209 				kt_freq_list[p_idx] = unique_sig;
1210 				kt_freq_list_total[p_idx] = total_sig - unique_sig;
1211 			}
1212 			unique_sig = 1;
1213 			total_sig = 1;
1214 
1215 			p_idx = c_idx;
1216 			p_sig = c_sig;
1217 			p_name = c_name;
1218 
1219 			/*
1220 			 * Updates to signature skip list
1221 			 */
1222 			*kt_skip_list = i;
1223 			kt_skip_list++;
1224 
1225 			continue;
1226 		}
1227 
1228 		/*
1229 		 * When current kalloc_type_views is in the kalloc size bucket as
1230 		 * previous, analyze the siganture to see if it is unique.
1231 		 *
1232 		 * Signatures are collapsible if one is a substring of the next.
1233 		 */
1234 		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1235 			/*
1236 			 * Unique signature detected. Update counts and advance index
1237 			 */
1238 			unique_sig++;
1239 			total_sig++;
1240 
1241 			*kt_skip_list = i;
1242 			kt_skip_list++;
1243 			p_sig = c_sig;
1244 			p_name = c_name;
1245 			continue;
1246 		}
1247 		/*
1248 		 * Need this here as we do substring matching for signatures so you
1249 		 * want to track the longer signature seen rather than the substring
1250 		 */
1251 		p_sig = c_sig;
1252 
1253 		/*
1254 		 * Check if current kalloc_type_view corresponds to a new type
1255 		 */
1256 		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1257 			total_sig++;
1258 			p_name = c_name;
1259 		}
1260 	}
1261 	/*
1262 	 * Final update
1263 	 */
1264 	assert(c_idx == p_idx);
1265 	assert(kt_freq_list[c_idx] == 0);
1266 	kt_freq_list[c_idx] = unique_sig;
1267 	kt_freq_list_total[c_idx] = total_sig - unique_sig;
1268 	*kt_skip_list = (uint16_t) count;
1269 
1270 	return ++kt_skip_list;
1271 }
1272 
1273 __startup_func
1274 static uint32_t
kalloc_type_create_iterators_var(uint32_t * kt_skip_list_start,uint32_t buf_start)1275 kalloc_type_create_iterators_var(
1276 	uint32_t           *kt_skip_list_start,
1277 	uint32_t            buf_start)
1278 {
1279 	uint32_t *kt_skip_list = kt_skip_list_start;
1280 	uint32_t n = 0;
1281 
1282 	kt_skip_list[n] = buf_start;
1283 	assert(kt_count > buf_start + 1);
1284 	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
1285 		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1286 		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1287 		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1288 		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1289 		assert(ktA->kt_sig_type != NULL);
1290 		assert(ktB->kt_sig_type != NULL);
1291 		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1292 		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1293 			n++;
1294 			kt_skip_list[n] = i;
1295 		}
1296 	}
1297 	/*
1298 	 * Final update
1299 	 */
1300 	n++;
1301 	kt_skip_list[n] = (uint32_t) kt_count;
1302 	return n;
1303 }
1304 
1305 __startup_func
1306 static uint16_t
kalloc_type_distribute_budget(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget,uint16_t min_zones_per_size)1307 kalloc_type_distribute_budget(
1308 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1309 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)],
1310 	uint16_t            zone_budget,
1311 	uint16_t            min_zones_per_size)
1312 {
1313 	uint16_t total_sig = 0;
1314 	uint16_t min_sig = 0;
1315 	uint16_t assigned_zones = 0;
1316 	uint16_t remaining_zones = zone_budget;
1317 	uint16_t modulo = 0;
1318 
1319 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1320 		uint16_t sig_freq = freq_list[i];
1321 		uint16_t min_zones = min_zones_per_size;
1322 
1323 		if (sig_freq < min_zones_per_size) {
1324 			min_zones = sig_freq;
1325 		}
1326 		total_sig += sig_freq;
1327 		kt_zones[i] = min_zones;
1328 		min_sig += min_zones;
1329 	}
1330 	if (remaining_zones > total_sig) {
1331 		remaining_zones = total_sig;
1332 	}
1333 	assert(remaining_zones >= min_sig);
1334 	remaining_zones -= min_sig;
1335 	total_sig -= min_sig;
1336 	assigned_zones += min_sig;
1337 
1338 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1339 		uint16_t freq = freq_list[i];
1340 
1341 		if (freq < min_zones_per_size) {
1342 			continue;
1343 		}
1344 		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1345 		uint16_t n_zones = (uint16_t) numer / total_sig;
1346 
1347 		/*
1348 		 * Accumulate remainder and increment n_zones when it goes above
1349 		 * denominator
1350 		 */
1351 		modulo += numer % total_sig;
1352 		if (modulo >= total_sig) {
1353 			n_zones++;
1354 			modulo -= total_sig;
1355 		}
1356 
1357 		/*
1358 		 * Cap the total number of zones to the unique signatures
1359 		 */
1360 		if ((n_zones + min_zones_per_size) > freq) {
1361 			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1362 			modulo += (extra_zones * total_sig);
1363 			n_zones -= extra_zones;
1364 		}
1365 		kt_zones[i] += n_zones;
1366 		assigned_zones += n_zones;
1367 	}
1368 
1369 	if (kt_options & KT_OPTIONS_DEBUG) {
1370 		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1371 		    assigned_zones, remaining_zones + min_sig - assigned_zones);
1372 	}
1373 	return remaining_zones + min_sig - assigned_zones;
1374 }
1375 
1376 __startup_func
1377 static int
kalloc_type_cmp_type_zones(const void * a,const void * b)1378 kalloc_type_cmp_type_zones(const void *a, const void *b)
1379 {
1380 	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
1381 	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;
1382 
1383 	return (int)(B.nzones - A.nzones);
1384 }
1385 
1386 __startup_func
1387 static void
kalloc_type_redistribute_budget(uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)])1388 kalloc_type_redistribute_budget(
1389 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1390 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)])
1391 {
1392 	uint16_t count = 0, cur_count = 0;
1393 	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
1394 	uint16_t top_zone_total = 0;
1395 
1396 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1397 		uint16_t zones = kt_zones[i];
1398 
1399 		/*
1400 		 * If a sizeclass got no zones but has types to divide make a note
1401 		 * of it
1402 		 */
1403 		if (zones == 0 && (freq_total_list[i] != 0)) {
1404 			count++;
1405 		}
1406 
1407 		sorted_zones[i].nzones = kt_zones[i];
1408 		sorted_zones[i].idx = i;
1409 	}
1410 
1411 	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
1412 	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);
1413 
1414 	for (uint16_t i = 0; i < 3; i++) {
1415 		top_zone_total += sorted_zones[i].nzones;
1416 	}
1417 
1418 	/*
1419 	 * Borrow zones from the top 3 sizeclasses and redistribute to those
1420 	 * that didn't get a zone but that types to divide
1421 	 */
1422 	cur_count = count;
1423 	for (uint16_t i = 0; i < 3; i++) {
1424 		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
1425 		uint16_t zone_available = kt_zones[sorted_zones[i].idx];
1426 
1427 		if (zone_borrow > (zone_available / 2)) {
1428 			zone_borrow = zone_available / 2;
1429 		}
1430 		kt_zones[sorted_zones[i].idx] -= zone_borrow;
1431 		cur_count -= zone_borrow;
1432 	}
1433 
1434 	for (uint16_t i = 0; i < 3; i++) {
1435 		if (cur_count == 0) {
1436 			break;
1437 		}
1438 		kt_zones[sorted_zones[i].idx]--;
1439 		cur_count--;
1440 	}
1441 
1442 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1443 		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
1444 		    (count > cur_count)) {
1445 			kt_zones[i]++;
1446 			count--;
1447 		}
1448 	}
1449 }
1450 
1451 static uint16_t
kalloc_type_apply_policy(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_sig[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_type[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget)1452 kalloc_type_apply_policy(
1453 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1454 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1455 	uint16_t            kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
1456 	uint16_t            kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
1457 	uint16_t            zone_budget)
1458 {
1459 	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
1460 	uint16_t zbudget_type = zone_budget - zbudget_sig;
1461 	uint16_t wasted_zones = 0;
1462 
1463 #if DEBUG || DEVELOPMENT
1464 	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1465 		uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1466 
1467 		assert(zone_budget + current_zones <= MAX_ZONES);
1468 	}
1469 #endif
1470 
1471 	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
1472 	    zbudget_sig, 2);
1473 	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
1474 	    kt_zones_type, zbudget_type, 0);
1475 	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1476 
1477 	/*
1478 	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1479 	 */
1480 	if (kt_options & KT_OPTIONS_DEBUG) {
1481 		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
1482 		    "zones_type\n");
1483 		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1484 			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
1485 			    freq_total_list[i] + freq_list[i], freq_list[i],
1486 			    kt_zones_sig[i] + kt_zones_type[i],
1487 			    kt_zones_sig[i], kt_zones_type[i]);
1488 		}
1489 	}
1490 
1491 	return wasted_zones;
1492 }
1493 
1494 
1495 __startup_func
1496 static void
kalloc_type_create_zone_for_size(zone_t * kt_zones_for_size,uint16_t kt_zones,vm_size_t z_size)1497 kalloc_type_create_zone_for_size(
1498 	zone_t             *kt_zones_for_size,
1499 	uint16_t            kt_zones,
1500 	vm_size_t           z_size)
1501 {
1502 	zone_t p_zone = NULL;
1503 	char *z_name = NULL;
1504 	zone_t shared_z = NULL;
1505 
1506 	for (uint16_t i = 0; i < kt_zones; i++) {
1507 		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1508 		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1509 		    (size_t) z_size);
1510 		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1511 		if (i != 0) {
1512 			p_zone->z_kt_next = z;
1513 		}
1514 		p_zone = z;
1515 		kt_zones_for_size[i] = z;
1516 	}
1517 	/*
1518 	 * Create shared zone for sizeclass if it doesn't already exist
1519 	 */
1520 	if (kt_shared_fixed) {
1521 		shared_z = kalloc_zone_for_size(KHEAP_SHARED->kh_zstart, z_size);
1522 		if (zone_elem_inner_size(shared_z) != z_size) {
1523 			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1524 			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
1525 			    (size_t) z_size);
1526 			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
1527 			    ^(zone_t zone){
1528 				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_SHARED;
1529 			});
1530 		}
1531 	}
1532 	kt_zones_for_size[kt_zones] = shared_z;
1533 }
1534 
1535 __startup_func
1536 static uint16_t
kalloc_type_zones_for_type(uint16_t zones_total_type,uint16_t unique_types,uint16_t total_types,bool last_sig)1537 kalloc_type_zones_for_type(
1538 	uint16_t            zones_total_type,
1539 	uint16_t            unique_types,
1540 	uint16_t            total_types,
1541 	bool                last_sig)
1542 {
1543 	uint16_t zones_for_type = 0, n_mod = 0;
1544 
1545 	if (zones_total_type == 0) {
1546 		return 0;
1547 	}
1548 
1549 	zones_for_type = (zones_total_type * unique_types) / total_types;
1550 	n_mod = (zones_total_type * unique_types) % total_types;
1551 	zone_carry += n_mod;
1552 
1553 	/*
1554 	 * Drain carry opportunistically
1555 	 */
1556 	if (((unique_types > 3) && (zone_carry > 0)) ||
1557 	    (zone_carry >= (int) total_types) ||
1558 	    (last_sig && (zone_carry > 0))) {
1559 		zone_carry -= total_types;
1560 		zones_for_type++;
1561 	}
1562 
1563 	if (last_sig) {
1564 		assert(zone_carry == 0);
1565 	}
1566 
1567 	return zones_for_type;
1568 }
1569 
1570 __startup_func
1571 static uint16_t
kalloc_type_build_skip_list(kalloc_type_view_t * start,kalloc_type_view_t * end,uint16_t * kt_skip_list)1572 kalloc_type_build_skip_list(
1573 	kalloc_type_view_t     *start,
1574 	kalloc_type_view_t     *end,
1575 	uint16_t               *kt_skip_list)
1576 {
1577 	kalloc_type_view_t *cur = start;
1578 	kalloc_type_view_t prev = *start;
1579 	uint16_t i = 0, idx = 0;
1580 
1581 	kt_skip_list[idx] = i;
1582 	idx++;
1583 
1584 	while (cur < end) {
1585 		kalloc_type_view_t kt_cur = *cur;
1586 
1587 		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1588 			kt_skip_list[idx] = i;
1589 
1590 			prev = kt_cur;
1591 			idx++;
1592 		}
1593 		i++;
1594 		cur++;
1595 	}
1596 
1597 	/*
1598 	 * Final update
1599 	 */
1600 	kt_skip_list[idx] = i;
1601 	return idx;
1602 }
1603 
1604 __startup_func
1605 static void
kalloc_type_init_sig_eq(zone_t * zones,uint16_t n_zones,zone_t sig_zone)1606 kalloc_type_init_sig_eq(
1607 	zone_t             *zones,
1608 	uint16_t            n_zones,
1609 	zone_t              sig_zone)
1610 {
1611 	for (uint16_t i = 0; i < n_zones; i++) {
1612 		zone_t z = zones[i];
1613 
1614 		assert(!zone_get_sig_eq(z));
1615 		zone_set_sig_eq(z, zone_index(sig_zone));
1616 	}
1617 }
1618 
1619 __startup_func
1620 static uint16_t
kalloc_type_distribute_zone_for_type(kalloc_type_view_t * start,kalloc_type_view_t * end,bool last_sig,uint16_t zones_total_type,uint16_t total_types,uint16_t * kt_skip_list,zone_t kt_zones_for_size[32],uint16_t type_zones_start,zone_t sig_zone,zone_t shared_zone)1621 kalloc_type_distribute_zone_for_type(
1622 	kalloc_type_view_t *start,
1623 	kalloc_type_view_t *end,
1624 	bool                last_sig,
1625 	uint16_t            zones_total_type,
1626 	uint16_t            total_types,
1627 	uint16_t           *kt_skip_list,
1628 	zone_t              kt_zones_for_size[32],
1629 	uint16_t            type_zones_start,
1630 	zone_t              sig_zone,
1631 	zone_t              shared_zone)
1632 {
1633 	uint16_t count = 0, n_zones = 0;
1634 	uint16_t *shuffle_buf = NULL;
1635 	zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1636 
1637 	/*
1638 	 * Assert there is space in buffer
1639 	 */
1640 	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1641 	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1642 	    last_sig);
1643 	shuffle_buf = &kt_skip_list[count + 1];
1644 
1645 	/*
1646 	 * Initalize signature equivalence zone for type zones
1647 	 */
1648 	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1649 
1650 	if (n_zones == 0) {
1651 		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1652 		    shared_zone);
1653 		return n_zones;
1654 	}
1655 
1656 	/*
1657 	 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1658 	 */
1659 	if (count == 1) {
1660 		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1661 		    shared_zone);
1662 		return n_zones;
1663 	}
1664 
1665 	/*
1666 	 * Add the signature based zone to n_zones
1667 	 */
1668 	n_zones++;
1669 
1670 	for (uint16_t i = 0; i < count; i++) {
1671 		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1672 		uint16_t type_start = kt_skip_list[i];
1673 		kalloc_type_view_t *kt_type_start = &start[type_start];
1674 		uint16_t type_end = kt_skip_list[i + 1];
1675 		kalloc_type_view_t *kt_type_end = &start[type_end];
1676 		zone_t zone;
1677 
1678 		if (zidx == 0) {
1679 			kmem_shuffle(shuffle_buf, n_zones);
1680 		}
1681 
1682 		shuffled_zidx = shuffle_buf[zidx];
1683 		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1684 		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1685 		    shared_zone);
1686 	}
1687 
1688 	return n_zones - 1;
1689 }
1690 
1691 __startup_func
1692 static void
kalloc_type_create_zones_fixed(uint16_t * kt_skip_list_start,uint16_t * kt_shuffle_buf)1693 kalloc_type_create_zones_fixed(
1694 	uint16_t           *kt_skip_list_start,
1695 	uint16_t           *kt_shuffle_buf)
1696 {
1697 	uint16_t *kt_skip_list = kt_skip_list_start;
1698 	uint16_t p_j = 0;
1699 	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1700 	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1701 #if DEBUG || DEVELOPMENT
1702 	uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1703 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1704 #endif
1705 	/*
1706 	 * Apply policy to determine how many zones to create for each size
1707 	 * class.
1708 	 */
1709 	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1710 	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1711 
1712 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1713 		uint16_t n_unique_sig = kt_freq_list[i];
1714 		vm_size_t z_size = kt_zone_cfg[i];
1715 		uint16_t n_zones_sig = kt_zones_sig[i];
1716 		uint16_t n_zones_type = kt_zones_type[i];
1717 		uint16_t total_types = kt_freq_list_total[i];
1718 		uint16_t type_zones_used = 0;
1719 
1720 		if (n_unique_sig == 0) {
1721 			continue;
1722 		}
1723 
1724 		zone_carry = 0;
1725 		assert(n_zones_sig + n_zones_type + 1 <= 32);
1726 		zone_t kt_zones_for_size[32] = {};
1727 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1728 		    n_zones_sig + n_zones_type, z_size);
1729 
1730 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1731 		/*
1732 		 * Ensure that there is enough space to shuffle n_unique_sig
1733 		 * indices
1734 		 */
1735 		assert(n_unique_sig < kt_shuffle_count);
1736 
1737 		/*
1738 		 * Get a shuffled set of signature indices
1739 		 */
1740 		*kt_shuffle_buf = 0;
1741 		if (n_unique_sig > 1) {
1742 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1743 		}
1744 
1745 		for (uint16_t j = 0; j < n_zones_sig; j++) {
1746 			zone_t *z_ptr = &kt_zones_for_size[j];
1747 
1748 			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1749 		}
1750 
1751 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1752 			/*
1753 			 * For every size that has unique types
1754 			 */
1755 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1756 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1757 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1758 			zone_t zone = kt_zones_for_size[j % n_zones_sig];
1759 			zone_t shared_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1760 			bool last_sig;
1761 
1762 			last_sig = (j == (n_unique_sig - 1)) ? true : false;
1763 			type_zones_used += kalloc_type_distribute_zone_for_type(
1764 				&kt_buffer[cur].ktv_fixed,
1765 				&kt_buffer[end].ktv_fixed, last_sig,
1766 				n_zones_type, total_types + n_unique_sig,
1767 				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1768 				n_zones_sig + type_zones_used, zone, shared_zone);
1769 		}
1770 		assert(type_zones_used <= n_zones_type);
1771 		p_j += n_unique_sig;
1772 	}
1773 }
1774 
1775 __startup_func
1776 static void
kalloc_type_view_init_fixed(void)1777 kalloc_type_view_init_fixed(void)
1778 {
1779 	kalloc_type_hash_seed = (uint32_t) early_random();
1780 	kalloc_type_build_dlut();
1781 	/*
1782 	 * Parse __kalloc_type sections and build array of pointers to
1783 	 * all kalloc type views in kt_buffer.
1784 	 */
1785 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1786 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1787 
1788 #if DEBUG || DEVELOPMENT
1789 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1790 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1791 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1792 #endif
1793 
1794 	/*
1795 	 * Sort based on size class and signature
1796 	 */
1797 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1798 	    kalloc_type_cmp_fixed);
1799 
1800 	/*
1801 	 * Build a skip list that holds starts of unique signatures and a
1802 	 * frequency list of number of unique and total signatures per kalloc
1803 	 * size class
1804 	 */
1805 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1806 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1807 		kt_skip_list_start, kt_count);
1808 
1809 	/*
1810 	 * Create zones based on signatures
1811 	 */
1812 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1813 }
1814 
1815 __startup_func
1816 static void
kalloc_type_heap_init(void)1817 kalloc_type_heap_init(void)
1818 {
1819 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1820 	char kh_name[MAX_ZONE_NAME];
1821 	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1822 
1823 	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1824 		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1825 		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1826 		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1827 	}
1828 	/*
1829 	 * All variable kalloc type allocations are collapsed into a single
1830 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1831 	 */
1832 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1833 	zone_view_count += 1;
1834 }
1835 
1836 __startup_func
1837 static void
kalloc_type_assign_heap(uint32_t start,uint32_t end,uint32_t heap_id)1838 kalloc_type_assign_heap(
1839 	uint32_t            start,
1840 	uint32_t            end,
1841 	uint32_t            heap_id)
1842 {
1843 	bool use_split = kmem_get_random16(1);
1844 
1845 	if (use_split) {
1846 		heap_id = kt_var_heaps;
1847 	}
1848 	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1849 	    &kt_buffer[end].ktv_var, heap_id);
1850 }
1851 
1852 __startup_func
1853 static void
kalloc_type_split_heap(uint32_t start,uint32_t end,uint32_t heap_id)1854 kalloc_type_split_heap(
1855 	uint32_t            start,
1856 	uint32_t            end,
1857 	uint32_t            heap_id)
1858 {
1859 	uint32_t count = start;
1860 	const char *p_name = NULL;
1861 
1862 	while (count < end) {
1863 		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1864 		const char *c_name = cur->kt_name;
1865 
1866 		if (!p_name) {
1867 			assert(count == start);
1868 			p_name = c_name;
1869 		}
1870 		if (strcmp(c_name, p_name) != 0) {
1871 			kalloc_type_assign_heap(start, count, heap_id);
1872 			start = count;
1873 			p_name = c_name;
1874 		}
1875 		count++;
1876 	}
1877 	kalloc_type_assign_heap(start, end, heap_id);
1878 }
1879 
1880 __startup_func
1881 static void
kalloc_type_view_init_var(void)1882 kalloc_type_view_init_var(void)
1883 {
1884 	uint32_t buf_start = 0, unique_sig = 0;
1885 	uint32_t *kt_skip_list_start;
1886 	uint16_t *shuffle_buf;
1887 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
1888 	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
1889 	/*
1890 	 * Pick a random heap to split
1891 	 */
1892 	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
1893 
1894 	/*
1895 	 * Zones are created prior to parsing the views as zone budget is fixed
1896 	 * per sizeclass and special types identified while parsing are redirected
1897 	 * as they are discovered.
1898 	 */
1899 	kalloc_type_heap_init();
1900 
1901 	/*
1902 	 * Parse __kalloc_var sections and build array of pointers to views that
1903 	 * aren't rediected in kt_buffer.
1904 	 */
1905 	kt_count = kalloc_type_view_parse(KTV_VAR);
1906 	assert(kt_count < UINT32_MAX);
1907 
1908 #if DEBUG || DEVELOPMENT
1909 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
1910 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
1911 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1912 #endif
1913 
1914 	/*
1915 	 * Sort based on size class and signature
1916 	 */
1917 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
1918 	    kalloc_type_cmp_var);
1919 
1920 	buf_start = kalloc_type_handle_parray_var();
1921 
1922 	/*
1923 	 * Build a skip list that holds starts of unique signatures
1924 	 */
1925 	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
1926 	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
1927 	    buf_start);
1928 	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
1929 	/*
1930 	 * If we have only one heap then other elements share heap with pointer
1931 	 * arrays
1932 	 */
1933 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
1934 		panic("kt_var_heaps is too small");
1935 	}
1936 
1937 	kmem_shuffle(shuffle_buf, flex_heap_count);
1938 	/*
1939 	 * The index of the heap we decide to split is placed twice in the shuffle
1940 	 * buffer so that it gets twice the number of signatures that we split
1941 	 * evenly
1942 	 */
1943 	shuffle_buf[flex_heap_count] = split_heap;
1944 	split_heap += (fixed_heaps + 1);
1945 
1946 	for (uint32_t i = 1; i <= unique_sig; i++) {
1947 		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
1948 		    fixed_heaps + 1;
1949 		uint32_t start = kt_skip_list_start[i - 1];
1950 		uint32_t end = kt_skip_list_start[i];
1951 
1952 		assert(heap_id <= kt_var_heaps);
1953 		if (heap_id == split_heap) {
1954 			kalloc_type_split_heap(start, end, heap_id);
1955 			continue;
1956 		}
1957 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1958 		    &kt_buffer[end].ktv_var, heap_id);
1959 	}
1960 }
1961 
1962 __startup_func
1963 static void
kalloc_init(void)1964 kalloc_init(void)
1965 {
1966 	/*
1967 	 * Allocate scratch space to parse kalloc_type_views and create
1968 	 * other structures necessary to process them.
1969 	 */
1970 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
1971 
1972 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
1973 	kalloc_zsize_compute();
1974 
1975 	/* Initialize kalloc data buffers heap */
1976 	kalloc_heap_init(KHEAP_DATA_BUFFERS);
1977 
1978 	/* Initialize kalloc shared buffers heap */
1979 	kalloc_heap_init(KHEAP_SHARED);
1980 
1981 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
1982 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_KALLOC);
1983 
1984 	/*
1985 	 * Handle fixed size views
1986 	 */
1987 	kalloc_type_view_init_fixed();
1988 
1989 	/*
1990 	 * Reset
1991 	 */
1992 	bzero(kt_buffer, kt_scratch_size);
1993 	kt_count = max_count;
1994 
1995 	/*
1996 	 * Handle variable size views
1997 	 */
1998 	kalloc_type_view_init_var();
1999 
2000 	/*
2001 	 * Free resources used
2002 	 */
2003 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2004 }
2005 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2006 
2007 #pragma mark accessors
2008 
2009 #define KFREE_ABSURD_SIZE \
2010 	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2011 
2012 static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)2013 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2014 {
2015 	thread_t thr = current_thread();
2016 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2017 }
2018 
2019 static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)2020 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2021 {
2022 	thread_t thr = current_thread();
2023 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2024 }
2025 
2026 static kmem_guard_t
kalloc_guard(vm_tag_t tag,uint16_t type_hash,const void * owner)2027 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2028 {
2029 	kmem_guard_t guard = {
2030 		.kmg_atomic      = true,
2031 		.kmg_tag         = tag,
2032 		.kmg_type_hash   = type_hash,
2033 		.kmg_context     = os_hash_kernel_pointer(owner),
2034 	};
2035 
2036 	/*
2037 	 * TODO: this use is really not sufficiently smart.
2038 	 */
2039 
2040 	return guard;
2041 }
2042 
2043 #if __arm64e__ || KASAN_TBI
2044 
2045 #if __arm64e__
2046 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2047 
2048 /*
2049  * Zone encoding is:
2050  *
2051  *   <PAC SIG><1><1><PTR value><5 bits of size class>
2052  *
2053  * VM encoding is:
2054  *
2055  *   <PAC SIG><1><0><PTR value><14 bits of page count>
2056  *
2057  * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2058  * so that PAC authentication extends the proper sign bit.
2059  */
2060 
2061 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2062 #else
2063 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2064 
2065 /*
2066  * Zone encoding is:
2067  *
2068  *   <TBI><1><PTR value><5 bits of size class>
2069  *
2070  * VM encoding is:
2071  *
2072  *   <TBI><0><PTR value><14 bits of page count>
2073  */
2074 
2075 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2076 #endif
2077 
2078 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2079 
2080 __attribute__((always_inline))
2081 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2082 __kalloc_array_decode(vm_address_t ptr)
2083 {
2084 	struct kalloc_result kr;
2085 	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2086 
2087 	if (ptr & zone_mask) {
2088 		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2089 		ptr &= ~0x1full;
2090 	} else if (__probable(ptr)) {
2091 		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2092 		ptr &= ~PAGE_MASK;
2093 		ptr |= zone_mask;
2094 	} else {
2095 		kr.size = 0;
2096 	}
2097 
2098 	kr.addr = (void *)ptr;
2099 	return kr;
2100 }
2101 
2102 static inline void *
__kalloc_array_encode_zone(zone_t z,void * ptr,vm_size_t size __unused)2103 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2104 {
2105 	return (void *)((vm_address_t)ptr | z->z_array_size_class);
2106 }
2107 
2108 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2109 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2110 {
2111 	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2112 
2113 	return addr | atop(size);
2114 }
2115 
2116 #else
2117 
2118 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2119 
2120 /*
2121  * Encoding is:
2122  * bits  0..46: pointer value
2123  * bits 47..47: 0: zones, 1: VM
2124  * bits 48..63: zones: elem size, VM: number of pages
2125  */
2126 
2127 #define KALLOC_ARRAY_TYPE_BIT   47
2128 static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2129 static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2130 
2131 __attribute__((always_inline))
2132 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2133 __kalloc_array_decode(vm_address_t ptr)
2134 {
2135 	struct kalloc_result kr;
2136 	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2137 
2138 	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2139 	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2140 		kr.size <<= PAGE_SHIFT;
2141 	}
2142 	/* sign extend, so that it also works with NULL */
2143 	kr.addr = (void *)((long)(ptr << shift) >> shift);
2144 
2145 	return kr;
2146 }
2147 
2148 static inline void *
__kalloc_array_encode_zone(zone_t z __unused,void * ptr,vm_size_t size)2149 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2150 {
2151 	vm_address_t addr = (vm_address_t)ptr;
2152 
2153 	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2154 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2155 
2156 	return (void *)addr;
2157 }
2158 
2159 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2160 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2161 {
2162 	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2163 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2164 
2165 	return addr;
2166 }
2167 
2168 #endif
2169 
2170 vm_size_t
kalloc_next_good_size(vm_size_t size,uint32_t period)2171 kalloc_next_good_size(vm_size_t size, uint32_t period)
2172 {
2173 	uint32_t scale = kalloc_log2down((uint32_t)size);
2174 	vm_size_t step, size_class;
2175 
2176 	if (size < KHEAP_STEP_START) {
2177 		return KHEAP_STEP_START;
2178 	}
2179 	if (size < 2 * KHEAP_STEP_START) {
2180 		return 2 * KHEAP_STEP_START;
2181 	}
2182 
2183 	if (size < KHEAP_MAX_SIZE) {
2184 		step = 1ul << (scale - 1);
2185 	} else {
2186 		step = round_page(1ul << (scale - kalloc_log2down(period)));
2187 	}
2188 
2189 	size_class = (size + step) & -step;
2190 #if KASAN_CLASSIC
2191 	if (size > K_SIZE_CLASS(size_class)) {
2192 		return kalloc_next_good_size(size_class, period);
2193 	}
2194 	size_class = K_SIZE_CLASS(size_class);
2195 #endif
2196 	return size_class;
2197 }
2198 
2199 
2200 #pragma mark kalloc
2201 
2202 static inline kalloc_heap_t
kalloc_type_get_heap(kalloc_type_var_view_t kt_view,bool kt_free __unused)2203 kalloc_type_get_heap(kalloc_type_var_view_t kt_view, bool kt_free __unused)
2204 {
2205 	/*
2206 	 * Redirect data-only views
2207 	 */
2208 	if (kalloc_type_is_data(kt_view->kt_flags)) {
2209 		return KHEAP_DATA_BUFFERS;
2210 	}
2211 
2212 	if (kt_view->kt_flags & KT_PROCESSED) {
2213 		return KHEAP_KT_VAR;
2214 	}
2215 
2216 	return KHEAP_DEFAULT;
2217 }
2218 
2219 __attribute__((noinline))
2220 static struct kalloc_result
kalloc_large(kalloc_heap_t kheap,vm_size_t req_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2221 kalloc_large(
2222 	kalloc_heap_t         kheap,
2223 	vm_size_t             req_size,
2224 	zalloc_flags_t        flags,
2225 	uint16_t              kt_hash,
2226 	void                 *owner __unused)
2227 {
2228 	kma_flags_t kma_flags = KMA_KASAN_GUARD | KMA_TAG;
2229 	vm_tag_t tag;
2230 	vm_offset_t addr, size;
2231 
2232 	if (flags & Z_NOFAIL) {
2233 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2234 		    (size_t)req_size);
2235 	}
2236 
2237 	/*
2238 	 * kmem_alloc could block so we return if noblock
2239 	 *
2240 	 * also, reject sizes larger than our address space is quickly,
2241 	 * as kt_size or IOMallocArraySize() expect this.
2242 	 */
2243 	if ((flags & Z_NOWAIT) ||
2244 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2245 		return (struct kalloc_result){ };
2246 	}
2247 
2248 	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2249 		return (struct kalloc_result){ };
2250 	}
2251 
2252 	/*
2253 	 * (73465472) on Intel we didn't use to pass this flag,
2254 	 * which in turned allowed kalloc_large() memory to be shared
2255 	 * with user directly.
2256 	 *
2257 	 * We're bound by this unfortunate ABI.
2258 	 */
2259 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2260 #ifndef __x86_64__
2261 		kma_flags |= KMA_KOBJECT;
2262 #endif
2263 	} else {
2264 		assert(kheap == KHEAP_DATA_BUFFERS);
2265 		kma_flags &= ~KMA_TAG;
2266 	}
2267 	if (flags & Z_NOPAGEWAIT) {
2268 		kma_flags |= KMA_NOPAGEWAIT;
2269 	}
2270 	if (flags & Z_ZERO) {
2271 		kma_flags |= KMA_ZERO;
2272 	}
2273 	if (kheap == KHEAP_DATA_BUFFERS) {
2274 		kma_flags |= KMA_DATA;
2275 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2276 		kma_flags |= KMA_SPRAYQTN;
2277 	}
2278 
2279 	tag = zalloc_flags_get_tag(flags);
2280 	if (flags & Z_VM_TAG_BT_BIT) {
2281 		tag = vm_tag_bt() ?: tag;
2282 	}
2283 	if (tag == VM_KERN_MEMORY_NONE) {
2284 		tag = kheap->kh_tag;
2285 	}
2286 
2287 	size = round_page(req_size);
2288 	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2289 		req_size = round_page(size);
2290 	}
2291 
2292 	addr = kmem_alloc_guard(kernel_map, req_size, 0,
2293 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2294 
2295 	if (addr != 0) {
2296 		counter_inc(&kalloc_large_count);
2297 		counter_add(&kalloc_large_total, size);
2298 		KALLOC_ZINFO_SALLOC(size);
2299 		if (flags & Z_KALLOC_ARRAY) {
2300 			addr = __kalloc_array_encode_vm(addr, req_size);
2301 		}
2302 	} else {
2303 		addr = 0;
2304 	}
2305 
2306 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2307 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2308 }
2309 
2310 static inline struct kalloc_result
kalloc_zone(zone_t z,zone_stats_t zstats,zalloc_flags_t flags,vm_size_t req_size)2311 kalloc_zone(
2312 	zone_t                  z,
2313 	zone_stats_t            zstats,
2314 	zalloc_flags_t          flags,
2315 	vm_size_t               req_size)
2316 {
2317 	struct kalloc_result kr;
2318 	vm_size_t esize;
2319 
2320 	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2321 	esize = kr.size;
2322 
2323 	if (__probable(kr.addr)) {
2324 		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2325 			req_size = esize;
2326 		} else {
2327 			kr.size = req_size;
2328 		}
2329 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2330 		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2331 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2332 #if KASAN_CLASSIC
2333 		kasan_alloc((vm_offset_t)kr.addr, esize, kr.size,
2334 		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
2335 #endif /* KASAN_CLASSIC */
2336 #if KASAN_TBI
2337 		/*
2338 		 * Kasan-TBI at least needs to tag one byte so that
2339 		 * we can prove the allocation was live at kfree_ext()
2340 		 * time by doing a manual __asan_loadN check.
2341 		 */
2342 		kr.addr = (void *)kasan_tbi_tag_zalloc((vm_offset_t)kr.addr,
2343 		    esize, kr.size ?: 1, false);
2344 #endif /* KASAN_TBI */
2345 
2346 		if (flags & Z_KALLOC_ARRAY) {
2347 			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2348 		}
2349 	}
2350 
2351 	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2352 	return kr;
2353 }
2354 
2355 static zone_id_t
kalloc_use_shared_heap(kalloc_heap_t kheap,zone_stats_t zstats,zone_id_t zstart,zalloc_flags_t * flags)2356 kalloc_use_shared_heap(
2357 	kalloc_heap_t           kheap,
2358 	zone_stats_t            zstats,
2359 	zone_id_t               zstart,
2360 	zalloc_flags_t         *flags)
2361 {
2362 	if (kheap != KHEAP_DATA_BUFFERS) {
2363 		zone_stats_t zstats_cpu = zpercpu_get(zstats);
2364 
2365 		if (os_atomic_load(&zstats_cpu->zs_alloc_not_shared, relaxed) == 0) {
2366 			*flags |= Z_SET_NOTSHARED;
2367 			return KHEAP_SHARED->kh_zstart;
2368 		}
2369 	}
2370 
2371 	return zstart;
2372 }
2373 
2374 #undef kalloc_ext
2375 
2376 struct kalloc_result
kalloc_ext(void * kheap_or_kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2377 kalloc_ext(
2378 	void                   *kheap_or_kt_view,
2379 	vm_size_t               size,
2380 	zalloc_flags_t          flags,
2381 	void                   *owner)
2382 {
2383 	kalloc_type_var_view_t kt_view;
2384 	kalloc_heap_t kheap;
2385 	zone_stats_t zstats = NULL;
2386 	zone_t z;
2387 	uint16_t kt_hash;
2388 	zone_id_t zstart;
2389 
2390 	if (kt_is_var_view(kheap_or_kt_view)) {
2391 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2392 		kheap   = kalloc_type_get_heap(kt_view, false);
2393 		/*
2394 		 * Use stats from view if present, else use stats from kheap.
2395 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2396 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2397 		 * use stats from the respective zones.
2398 		 */
2399 		zstats  = kt_view->kt_stats;
2400 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2401 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
2402 	} else {
2403 		kt_view = NULL;
2404 		kheap   = kheap_or_kt_view;
2405 		kt_hash = kheap->kh_type_hash;
2406 		zstart  = kheap->kh_zstart;
2407 	}
2408 
2409 	if (!zstats) {
2410 		zstats = kheap->kh_stats;
2411 	}
2412 
2413 	zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
2414 	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2415 	if (z) {
2416 		return kalloc_zone(z, zstats, flags, size);
2417 	} else {
2418 		return kalloc_large(kheap, size, flags, kt_hash, owner);
2419 	}
2420 }
2421 
2422 #if XNU_PLATFORM_MacOSX
2423 void *
2424 kalloc_external(vm_size_t size);
2425 void *
kalloc_external(vm_size_t size)2426 kalloc_external(vm_size_t size)
2427 {
2428 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2429 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2430 }
2431 #endif /* XNU_PLATFORM_MacOSX */
2432 
2433 void *
2434 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2435 void *
kalloc_data_external(vm_size_t size,zalloc_flags_t flags)2436 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2437 {
2438 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2439 	return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags);
2440 }
2441 
2442 __abortlike
2443 static void
kalloc_data_require_panic(void * addr,vm_size_t size)2444 kalloc_data_require_panic(void *addr, vm_size_t size)
2445 {
2446 	zone_id_t zid = zone_id_for_element(addr, size);
2447 
2448 	if (zid != ZONE_ID_INVALID) {
2449 		zone_t z = &zone_array[zid];
2450 		zone_security_flags_t zsflags = zone_security_array[zid];
2451 
2452 		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
2453 			panic("kalloc_data_require failed: address %p in [%s%s]",
2454 			    addr, zone_heap_name(z), zone_name(z));
2455 		}
2456 
2457 		panic("kalloc_data_require failed: address %p in [%s%s], "
2458 		    "size too large %zd > %zd", addr,
2459 		    zone_heap_name(z), zone_name(z),
2460 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2461 	} else {
2462 		panic("kalloc_data_require failed: address %p not in zone native map",
2463 		    addr);
2464 	}
2465 }
2466 
2467 __abortlike
2468 static void
kalloc_non_data_require_panic(void * addr,vm_size_t size)2469 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2470 {
2471 	zone_id_t zid = zone_id_for_element(addr, size);
2472 
2473 	if (zid != ZONE_ID_INVALID) {
2474 		zone_t z = &zone_array[zid];
2475 		zone_security_flags_t zsflags = zone_security_array[zid];
2476 
2477 		switch (zsflags.z_kheap_id) {
2478 		case KHEAP_ID_NONE:
2479 		case KHEAP_ID_DATA_BUFFERS:
2480 		case KHEAP_ID_KT_VAR:
2481 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2482 			    addr, zone_heap_name(z), zone_name(z));
2483 		default:
2484 			break;
2485 		}
2486 
2487 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2488 		    "size too large %zd > %zd", addr,
2489 		    zone_heap_name(z), zone_name(z),
2490 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2491 	} else {
2492 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2493 		    addr);
2494 	}
2495 }
2496 
2497 void
kalloc_data_require(void * addr,vm_size_t size)2498 kalloc_data_require(void *addr, vm_size_t size)
2499 {
2500 	zone_id_t zid = zone_id_for_element(addr, size);
2501 
2502 	if (zid != ZONE_ID_INVALID) {
2503 		zone_t z = &zone_array[zid];
2504 		zone_security_flags_t zsflags = zone_security_array[zid];
2505 		if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS &&
2506 		    size <= zone_elem_inner_size(z)) {
2507 			return;
2508 		}
2509 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2510 	    (vm_address_t)pgz_decode(addr, size), size)) {
2511 		return;
2512 	}
2513 
2514 	kalloc_data_require_panic(addr, size);
2515 }
2516 
2517 void
kalloc_non_data_require(void * addr,vm_size_t size)2518 kalloc_non_data_require(void *addr, vm_size_t size)
2519 {
2520 	zone_id_t zid = zone_id_for_element(addr, size);
2521 
2522 	if (zid != ZONE_ID_INVALID) {
2523 		zone_t z = &zone_array[zid];
2524 		zone_security_flags_t zsflags = zone_security_array[zid];
2525 		switch (zsflags.z_kheap_id) {
2526 		case KHEAP_ID_NONE:
2527 			if (!zsflags.z_kalloc_type) {
2528 				break;
2529 			}
2530 			OS_FALLTHROUGH;
2531 		case KHEAP_ID_KT_VAR:
2532 			if (size < zone_elem_inner_size(z)) {
2533 				return;
2534 			}
2535 			break;
2536 		default:
2537 			break;
2538 		}
2539 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2540 	    (vm_address_t)pgz_decode(addr, size), size)) {
2541 		return;
2542 	}
2543 
2544 	kalloc_non_data_require_panic(addr, size);
2545 }
2546 
2547 void *
kalloc_type_impl_external(kalloc_type_view_t kt_view,zalloc_flags_t flags)2548 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2549 {
2550 	/*
2551 	 * Callsites from a kext that aren't in the BootKC on macOS or
2552 	 * any callsites on armv7 are not processed during startup,
2553 	 * default to using kheap_alloc
2554 	 *
2555 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2556 	 * NULL as we need to use the vm for the allocation
2557 	 *
2558 	 */
2559 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2560 		vm_size_t size = kalloc_type_get_size(kt_view->kt_size);
2561 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2562 		return kalloc_ext(KHEAP_DEFAULT, size, flags, NULL).addr;
2563 	}
2564 
2565 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2566 	return kalloc_type_impl(kt_view, flags);
2567 }
2568 
2569 void *
2570 kalloc_type_var_impl_external(
2571 	kalloc_type_var_view_t  kt_view,
2572 	vm_size_t               size,
2573 	zalloc_flags_t          flags,
2574 	void                   *owner);
2575 void *
kalloc_type_var_impl_external(kalloc_type_var_view_t kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2576 kalloc_type_var_impl_external(
2577 	kalloc_type_var_view_t  kt_view,
2578 	vm_size_t               size,
2579 	zalloc_flags_t          flags,
2580 	void                   *owner)
2581 {
2582 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2583 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2584 }
2585 
2586 #pragma mark kfree
2587 
2588 __abortlike
2589 static void
kfree_heap_confusion_panic(kalloc_heap_t kheap,void * data,size_t size,zone_t z)2590 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2591 {
2592 	zone_security_flags_t zsflags = zone_security_config(z);
2593 	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2594 
2595 	if (zsflags.z_kalloc_type) {
2596 		panic_include_kalloc_types = true;
2597 		kalloc_type_src_zone = z;
2598 		panic("kfree: addr %p found in kalloc type zone '%s'"
2599 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2600 	}
2601 
2602 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2603 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2604 		    data, size, zone_heap_name(z), z->z_name);
2605 	} else {
2606 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2607 		    data, size, zone_heap_name(z), kheap_name);
2608 	}
2609 }
2610 
2611 __abortlike
2612 static void
kfree_size_confusion_panic(zone_t z,void * data,size_t oob_offs,size_t size,size_t zsize)2613 kfree_size_confusion_panic(zone_t z, void *data,
2614     size_t oob_offs, size_t size, size_t zsize)
2615 {
2616 	if (z) {
2617 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2618 		    "with elem_size %zd",
2619 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2620 	} else {
2621 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2622 		    data, size, oob_offs);
2623 	}
2624 }
2625 
2626 __abortlike
2627 static void
kfree_size_invalid_panic(void * data,size_t size)2628 kfree_size_invalid_panic(void *data, size_t size)
2629 {
2630 	panic("kfree: addr %p trying to free with nonsensical size %zd",
2631 	    data, size);
2632 }
2633 
2634 __abortlike
2635 static void
kfree_size_require_panic(void * data,size_t size,size_t min_size,size_t max_size)2636 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2637     size_t max_size)
2638 {
2639 	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2640 	    data, size, min_size, max_size);
2641 }
2642 
2643 static void
kfree_size_require(kalloc_heap_t kheap,void * addr,vm_size_t min_size,vm_size_t max_size)2644 kfree_size_require(
2645 	kalloc_heap_t kheap,
2646 	void *addr,
2647 	vm_size_t min_size,
2648 	vm_size_t max_size)
2649 {
2650 	assert3u(min_size, <=, max_size);
2651 	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2652 	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2653 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2654 	if (elem_size > max_zone_size || elem_size < min_size) {
2655 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2656 	}
2657 }
2658 
2659 static void
kfree_large(vm_offset_t addr,vm_size_t size,kmf_flags_t flags,void * owner)2660 kfree_large(
2661 	vm_offset_t             addr,
2662 	vm_size_t               size,
2663 	kmf_flags_t             flags,
2664 	void                   *owner)
2665 {
2666 	size = kmem_free_guard(kernel_map, addr, size,
2667 	    flags | KMF_TAG | KMF_KASAN_GUARD,
2668 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2669 
2670 	counter_dec(&kalloc_large_count);
2671 	counter_add(&kalloc_large_total, -(uint64_t)size);
2672 	KALLOC_ZINFO_SFREE(size);
2673 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2674 }
2675 
2676 static void
kfree_zone(void * kheap_or_kt_view __unsafe_indexable,void * data,vm_size_t size,zone_t z,vm_size_t zsize)2677 kfree_zone(
2678 	void                   *kheap_or_kt_view __unsafe_indexable,
2679 	void                   *data,
2680 	vm_size_t               size,
2681 	zone_t                  z,
2682 	vm_size_t               zsize)
2683 {
2684 	zone_security_flags_t zsflags = zone_security_config(z);
2685 	kalloc_type_var_view_t kt_view;
2686 	kalloc_heap_t kheap;
2687 	zone_stats_t zstats = NULL;
2688 
2689 	if (kt_is_var_view(kheap_or_kt_view)) {
2690 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2691 		kheap   = kalloc_type_get_heap(kt_view, true);
2692 		/*
2693 		 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2694 		 * we will end up having incorrect stats. Cross frees may happen on
2695 		 * macOS due to allocation from an unprocessed view and free from
2696 		 * a processed view or vice versa.
2697 		 */
2698 		zstats  = kt_view->kt_stats;
2699 	} else {
2700 		kt_view = NULL;
2701 		kheap   = kheap_or_kt_view;
2702 	}
2703 
2704 	if (!zstats) {
2705 		zstats = kheap->kh_stats;
2706 	}
2707 
2708 	zsflags = zone_security_config(z);
2709 	if (kheap == KHEAP_DATA_BUFFERS) {
2710 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2711 			kfree_heap_confusion_panic(kheap, data, size, z);
2712 		}
2713 	} else {
2714 		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2715 		    (zsflags.z_kheap_id != KHEAP_ID_SHARED)) {
2716 			kfree_heap_confusion_panic(kheap, data, size, z);
2717 		}
2718 	}
2719 
2720 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2721 
2722 	/* needs to be __nosan because the user size might be partial */
2723 	__nosan_bzero(data, zsize);
2724 	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2725 }
2726 
2727 void
kfree_ext(void * kheap_or_kt_view,void * data,vm_size_t size)2728 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2729 {
2730 	vm_size_t bucket_size;
2731 	zone_t z;
2732 
2733 	if (data == NULL) {
2734 		return;
2735 	}
2736 
2737 	if (size > KFREE_ABSURD_SIZE) {
2738 		kfree_size_invalid_panic(data, size);
2739 	}
2740 
2741 	if (size <= KHEAP_MAX_SIZE) {
2742 		vm_size_t oob_offs;
2743 
2744 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2745 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2746 			kfree_size_confusion_panic(z, data,
2747 			    oob_offs, size, bucket_size);
2748 		}
2749 
2750 		data = (char *)data - oob_offs;
2751 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2752 	} else {
2753 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2754 	}
2755 }
2756 
2757 void
kfree_addr_ext(kalloc_heap_t kheap,void * data)2758 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2759 {
2760 	vm_offset_t oob_offs;
2761 	vm_size_t size, usize = 0;
2762 	zone_t z;
2763 
2764 	if (data == NULL) {
2765 		return;
2766 	}
2767 
2768 	size = zone_element_size(data, &z, true, &oob_offs);
2769 	if (size) {
2770 #if KASAN_CLASSIC
2771 		usize = kasan_user_size((vm_offset_t)data);
2772 #endif
2773 		data = (char *)data - oob_offs;
2774 		kfree_zone(kheap, data, usize, z, size);
2775 	} else {
2776 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2777 	}
2778 }
2779 
2780 #if XNU_PLATFORM_MacOSX
2781 void
2782 kfree_external(void *addr, vm_size_t size);
2783 void
kfree_external(void * addr,vm_size_t size)2784 kfree_external(void *addr, vm_size_t size)
2785 {
2786 	kalloc_heap_t kheap = KHEAP_DEFAULT;
2787 
2788 	kfree_ext(kheap, addr, size);
2789 }
2790 #endif /* XNU_PLATFORM_MacOSX */
2791 
2792 void
2793 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2794     vm_size_t min_sz, vm_size_t max_sz)
2795 {
2796 	if (__improbable(addr == NULL)) {
2797 		return;
2798 	}
2799 	kfree_size_require(kheap, addr, min_sz, max_sz);
2800 	kfree_addr_ext(kheap, addr);
2801 }
2802 
2803 void *
kalloc_type_impl_internal(kalloc_type_view_t kt_view,zalloc_flags_t flags)2804 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2805 {
2806 	zone_stats_t zs = kt_view->kt_zv.zv_stats;
2807 	zone_t       z  = kt_view->kt_zv.zv_zone;
2808 	zone_stats_t zs_cpu = zpercpu_get(zs);
2809 
2810 	if ((flags & Z_SET_NOTSHARED) ||
2811 	    os_atomic_load(&zs_cpu->zs_alloc_not_shared, relaxed)) {
2812 		return zalloc_ext(z, zs, flags).addr;
2813 	}
2814 
2815 	assert(zone_security_config(z).z_kheap_id != KHEAP_ID_DATA_BUFFERS);
2816 	return zalloc_ext(kt_view->kt_zshared, zs, flags | Z_SET_NOTSHARED).addr;
2817 }
2818 
2819 void
kfree_type_impl_external(kalloc_type_view_t kt_view,void * ptr)2820 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
2821 {
2822 	/*
2823 	 * If callsite is from a kext that isn't in the BootKC, it wasn't
2824 	 * processed during startup so default to using kheap_alloc
2825 	 *
2826 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2827 	 * NULL as we need to use the vm for the allocation/free
2828 	 */
2829 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
2830 		return kheap_free(KHEAP_DEFAULT, ptr,
2831 		           kalloc_type_get_size(kt_view->kt_size));
2832 	}
2833 	return kfree_type_impl(kt_view, ptr);
2834 }
2835 
2836 void
2837 kfree_type_var_impl_external(
2838 	kalloc_type_var_view_t  kt_view,
2839 	void                   *ptr,
2840 	vm_size_t               size);
2841 void
kfree_type_var_impl_external(kalloc_type_var_view_t kt_view,void * ptr,vm_size_t size)2842 kfree_type_var_impl_external(
2843 	kalloc_type_var_view_t  kt_view,
2844 	void                   *ptr,
2845 	vm_size_t               size)
2846 {
2847 	return kfree_type_var_impl(kt_view, ptr, size);
2848 }
2849 
2850 void
2851 kfree_data_external(void *ptr, vm_size_t size);
2852 void
kfree_data_external(void * ptr,vm_size_t size)2853 kfree_data_external(void *ptr, vm_size_t size)
2854 {
2855 	return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
2856 }
2857 
2858 void
2859 kfree_data_addr_external(void *ptr);
2860 void
kfree_data_addr_external(void * ptr)2861 kfree_data_addr_external(void *ptr)
2862 {
2863 	return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
2864 }
2865 
2866 #pragma mark krealloc
2867 
2868 __abortlike
2869 static void
krealloc_size_invalid_panic(void * data,size_t size)2870 krealloc_size_invalid_panic(void *data, size_t size)
2871 {
2872 	panic("krealloc: addr %p trying to free with nonsensical size %zd",
2873 	    data, size);
2874 }
2875 
2876 __attribute__((noinline))
2877 static struct kalloc_result
krealloc_large(kalloc_heap_t kheap,vm_offset_t addr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2878 krealloc_large(
2879 	kalloc_heap_t         kheap,
2880 	vm_offset_t           addr,
2881 	vm_size_t             old_size,
2882 	vm_size_t             new_size,
2883 	zalloc_flags_t        flags,
2884 	uint16_t              kt_hash,
2885 	void                 *owner __unused)
2886 {
2887 	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_TAG | KMR_KASAN_GUARD;
2888 	vm_size_t new_req_size = new_size;
2889 	vm_size_t old_req_size = old_size;
2890 	uint64_t delta;
2891 	kmem_return_t kmr;
2892 	vm_tag_t tag;
2893 
2894 	if (flags & Z_NOFAIL) {
2895 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2896 		    (size_t)new_req_size);
2897 	}
2898 
2899 	/*
2900 	 * kmem_alloc could block so we return if noblock
2901 	 *
2902 	 * also, reject sizes larger than our address space is quickly,
2903 	 * as kt_size or IOMallocArraySize() expect this.
2904 	 */
2905 	if ((flags & Z_NOWAIT) ||
2906 	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2907 		return (struct kalloc_result){ };
2908 	}
2909 
2910 	/*
2911 	 * (73465472) on Intel we didn't use to pass this flag,
2912 	 * which in turned allowed kalloc_large() memory to be shared
2913 	 * with user directly.
2914 	 *
2915 	 * We're bound by this unfortunate ABI.
2916 	 */
2917 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2918 #ifndef __x86_64__
2919 		kmr_flags |= KMR_KOBJECT;
2920 #endif
2921 	} else {
2922 		assert(kheap == KHEAP_DATA_BUFFERS);
2923 		kmr_flags &= ~KMR_TAG;
2924 	}
2925 	if (flags & Z_NOPAGEWAIT) {
2926 		kmr_flags |= KMR_NOPAGEWAIT;
2927 	}
2928 	if (flags & Z_ZERO) {
2929 		kmr_flags |= KMR_ZERO;
2930 	}
2931 	if (kheap == KHEAP_DATA_BUFFERS) {
2932 		kmr_flags |= KMR_DATA;
2933 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2934 		kmr_flags |= KMR_SPRAYQTN;
2935 	}
2936 	if (flags & Z_REALLOCF) {
2937 		kmr_flags |= KMR_REALLOCF;
2938 	}
2939 
2940 	tag = zalloc_flags_get_tag(flags);
2941 	if (flags & Z_VM_TAG_BT_BIT) {
2942 		tag = vm_tag_bt() ?: tag;
2943 	}
2944 	if (tag == VM_KERN_MEMORY_NONE) {
2945 		tag = kheap->kh_tag;
2946 	}
2947 
2948 	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
2949 	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
2950 
2951 	new_size = round_page(new_req_size);
2952 	old_size = round_page(old_req_size);
2953 
2954 	if (kmr.kmr_address != 0) {
2955 		delta = (uint64_t)(new_size - old_size);
2956 	} else if (flags & Z_REALLOCF) {
2957 		counter_dec(&kalloc_large_count);
2958 		delta = (uint64_t)(-old_size);
2959 	} else {
2960 		delta = 0;
2961 	}
2962 
2963 	counter_add(&kalloc_large_total, delta);
2964 	KALLOC_ZINFO_SALLOC(delta);
2965 
2966 	if (addr != 0 || (flags & Z_REALLOCF)) {
2967 		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
2968 		    void*, addr);
2969 	}
2970 	if (__improbable(kmr.kmr_address == 0)) {
2971 		return (struct kalloc_result){ };
2972 	}
2973 
2974 	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
2975 	    void*, kmr.kmr_address);
2976 
2977 	if (flags & Z_KALLOC_ARRAY) {
2978 		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
2979 		    new_req_size);
2980 	}
2981 	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
2982 }
2983 
2984 #undef krealloc_ext
2985 
/*
 * Resize (or allocate, or free) a kalloc allocation.
 *
 * kheap_or_kt_view  either a kalloc heap or a mangled variable-size
 *                   kalloc_type view; kt_is_var_view() tells them apart.
 * addr              current allocation, or NULL to allocate fresh.
 * old_size          size the caller claims for `addr`; panics when it
 *                   exceeds KFREE_ABSURD_SIZE or does not fit the zone
 *                   element `addr` lives in.
 * new_size          requested size; 0 releases `addr`.
 * flags             zalloc flags (Z_FULLSIZE, Z_REALLOCF, Z_KALLOC_ARRAY...).
 * owner             forwarded to the large (VM) allocation paths.
 *
 * Returns the resulting address/size pair, or an empty result when
 * nothing was allocated. When the new allocation fails, the old one is
 * only released if Z_REALLOCF is set.
 */
struct kalloc_result
krealloc_ext(
	void                 *kheap_or_kt_view __unsafe_indexable,
	void                 *addr,
	vm_size_t             old_size,
	vm_size_t             new_size,
	zalloc_flags_t        flags,
	void                 *owner)
{
	vm_size_t old_bucket_size, new_bucket_size, min_size;
	kalloc_type_var_view_t kt_view;
	kalloc_heap_t kheap;
	zone_stats_t zstats = NULL;
	struct kalloc_result kr;
	vm_offset_t oob_offs = 0;
	zone_t old_z, new_z;
	uint16_t kt_hash = 0;
	zone_id_t zstart;

	if (old_size > KFREE_ABSURD_SIZE) {
		krealloc_size_invalid_panic(addr, old_size);
	}

	/* Nothing to free and nothing to allocate. */
	if (addr == NULL && new_size == 0) {
		return (struct kalloc_result){ };
	}

	if (kt_is_var_view(kheap_or_kt_view)) {
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
		kheap   = kalloc_type_get_heap(kt_view, false);
		/*
		 * Similar to kalloc_ext: Use stats from view if present,
		 * else use stats from kheap.
		 *
		 * krealloc_type isn't exposed to kexts, so we don't need to
		 * handle cross frees and can rely on stats from view or kheap.
		 */
		zstats  = kt_view->kt_stats;
		kt_hash = KT_GET_HASH(kt_view->kt_flags);
		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
	} else {
		kt_view = NULL;
		kheap   = kheap_or_kt_view;
		kt_hash = kheap->kh_type_hash;
		zstart  = kheap->kh_zstart;
	}

	if (!zstats) {
		zstats = kheap->kh_stats;
	}
	/*
	 * Find out the size of the bucket in which the new sized allocation
	 * would land. If it matches the bucket of the original allocation,
	 * simply return the same address.
	 */
	if (new_size == 0) {
		new_z = ZONE_NULL;
		new_bucket_size = new_size = 0;
	} else {
		zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
		/* No zone for this size: the allocation is page-rounded. */
		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
	}
#if !KASAN_CLASSIC
	/* Z_FULLSIZE: hand the caller the whole bucket. */
	if (flags & Z_FULLSIZE) {
		new_size = new_bucket_size;
	}
#endif /* !KASAN_CLASSIC */

	/* Classify the existing allocation: none, zone element, or large. */
	if (addr == NULL) {
		old_z = ZONE_NULL;
		old_size = old_bucket_size = 0;
	} else if (kheap_size_from_zone(addr, old_size, flags)) {
		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
			kfree_size_confusion_panic(old_z, addr,
			    oob_offs, old_size, old_bucket_size);
		}
		__builtin_assume(old_z != ZONE_NULL);
	} else {
		old_z = ZONE_NULL;
		old_bucket_size = round_page(old_size);
	}
	/* Number of bytes to carry over into a new allocation. */
	min_size = MIN(old_size, new_size);

	/* Zone element staying in the same bucket: reuse it in place. */
	if (old_bucket_size == new_bucket_size && old_z) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
		    new_size, new_bucket_size);
		if (kr.addr != addr) {
			/* The adjusted start moved: slide the data, zero the tail. */
			memmove(kr.addr, addr, min_size);
			bzero((char *)kr.addr + min_size,
			    kr.size - min_size);
		}
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
#if KASAN_CLASSIC
		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		/*
		 * Validate the current buffer, then generate a new tag,
		 * even if the address is stable, it's a "new" allocation.
		 */
		__asan_loadN((vm_offset_t)addr, old_size);
		kr.addr = (void *)kasan_tbi_tag_zalloc((vm_offset_t)kr.addr,
		    new_bucket_size, kr.size, false);
#endif /* KASAN_TBI */
		goto out_success;
	}

#if !KASAN
	/*
	 * Fallthrough to krealloc_large() for KASAN,
	 * because we can't use kasan_check_alloc()
	 * on kalloc_large() memory.
	 *
	 * kmem_realloc_guard() will perform all the validations,
	 * and re-tagging.
	 */
	if (old_bucket_size == new_bucket_size) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
		goto out_success;
	}
#endif

	/* Large -> large: let the VM layer resize the mapping. */
	if (addr && !old_z && new_size && !new_z) {
		return krealloc_large(kheap, (vm_offset_t)addr,
		           old_size, new_size, flags, kt_hash, owner);
	}

	/*
	 * Remaining cases: obtain the new allocation (if any). The
	 * Z_KALLOC_ARRAY encoding is applied once, at out_success.
	 */
	if (!new_size) {
		kr.addr = NULL;
		kr.size = 0;
	} else if (new_z) {
		kr = kalloc_zone(new_z, zstats,
		    flags & ~Z_KALLOC_ARRAY, new_size);
	} else if (old_z || addr == NULL) {
		kr = kalloc_large(kheap, new_size,
		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
	}

	if (addr && kr.addr) {
		__nosan_memcpy(kr.addr, addr, min_size);
	}

	/*
	 * Release the old allocation when the move succeeded, when the
	 * caller asked for free-on-failure, or when this is a pure free.
	 */
	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
		if (old_z) {
			kfree_zone(kheap_or_kt_view,
			    (char *)addr - oob_offs, old_size,
			    old_z, old_bucket_size);
		} else {
			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
		}
	}

	if (__improbable(kr.addr == NULL)) {
		return kr;
	}

out_success:
	if ((flags & Z_KALLOC_ARRAY) == 0) {
		return kr;
	}

	/* Z_KALLOC_ARRAY: encode the returned pointer (zone vs. VM form). */
	if (new_z) {
		kr.addr = __kalloc_array_encode_zone(new_z,
		    kr.addr, kr.size);
	} else {
		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
		    kr.size);
	}
	return kr;
}
3164 
3165 void *
3166 krealloc_data_external(
3167 	void               *ptr,
3168 	vm_size_t           old_size,
3169 	vm_size_t           new_size,
3170 	zalloc_flags_t      flags);
3171 void *
krealloc_data_external(void * ptr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags)3172 krealloc_data_external(
3173 	void               *ptr,
3174 	vm_size_t           old_size,
3175 	vm_size_t           new_size,
3176 	zalloc_flags_t      flags)
3177 {
3178 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
3179 	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size, flags, NULL).addr;
3180 }
3181 
3182 __startup_func
3183 static void
kheap_init(kalloc_heap_t parent_heap,kalloc_heap_t kheap)3184 kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
3185 {
3186 	kheap->kh_zstart      = parent_heap->kh_zstart;
3187 	kheap->kh_heap_id     = parent_heap->kh_heap_id;
3188 	kheap->kh_tag         = parent_heap->kh_tag;
3189 	kheap->kh_stats       = zalloc_percpu_permanent_type(struct zone_stats);
3190 	zone_view_count += 1;
3191 }
3192 
3193 __startup_func
3194 static void
kheap_init_data(kalloc_heap_t kheap)3195 kheap_init_data(kalloc_heap_t kheap)
3196 {
3197 	kheap_init(KHEAP_DATA_BUFFERS, kheap);
3198 	kheap->kh_views               = KHEAP_DATA_BUFFERS->kh_views;
3199 	KHEAP_DATA_BUFFERS->kh_views  = kheap;
3200 }
3201 
3202 __startup_func
3203 static void
kheap_init_var(kalloc_heap_t kheap)3204 kheap_init_var(kalloc_heap_t kheap)
3205 {
3206 	uint16_t idx;
3207 	struct kheap_info *parent_heap;
3208 
3209 	kheap_init(KHEAP_KT_VAR, kheap);
3210 	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
3211 	    KT_VAR__FIRST_FLEXIBLE_HEAP;
3212 	parent_heap = &kalloc_type_heap_array[idx];
3213 	kheap->kh_zstart = parent_heap->kh_zstart;
3214 	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
3215 		(uint32_t) early_random(), 0);
3216 	kheap->kh_views       = parent_heap->kh_views;
3217 	parent_heap->kh_views = kheap;
3218 }
3219 
3220 __startup_func
3221 void
kheap_startup_init(kalloc_heap_t kheap)3222 kheap_startup_init(kalloc_heap_t kheap)
3223 {
3224 	switch (kheap->kh_heap_id) {
3225 	case KHEAP_ID_DATA_BUFFERS:
3226 		kheap_init_data(kheap);
3227 		break;
3228 	case KHEAP_ID_KT_VAR:
3229 		kheap_init_var(kheap);
3230 		break;
3231 	default:
3232 		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
3233 		    kheap->kh_heap_id);
3234 	}
3235 }
3236 
3237 #pragma mark IOKit/libkern helpers
3238 
3239 #if XNU_PLATFORM_MacOSX
3240 
3241 void *
3242 kern_os_malloc_external(size_t size);
3243 void *
kern_os_malloc_external(size_t size)3244 kern_os_malloc_external(size_t size)
3245 {
3246 	if (size == 0) {
3247 		return NULL;
3248 	}
3249 
3250 	return kheap_alloc(KERN_OS_MALLOC, size,
3251 	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
3252 }
3253 
/*
 * free-style release for KERN_OS_MALLOC: the caller supplies only the
 * address, which is handed to kheap_free_addr().
 */
void
kern_os_free_external(void *addr);
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}
3261 
3262 void *
3263 kern_os_realloc_external(void *addr, size_t nsize);
3264 void *
kern_os_realloc_external(void * addr,size_t nsize)3265 kern_os_realloc_external(void *addr, size_t nsize)
3266 {
3267 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
3268 	vm_size_t osize, oob_offs = 0;
3269 
3270 	if (addr == NULL) {
3271 		return kern_os_malloc_external(nsize);
3272 	}
3273 
3274 	osize = zone_element_size(addr, NULL, false, &oob_offs);
3275 	if (osize == 0) {
3276 		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
3277 		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
3278 #if KASAN_CLASSIC
3279 	} else {
3280 		osize = kasan_user_size((vm_offset_t)addr);
3281 #endif
3282 	}
3283 	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3284 }
3285 
3286 #endif /* XNU_PLATFORM_MacOSX */
3287 
3288 void
kern_os_zfree(zone_t zone,void * addr,vm_size_t size)3289 kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3290 {
3291 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3292 #pragma unused(size)
3293 	zfree(zone, addr);
3294 #else
3295 	if (zone_owns(zone, addr)) {
3296 		zfree(zone, addr);
3297 	} else {
3298 		/*
3299 		 * Third party kexts might not know about the operator new
3300 		 * and be allocated from the default heap
3301 		 */
3302 		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3303 		    zone->z_name);
3304 		kheap_free(KHEAP_DEFAULT, addr, size);
3305 	}
3306 #endif
3307 }
3308 
/*
 * Returns kalloc_type_from_vm() evaluated on the view's kt_flags.
 */
bool
IOMallocType_from_vm(kalloc_type_view_t ktv)
{
	return kalloc_type_from_vm(ktv->kt_flags);
}
3314 
3315 void
kern_os_typed_free(kalloc_type_view_t ktv,void * addr,vm_size_t esize)3316 kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3317 {
3318 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3319 #pragma unused(esize)
3320 #else
3321 	/*
3322 	 * For third party kexts that have been compiled with sdk pre macOS 11,
3323 	 * an allocation of an OSObject that is defined in xnu or first pary
3324 	 * kexts, by directly calling new will lead to using the default heap
3325 	 * as it will call OSObject_operator_new_external. If this object
3326 	 * is freed by xnu, it panics as xnu uses the typed free which
3327 	 * requires the object to have been allocated in a kalloc.type zone.
3328 	 * To workaround this issue, detect if the allocation being freed is
3329 	 * from the default heap and allow freeing to it.
3330 	 */
3331 	zone_id_t zid = zone_id_for_element(addr, esize);
3332 	if (__probable(zid < MAX_ZONES)) {
3333 		zone_security_flags_t zsflags = zone_security_array[zid];
3334 		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
3335 			return kheap_free(KHEAP_DEFAULT, addr, esize);
3336 		}
3337 	}
3338 #endif
3339 	kfree_type_impl_external(ktv, addr);
3340 }
3341 
3342 #pragma mark tests
3343 #if DEBUG || DEVELOPMENT
3344 
3345 #include <sys/random.h>
3346 
3347 /*
3348  * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
3349  *
3350  * Note: Presence of zones with name kalloc.type* is used to
3351  * determine if the feature is on.
3352  */
3353 static int
kalloc_type_feature_on(void)3354 kalloc_type_feature_on(void)
3355 {
3356 	boolean_t zone_found = false;
3357 	const char kalloc_type_str[] = "kalloc.type";
3358 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3359 		zone_t z = kalloc_type_zarray[i];
3360 		while (z != NULL) {
3361 			zone_found = true;
3362 			if (strncmp(z->z_name, kalloc_type_str,
3363 			    strlen(kalloc_type_str)) != 0) {
3364 				return 0;
3365 			}
3366 			z = z->z_kt_next;
3367 		}
3368 	}
3369 
3370 	if (!zone_found) {
3371 		return 0;
3372 	}
3373 
3374 	return 1;
3375 }
3376 
3377 /*
3378  * Ensure that the policy uses the zone budget completely
3379  */
3380 static int
kalloc_type_test_policy(int64_t in)3381 kalloc_type_test_policy(int64_t in)
3382 {
3383 	uint16_t zone_budget = (uint16_t) in;
3384 	uint16_t max_bucket_freq = 25;
3385 	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3386 	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3387 	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
3388 	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
3389 	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
3390 	uint16_t wasted_zone_budget = 0, total_types = 0;
3391 	uint16_t n_zones = 0, n_zones_cal = 0;
3392 	int ret = 0;
3393 
3394 	/*
3395 	 * Need a minimum of 2 zones per size class
3396 	 */
3397 	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
3398 		return ret;
3399 	}
3400 	read_random((void *)&random[0], sizeof(random));
3401 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3402 		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
3403 		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;
3404 
3405 		freq_list[i] = r1 > r2 ? r2 : r1;
3406 		freq_total_list[i] = r1 > r2 ? r1 : r2;
3407 	}
3408 	wasted_zone_budget = kalloc_type_apply_policy(
3409 		freq_list, freq_total_list,
3410 		zones_per_sig, zones_per_type, zone_budget);
3411 
3412 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3413 		total_types += freq_total_list[i];
3414 	}
3415 
3416 	n_zones = kmem_get_random16(total_types);
3417 	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
3418 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3419 		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
3420 		    freq_total_list[i], total_types,
3421 		    (i == MAX_K_ZONE(kt_zone_cfg) - 1) ? true : false);
3422 
3423 		n_zones_cal += n_zones_for_type;
3424 
3425 		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
3426 	}
3427 	printf("-----------------------\n%u\t%u\n", total_types,
3428 	    n_zones_cal);
3429 
3430 	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
3431 		ret = 1;
3432 	}
3433 	return ret;
3434 }
3435 
3436 /*
3437  * Ensure that size of adopters of kalloc_type fit in the zone
3438  * they have been assigned.
3439  */
3440 static int
kalloc_type_check_size(zone_t z)3441 kalloc_type_check_size(zone_t z)
3442 {
3443 	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
3444 
3445 	while (kt_cur != NULL) {
3446 		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
3447 			return 0;
3448 		}
3449 		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3450 	}
3451 
3452 	return 1;
3453 }
3454 
/*
 * Fixture type for kalloc_type_test_data_redirect(): a struct whose only
 * member is a plain int.
 */
struct test_kt_data {
	int a;
};
3458 
3459 static int
kalloc_type_test_data_redirect(void)3460 kalloc_type_test_data_redirect(void)
3461 {
3462 	struct kalloc_type_view ktv_data = {
3463 		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
3464 		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
3465 	};
3466 	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
3467 		printf("%s: data redirect failed\n", __func__);
3468 		return 0;
3469 	}
3470 	return 1;
3471 }
3472 
3473 static int
run_kalloc_type_test(int64_t in,int64_t * out)3474 run_kalloc_type_test(int64_t in, int64_t *out)
3475 {
3476 	*out = 0;
3477 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3478 		zone_t z = kalloc_type_zarray[i];
3479 		while (z != NULL) {
3480 			if (!kalloc_type_check_size(z)) {
3481 				printf("%s: size check failed\n", __func__);
3482 				return 0;
3483 			}
3484 			z = z->z_kt_next;
3485 		}
3486 	}
3487 
3488 	if (!kalloc_type_test_policy(in)) {
3489 		printf("%s: policy check failed\n", __func__);
3490 		return 0;
3491 	}
3492 
3493 	if (!kalloc_type_feature_on()) {
3494 		printf("%s: boot-arg is on but feature isn't\n", __func__);
3495 		return 0;
3496 	}
3497 
3498 	if (!kalloc_type_test_data_redirect()) {
3499 		printf("%s: kalloc_type redirect for all data signature failed\n",
3500 		    __func__);
3501 		return 0;
3502 	}
3503 
3504 	printf("%s: test passed\n", __func__);
3505 
3506 	*out = 1;
3507 	return 0;
3508 }
3509 SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
3510 
3511 static vm_size_t
test_bucket_size(kalloc_heap_t kheap,vm_size_t size)3512 test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
3513 {
3514 	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);
3515 
3516 	return z ? zone_elem_inner_size(z) : round_page(size);
3517 }
3518 
3519 static int
run_kalloc_test(int64_t in __unused,int64_t * out)3520 run_kalloc_test(int64_t in __unused, int64_t *out)
3521 {
3522 	*out = 0;
3523 	uint64_t *data_ptr;
3524 	void *strippedp_old, *strippedp_new;
3525 	size_t alloc_size = 0, old_alloc_size = 0;
3526 	struct kalloc_result kr = {};
3527 
3528 	printf("%s: test running\n", __func__);
3529 
3530 	/*
3531 	 * Test size 0: alloc, free, realloc
3532 	 */
3533 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3534 	    NULL).addr;
3535 	if (!data_ptr) {
3536 		printf("%s: kalloc 0 returned null\n", __func__);
3537 		return 0;
3538 	}
3539 	kheap_free(KHEAP_DATA_BUFFERS, data_ptr, alloc_size);
3540 
3541 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3542 	    NULL).addr;
3543 	alloc_size = sizeof(uint64_t) + 1;
3544 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, kr.addr, old_alloc_size,
3545 	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
3546 	if (!data_ptr) {
3547 		printf("%s: krealloc -> old size 0 failed\n", __func__);
3548 		return 0;
3549 	}
3550 	*data_ptr = 0;
3551 
3552 	/*
3553 	 * Test krealloc: same sizeclass, different size classes, 2pgs,
3554 	 * VM (with owner)
3555 	 */
3556 	old_alloc_size = alloc_size;
3557 	alloc_size++;
3558 	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3559 	    Z_WAITOK | Z_NOFAIL, NULL);
3560 
3561 #if CONFIG_KERNEL_TBI
3562 	strippedp_old = VM_KERNEL_TBI_FILL(data_ptr);
3563 	strippedp_new = VM_KERNEL_TBI_FILL(kr.addr);
3564 #else /* CONFIG_KERNEL_TBI */
3565 	strippedp_old = data_ptr;
3566 	strippedp_new = kr.addr;
3567 #endif /* !CONFIG_KERNEL_TBI */
3568 
3569 	if (!kr.addr || (strippedp_old != strippedp_new) ||
3570 	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) !=
3571 	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3572 		printf("%s: krealloc -> same size class failed\n", __func__);
3573 		return 0;
3574 	}
3575 	data_ptr = kr.addr;
3576 	*data_ptr = 0;
3577 
3578 	old_alloc_size = alloc_size;
3579 	alloc_size *= 2;
3580 	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3581 	    Z_WAITOK | Z_NOFAIL, NULL);
3582 
3583 #if CONFIG_KERNEL_TBI
3584 	strippedp_old = VM_KERNEL_TBI_FILL(data_ptr);
3585 	strippedp_new = VM_KERNEL_TBI_FILL(kr.addr);
3586 #else /* CONFIG_KERNEL_TBI */
3587 	strippedp_old = data_ptr;
3588 	strippedp_new = kr.addr;
3589 #endif /* !CONFIG_KERNEL_TBI */
3590 
3591 	if (!kr.addr || (strippedp_old == strippedp_new) ||
3592 	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) ==
3593 	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3594 		printf("%s: krealloc -> different size class failed\n", __func__);
3595 		return 0;
3596 	}
3597 	data_ptr = kr.addr;
3598 	*data_ptr = 0;
3599 
3600 	kheap_free(KHEAP_DATA_BUFFERS, kr.addr, alloc_size);
3601 
3602 	alloc_size = 3544;
3603 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size,
3604 	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
3605 	if (!data_ptr) {
3606 		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned not null\n",
3607 		    __func__);
3608 		return 0;
3609 	}
3610 	*data_ptr = 0;
3611 
3612 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, alloc_size,
3613 	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3614 	if (!data_ptr) {
3615 		printf("%s: krealloc -> 2pgs returned not null\n", __func__);
3616 		return 0;
3617 	}
3618 	*data_ptr = 0;
3619 
3620 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, PAGE_SIZE * 2,
3621 	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3622 	if (!data_ptr) {
3623 		printf("%s: krealloc -> VM1 returned not null\n", __func__);
3624 		return 0;
3625 	}
3626 	*data_ptr = 0;
3627 
3628 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 2,
3629 	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3630 	*data_ptr = 0;
3631 	if (!data_ptr) {
3632 		printf("%s: krealloc -> VM2 returned not null\n", __func__);
3633 		return 0;
3634 	}
3635 
3636 	krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 4,
3637 	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);
3638 
3639 	printf("%s: test passed\n", __func__);
3640 	*out = 1;
3641 	return 0;
3642 }
3643 SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
3644 
3645 #endif
3646