xref: /xnu-10002.61.3/osfmk/kern/kalloc.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/kalloc.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	General kernel memory allocator.  This allocator is designed
64  *	to be used by the kernel to manage dynamic memory fast.
65  */
66 
67 #include "mach/vm_types.h"
68 #include <mach/boolean.h>
69 #include <mach/sdt.h>
70 #include <mach/machine/vm_types.h>
71 #include <mach/vm_param.h>
72 #include <kern/misc_protos.h>
73 #include <kern/counter.h>
74 #include <kern/zalloc_internal.h>
75 #include <kern/kalloc.h>
76 #include <kern/ledger.h>
77 #include <kern/backtrace.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_memtag.h>
82 #include <sys/kdebug.h>
83 
84 #include <os/hash.h>
85 #include <san/kasan.h>
86 #include <libkern/section_keywords.h>
87 #include <libkern/prelink.h>
88 
89 SCALABLE_COUNTER_DEFINE(kalloc_large_count);
90 SCALABLE_COUNTER_DEFINE(kalloc_large_total);
91 
92 #pragma mark initialization
93 
94 /*
95  * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
96  * sized zone.  This allocator is built on top of the zone allocator.  A zone
97  * is created for each potential size that we are willing to get in small
98  * blocks.
99  *
100  * Allocations of size greater than KHEAP_MAX_SIZE, are allocated from the VM.
101  */
102 
103 /*
104  * The kt_zone_cfg table defines the configuration of zones on various
105  * platforms for kalloc_type fixed size allocations.
106  */
107 
108 #if KASAN_CLASSIC
109 #define K_SIZE_CLASS(size)    \
110 	(((size) & PAGE_MASK) == 0 ? (size) : \
111 	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
112 #else
113 #define K_SIZE_CLASS(size)    (size)
114 #endif
115 static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
116 
117 static const uint16_t kt_zone_cfg[] = {
118 	K_SIZE_CLASS(16),
119 	K_SIZE_CLASS(32),
120 	K_SIZE_CLASS(48),
121 	K_SIZE_CLASS(64),
122 	K_SIZE_CLASS(80),
123 	K_SIZE_CLASS(96),
124 	K_SIZE_CLASS(128),
125 	K_SIZE_CLASS(160),
126 	K_SIZE_CLASS(192),
127 	K_SIZE_CLASS(224),
128 	K_SIZE_CLASS(256),
129 	K_SIZE_CLASS(288),
130 	K_SIZE_CLASS(368),
131 	K_SIZE_CLASS(400),
132 	K_SIZE_CLASS(512),
133 	K_SIZE_CLASS(576),
134 	K_SIZE_CLASS(768),
135 	K_SIZE_CLASS(1024),
136 	K_SIZE_CLASS(1152),
137 	K_SIZE_CLASS(1280),
138 	K_SIZE_CLASS(1664),
139 	K_SIZE_CLASS(2048),
140 	K_SIZE_CLASS(4096),
141 	K_SIZE_CLASS(6144),
142 	K_SIZE_CLASS(8192),
143 	K_SIZE_CLASS(12288),
144 	K_SIZE_CLASS(16384),
145 #if __arm64__
146 	K_SIZE_CLASS(24576),
147 	K_SIZE_CLASS(32768),
148 #endif /* __arm64__ */
149 };
150 
151 #define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))
152 
153 /*
154  * kalloc_type callsites are assigned a zone during early boot. They
155  * use the dlut[] (direct lookup table), indexed by size normalized
156  * to the minimum alignment to find the right zone index quickly.
157  */
158 #define INDEX_ZDLUT(size)       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
159 #define KALLOC_DLUT_SIZE        (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
160 #define MAX_SIZE_ZDLUT          ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
161 static __startup_data uint8_t   kalloc_type_dlut[KALLOC_DLUT_SIZE];
162 static __startup_data uint32_t  kheap_zsize[KHEAP_NUM_ZONES];
163 
164 #if VM_TAG_SIZECLASSES
165 static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
166 #endif
167 
168 const char * const kalloc_heap_names[] = {
169 	[KHEAP_ID_NONE]          = "",
170 	[KHEAP_ID_SHARED]        = "shared.",
171 	[KHEAP_ID_DATA_BUFFERS]  = "data.",
172 	[KHEAP_ID_KT_VAR]        = "",
173 };
174 
175 /*
176  * Shared heap configuration
177  */
178 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_SHARED[1] = {
179 	{
180 		.kh_name     = "shared.kalloc",
181 		.kh_heap_id  = KHEAP_ID_SHARED,
182 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE,
183 	}
184 };
185 
186 /*
187  * Bag of bytes heap configuration
188  */
189 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
190 	{
191 		.kh_name     = "data.kalloc",
192 		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
193 		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
194 	}
195 };
196 
197 /*
198  * Configuration of variable kalloc type heaps
199  */
200 SECURITY_READ_ONLY_LATE(struct kheap_info)
201 kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
202 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
203 	{
204 		.kh_name     = "kalloc.type.var",
205 		.kh_heap_id  = KHEAP_ID_KT_VAR,
206 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
207 	}
208 };
209 
210 KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
211 
212 __startup_func
213 static void
kalloc_zsize_compute(void)214 kalloc_zsize_compute(void)
215 {
216 	uint32_t step = KHEAP_STEP_START;
217 	uint32_t size = KHEAP_START_SIZE;
218 
219 	/*
220 	 * Manually initialize extra initial zones
221 	 */
222 	kheap_zsize[0] = size / 2;
223 	kheap_zsize[1] = size;
224 	static_assert(KHEAP_EXTRA_ZONES == 2);
225 
226 	/*
227 	 * Compute sizes for remaining zones
228 	 */
229 	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
230 		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
231 
232 		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
233 		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);
234 
235 		step *= 2;
236 		size += step;
237 	}
238 }
239 
240 static zone_t
kalloc_zone_for_size_with_flags(zone_id_t zid,vm_size_t size,zalloc_flags_t flags)241 kalloc_zone_for_size_with_flags(
242 	zone_id_t               zid,
243 	vm_size_t               size,
244 	zalloc_flags_t          flags)
245 {
246 	vm_size_t max_size = KHEAP_MAX_SIZE;
247 	bool forcopyin = flags & Z_MAY_COPYINMAP;
248 	zone_t zone;
249 
250 	if (flags & Z_KALLOC_ARRAY) {
251 		size = roundup(size, KALLOC_ARRAY_GRANULE);
252 	}
253 
254 	if (forcopyin) {
255 #if __x86_64__
256 		/*
257 		 * On Intel, the OSData() ABI used to allocate
258 		 * from the kernel map starting at PAGE_SIZE.
259 		 *
260 		 * If only vm_map_copyin() or a wrapper is used,
261 		 * then everything will work fine because vm_map_copy_t
262 		 * will perform an actual copy if the data is smaller
263 		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
264 		 *
265 		 * However, if anyone is trying to call mach_vm_remap(),
266 		 * then bad things (TM) happen.
267 		 *
268 		 * Avoid this by preserving the ABI and moving
269 		 * to kalloc_large() earlier.
270 		 *
271 		 * Any recent code really ought to use IOMemoryDescriptor
272 		 * for this purpose however.
273 		 */
274 		max_size = PAGE_SIZE - 1;
275 #endif
276 	}
277 
278 	if (size <= max_size) {
279 		uint32_t idx;
280 
281 		if (size <= KHEAP_START_SIZE) {
282 			zid  += (size > 16);
283 		} else {
284 			/*
285 			 * . log2down(size - 1) is log2up(size) - 1
286 			 * . (size - 1) >> (log2down(size - 1) - 1)
287 			 *   is either 0x2 or 0x3
288 			 */
289 			idx   = kalloc_log2down((uint32_t)(size - 1));
290 			zid  += KHEAP_EXTRA_ZONES +
291 			    2 * (idx - KHEAP_START_IDX) +
292 			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
293 		}
294 
295 		zone = zone_by_id(zid);
296 #if KASAN_CLASSIC
297 		/*
298 		 * Under kasan classic, certain size classes are a redzone
299 		 * away from the mathematical formula above, and we need
300 		 * to "go to the next zone".
301 		 *
302 		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
303 		 * this will never go to an "invalid" zone that doesn't
304 		 * belong to the kheap.
305 		 */
306 		if (size > zone_elem_inner_size(zone)) {
307 			zone++;
308 		}
309 #endif
310 		return zone;
311 	}
312 
313 	return ZONE_NULL;
314 }
315 
316 zone_t
kalloc_zone_for_size(zone_id_t zid,size_t size)317 kalloc_zone_for_size(zone_id_t zid, size_t size)
318 {
319 	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
320 }
321 
322 static inline bool
kheap_size_from_zone(void * addr,vm_size_t size,zalloc_flags_t flags)323 kheap_size_from_zone(
324 	void                   *addr,
325 	vm_size_t               size,
326 	zalloc_flags_t          flags)
327 {
328 	vm_size_t max_size = KHEAP_MAX_SIZE;
329 	bool forcopyin = flags & Z_MAY_COPYINMAP;
330 
331 #if __x86_64__
332 	/*
333 	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
334 	 * behavior, then the element could have a PAGE_SIZE reported size,
335 	 * yet still be from a zone for Z_MAY_COPYINMAP.
336 	 */
337 	if (forcopyin) {
338 		if (size == PAGE_SIZE &&
339 		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
340 			return true;
341 		}
342 
343 		max_size = PAGE_SIZE - 1;
344 	}
345 #else
346 #pragma unused(addr, forcopyin)
347 #endif
348 
349 	return size <= max_size;
350 }
351 
352 /*
353  * All data zones shouldn't use shared zone. Therefore set the no share
354  * bit right after creation.
355  */
356 __startup_func
357 static void
kalloc_set_no_share_for_data(zone_kheap_id_t kheap_id,zone_stats_t zstats)358 kalloc_set_no_share_for_data(
359 	zone_kheap_id_t       kheap_id,
360 	zone_stats_t          zstats)
361 {
362 	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
363 		zpercpu_foreach(zs, zstats) {
364 			os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
365 		}
366 	}
367 }
368 
369 __startup_func
370 static void
kalloc_zone_init(const char * kheap_name,zone_kheap_id_t kheap_id,zone_id_t * kheap_zstart,zone_create_flags_t zc_flags)371 kalloc_zone_init(
372 	const char           *kheap_name,
373 	zone_kheap_id_t       kheap_id,
374 	zone_id_t            *kheap_zstart,
375 	zone_create_flags_t   zc_flags)
376 {
377 	zc_flags |= ZC_PGZ_USE_GUARDS;
378 
379 	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
380 		uint32_t size = kheap_zsize[i];
381 		char buf[MAX_ZONE_NAME], *z_name;
382 		int len;
383 
384 		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
385 		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
386 		strlcpy(z_name, buf, len + 1);
387 
388 		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
389 #if __arm64e__ || CONFIG_KERNEL_TAGGING
390 			uint32_t scale = kalloc_log2down(size / 32);
391 
392 			if (size == 32 << scale) {
393 			        z->z_array_size_class = scale;
394 			} else {
395 			        z->z_array_size_class = scale | 0x10;
396 			}
397 #endif
398 			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
399 			if (i == 0) {
400 			        *kheap_zstart = zone_index(z);
401 			}
402 			kalloc_set_no_share_for_data(kheap_id, z->z_stats);
403 		});
404 	}
405 }
406 
407 __startup_func
408 static void
kalloc_heap_init(struct kalloc_heap * kheap)409 kalloc_heap_init(struct kalloc_heap *kheap)
410 {
411 	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
412 	    ZC_NONE);
413 	/*
414 	 * Count all the "raw" views for zones in the heap.
415 	 */
416 	zone_view_count += KHEAP_NUM_ZONES;
417 }
418 
419 #define KEXT_ALIGN_SHIFT           6
420 #define KEXT_ALIGN_BYTES           (1<< KEXT_ALIGN_SHIFT)
421 #define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES-1)
422 #define kt_scratch_size            (256ul << 10)
/*
 * Name of the section holding the views of a kalloc_type variant:
 * "__kalloc_type" for KTV_FIXED, "__kalloc_var" otherwise.
 * The argument is parenthesized so that any expression can be passed.
 */
#define KALLOC_TYPE_SECTION(type) \
	((type) == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")
425 
426 /*
427  * Enum to specify the kalloc_type variant being used.
428  */
429 __options_decl(kalloc_type_variant_t, uint16_t, {
430 	KTV_FIXED     = 0x0001,
431 	KTV_VAR       = 0x0002,
432 });
433 
434 /*
435  * Macros that generate the appropriate kalloc_type variant (i.e fixed or
436  * variable) of the desired variable/function.
437  */
438 #define kalloc_type_var(type, var)              \
439 	((type) == KTV_FIXED?                       \
440 	(vm_offset_t) kalloc_type_##var##_fixed:    \
441 	(vm_offset_t) kalloc_type_##var##_var)
442 #define kalloc_type_func(type, func, ...)       \
443 	((type) == KTV_FIXED?                       \
444 	kalloc_type_##func##_fixed(__VA_ARGS__):    \
445 	kalloc_type_##func##_var(__VA_ARGS__))
446 
447 TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
448 TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
449     ZSECURITY_CONFIG_KT_VAR_BUDGET);
450 TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
451     ZSECURITY_CONFIG_KT_BUDGET);
452 TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
453 static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
454 
455 /*
456  * Section start/end for fixed kalloc_type views
457  */
458 extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
459 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
460 
461 extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
462 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
463 
464 /*
465  * Section start/end for variable kalloc_type views
466  */
467 extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
468 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
469 
470 extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
471 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
472 
473 __startup_data
474 static kalloc_type_views_t *kt_buffer = NULL;
475 __startup_data
476 static uint64_t kt_count;
477 __startup_data
478 uint32_t kalloc_type_hash_seed;
479 
480 __startup_data
481 static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
482 __startup_data
483 static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];
484 
485 struct nzones_with_idx {
486 	uint16_t nzones;
487 	uint16_t idx;
488 };
489 int16_t zone_carry = 0;
490 
491 _Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
492     "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");
493 
494 /*
495  * For use by lldb to iterate over kalloc types
496  */
497 SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
498 SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
499 SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];
500 
501 #define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
502 static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
503     KMEM_DIRECTION_MASK),
504     "Insufficient bits to represent range and dir for VM allocations");
505 static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
506     "validate idx mask");
507 /* qsort routines */
508 typedef int (*cmpfunc_t)(const void *a, const void *b);
509 extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
510 
511 static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)512 kalloc_type_get_idx(uint32_t kt_size)
513 {
514 	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
515 }
516 
517 static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size,uint16_t idx)518 kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
519 {
520 	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
521 }
522 
523 static void
kalloc_type_build_dlut(void)524 kalloc_type_build_dlut(void)
525 {
526 	vm_size_t size = 0;
527 	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
528 		uint8_t zindex = 0;
529 		while (kt_zone_cfg[zindex] < size) {
530 			zindex++;
531 		}
532 		kalloc_type_dlut[i] = zindex;
533 	}
534 }
535 
536 static uint32_t
kalloc_type_idx_for_size(uint32_t size)537 kalloc_type_idx_for_size(uint32_t size)
538 {
539 	assert(size <= KHEAP_MAX_SIZE);
540 	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
541 	return kalloc_type_set_idx(size, idx);
542 }
543 
544 static void
kalloc_type_assign_zone_fixed(kalloc_type_view_t * cur,kalloc_type_view_t * end,zone_t z,zone_t sig_zone,zone_t shared_zone)545 kalloc_type_assign_zone_fixed(
546 	kalloc_type_view_t     *cur,
547 	kalloc_type_view_t     *end,
548 	zone_t                  z,
549 	zone_t                  sig_zone,
550 	zone_t                  shared_zone)
551 {
552 	/*
553 	 * Assign the zone created for every kalloc_type_view
554 	 * of the same unique signature
555 	 */
556 	bool need_raw_view = false;
557 
558 	while (cur < end) {
559 		kalloc_type_view_t kt = *cur;
560 		struct zone_view *zv = &kt->kt_zv;
561 		zv->zv_zone = z;
562 		kalloc_type_flags_t kt_flags = kt->kt_flags;
563 		zone_security_flags_t zsflags = zone_security_config(z);
564 
565 		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
566 		if (!shared_zone) {
567 			assert(zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS);
568 		}
569 
570 		if (kt_flags & KT_SLID) {
571 			kt->kt_signature -= vm_kernel_slide;
572 			kt->kt_zv.zv_name -= vm_kernel_slide;
573 		}
574 
575 		if ((kt_flags & KT_PRIV_ACCT) ||
576 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
577 			zv->zv_stats = zalloc_percpu_permanent_type(
578 				struct zone_stats);
579 			need_raw_view = true;
580 			zone_view_count += 1;
581 		} else {
582 			zv->zv_stats = z->z_stats;
583 		}
584 
585 		if ((kt_flags & KT_NOSHARED) || !shared_zone) {
586 			if ((kt_flags & KT_NOSHARED) && !(kt_flags & KT_PRIV_ACCT)) {
587 				panic("KT_NOSHARED used w/o private accounting for view %s",
588 				    zv->zv_name);
589 			}
590 
591 			zpercpu_foreach(zs, zv->zv_stats) {
592 				os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
593 			}
594 		}
595 
596 		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
597 			kt->kt_zshared = shared_zone;
598 			kt->kt_zsig = sig_zone;
599 			/*
600 			 * If we haven't yet set the signature equivalance then set it
601 			 * otherwise validate that the zone has the same signature equivalance
602 			 * as the sig_zone provided
603 			 */
604 			if (!zone_get_sig_eq(z)) {
605 				zone_set_sig_eq(z, zone_index(sig_zone));
606 			} else {
607 				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
608 			}
609 		}
610 		zv->zv_next = (zone_view_t) z->z_views;
611 		zv->zv_zone->z_views = (zone_view_t) kt;
612 		cur++;
613 	}
614 	if (need_raw_view) {
615 		zone_view_count += 1;
616 	}
617 }
618 
619 __startup_func
620 static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t * cur,kalloc_type_var_view_t * end,uint32_t heap_idx)621 kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
622     kalloc_type_var_view_t *end, uint32_t heap_idx)
623 {
624 	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
625 	while (cur < end) {
626 		kalloc_type_var_view_t kt = *cur;
627 		kt->kt_heap_start = cfg->kh_zstart;
628 		kalloc_type_flags_t kt_flags = kt->kt_flags;
629 
630 		if (kt_flags & KT_SLID) {
631 			if (kt->kt_sig_hdr) {
632 				kt->kt_sig_hdr -= vm_kernel_slide;
633 			}
634 			kt->kt_sig_type -= vm_kernel_slide;
635 			kt->kt_name -= vm_kernel_slide;
636 		}
637 
638 		if ((kt_flags & KT_PRIV_ACCT) ||
639 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
640 			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
641 			zone_view_count += 1;
642 		}
643 
644 		kt->kt_next = (zone_view_t) cfg->kt_views;
645 		cfg->kt_views = kt;
646 		cur++;
647 	}
648 }
649 
650 __startup_func
651 static inline void
kalloc_type_slide_fixed(vm_offset_t addr)652 kalloc_type_slide_fixed(vm_offset_t addr)
653 {
654 	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
655 	ktv->kt_signature += vm_kernel_slide;
656 	ktv->kt_zv.zv_name += vm_kernel_slide;
657 	ktv->kt_flags |= KT_SLID;
658 }
659 
660 __startup_func
661 static inline void
kalloc_type_slide_var(vm_offset_t addr)662 kalloc_type_slide_var(vm_offset_t addr)
663 {
664 	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
665 	if (ktv->kt_sig_hdr) {
666 		ktv->kt_sig_hdr += vm_kernel_slide;
667 	}
668 	ktv->kt_sig_type += vm_kernel_slide;
669 	ktv->kt_name += vm_kernel_slide;
670 	ktv->kt_flags |= KT_SLID;
671 }
672 
673 __startup_func
674 static void
kalloc_type_validate_flags(kalloc_type_flags_t kt_flags,const char * kt_name,uuid_string_t kext_uuid)675 kalloc_type_validate_flags(
676 	kalloc_type_flags_t   kt_flags,
677 	const char           *kt_name,
678 	uuid_string_t         kext_uuid)
679 {
680 	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
681 		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
682 		    "required xnu headers", kt_name, kext_uuid);
683 	}
684 }
685 
686 static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr,uuid_string_t kext_uuid)687 kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
688 {
689 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
690 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
691 	return ktv->kt_flags;
692 }
693 
694 static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr,uuid_string_t kext_uuid)695 kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
696 {
697 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
698 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
699 	return ktv->kt_flags;
700 }
701 
702 /*
703  * Check if signature of type is made up of only data and padding
704  */
705 static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)706 kalloc_type_is_data(kalloc_type_flags_t kt_flags)
707 {
708 	assert(kt_flags & KT_CHANGED);
709 	return kt_flags & KT_DATA_ONLY;
710 }
711 
712 /*
713  * Check if signature of type is made up of only pointers
714  */
715 static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)716 kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
717 {
718 	assert(kt_flags & KT_CHANGED2);
719 	return kt_flags & KT_PTR_ARRAY;
720 }
721 
722 static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)723 kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
724 {
725 	assert(kt_flags & KT_CHANGED);
726 	return kt_flags & KT_VM;
727 }
728 
729 __startup_func
730 static inline vm_size_t
kalloc_type_view_sz_fixed(void)731 kalloc_type_view_sz_fixed(void)
732 {
733 	return sizeof(struct kalloc_type_view);
734 }
735 
736 __startup_func
737 static inline vm_size_t
kalloc_type_view_sz_var(void)738 kalloc_type_view_sz_var(void)
739 {
740 	return sizeof(struct kalloc_type_var_view);
741 }
742 
743 __startup_func
744 static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end)745 kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
746     vm_offset_t end)
747 {
748 	return (end - start) / kalloc_type_func(type, view_sz);
749 }
750 
751 __startup_func
752 static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t * buffer,vm_offset_t ktv)753 kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
754 {
755 	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
756 }
757 
758 __startup_func
759 static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t * buffer,vm_offset_t ktv)760 kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
761 {
762 	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
763 }
764 
765 __startup_func
766 static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)767 kalloc_type_handle_data_view_fixed(vm_offset_t addr)
768 {
769 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
770 	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
771 	    cur_data_view->kt_size);
772 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
773 	    NULL);
774 }
775 
776 __startup_func
777 static void
kalloc_type_handle_data_view_var(vm_offset_t addr)778 kalloc_type_handle_data_view_var(vm_offset_t addr)
779 {
780 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
781 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
782 }
783 
784 __startup_func
785 static uint32_t
kalloc_type_handle_parray_var(void)786 kalloc_type_handle_parray_var(void)
787 {
788 	uint32_t i = 0;
789 	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
790 	const char *p_name = kt->kt_name;
791 
792 	/*
793 	 * The sorted list of variable kalloc_type_view has pointer arrays at the
794 	 * beginning. Walk through them and assign a random pointer heap to each
795 	 * type detected by typename.
796 	 */
797 	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
798 		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
799 		const char *c_name = kt->kt_name;
800 		uint32_t p_i = i;
801 
802 		while (strcmp(c_name, p_name) == 0) {
803 			i++;
804 			kt = kt_buffer[i].ktv_var;
805 			c_name = kt->kt_name;
806 		}
807 		p_name = c_name;
808 		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
809 		    &kt_buffer[i].ktv_var, heap_id);
810 	}
811 
812 	/*
813 	 * Returns the the index of the first view that isn't a pointer array
814 	 */
815 	return i;
816 }
817 
818 __startup_func
819 static uint32_t
kalloc_hash_adjust(uint32_t hash,uint32_t shift)820 kalloc_hash_adjust(uint32_t hash, uint32_t shift)
821 {
822 	/*
823 	 * Limit range_id to ptr ranges
824 	 */
825 	uint32_t range_id = kmem_adjust_range_id(hash);
826 	uint32_t direction = hash & 0x8000;
827 	return (range_id | KMEM_HASH_SET | direction) << shift;
828 }
829 
830 __startup_func
831 static void
kalloc_type_set_type_hash(const char * sig_ty,const char * sig_hdr,kalloc_type_flags_t * kt_flags)832 kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
833     kalloc_type_flags_t *kt_flags)
834 {
835 	uint32_t hash = 0;
836 
837 	assert(sig_ty != NULL);
838 	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
839 	    kalloc_type_hash_seed);
840 	if (sig_hdr) {
841 		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
842 	}
843 	os_hash_jenkins_finish(hash);
844 	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
845 
846 	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
847 }
848 
849 __startup_func
850 static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)851 kalloc_type_set_type_hash_fixed(vm_offset_t addr)
852 {
853 	/*
854 	 * Use backtraces on fixed as we don't have signatures for types that go
855 	 * to the VM due to rdar://85182551.
856 	 */
857 	(void) addr;
858 }
859 
860 __startup_func
861 static void
kalloc_type_set_type_hash_var(vm_offset_t addr)862 kalloc_type_set_type_hash_var(vm_offset_t addr)
863 {
864 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
865 	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
866 	    &ktv->kt_flags);
867 }
868 
869 __startup_func
870 static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)871 kalloc_type_mark_processed_fixed(vm_offset_t addr)
872 {
873 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
874 	ktv->kt_flags |= KT_PROCESSED;
875 }
876 
877 __startup_func
878 static void
kalloc_type_mark_processed_var(vm_offset_t addr)879 kalloc_type_mark_processed_var(vm_offset_t addr)
880 {
881 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
882 	ktv->kt_flags |= KT_PROCESSED;
883 }
884 
885 __startup_func
886 static void
kalloc_type_update_view_fixed(vm_offset_t addr)887 kalloc_type_update_view_fixed(vm_offset_t addr)
888 {
889 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
890 	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
891 }
892 
893 __startup_func
894 static void
kalloc_type_update_view_var(vm_offset_t addr)895 kalloc_type_update_view_var(vm_offset_t addr)
896 {
897 	(void) addr;
898 }
899 
900 __startup_func
901 static void
kalloc_type_view_copy(const kalloc_type_variant_t type,vm_offset_t start,vm_offset_t end,uint64_t * cur_count,bool slide,uuid_string_t kext_uuid)902 kalloc_type_view_copy(
903 	const kalloc_type_variant_t   type,
904 	vm_offset_t                   start,
905 	vm_offset_t                   end,
906 	uint64_t                     *cur_count,
907 	bool                          slide,
908 	uuid_string_t                 kext_uuid)
909 {
910 	uint64_t count = kalloc_type_view_count(type, start, end);
911 	if (count + *cur_count >= kt_count) {
912 		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
913 	}
914 	vm_offset_t cur = start;
915 	while (cur < end) {
916 		if (slide) {
917 			kalloc_type_func(type, slide, cur);
918 		}
919 		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
920 		    kext_uuid);
921 		kalloc_type_func(type, mark_processed, cur);
922 		/*
923 		 * Skip views that go to the VM
924 		 */
925 		if (kalloc_type_from_vm(kt_flags)) {
926 			cur += kalloc_type_func(type, view_sz);
927 			continue;
928 		}
929 
930 		/*
931 		 * If signature indicates that the entire allocation is data move it to
932 		 * KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a fake "data" heap,
933 		 * variable kalloc_type handles the actual redirection in the entry points
934 		 * kalloc/kfree_type_var_impl.
935 		 */
936 		if (kalloc_type_is_data(kt_flags)) {
937 			kalloc_type_func(type, handle_data_view, cur);
938 			cur += kalloc_type_func(type, view_sz);
939 			continue;
940 		}
941 
942 		/*
943 		 * Set type hash that is used by kmem_*_guard
944 		 */
945 		kalloc_type_func(type, set_type_hash, cur);
946 		kalloc_type_func(type, update_view, cur);
947 		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
948 		cur += kalloc_type_func(type, view_sz);
949 		*cur_count = *cur_count + 1;
950 	}
951 }
952 
953 __startup_func
954 static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)955 kalloc_type_view_parse(const kalloc_type_variant_t type)
956 {
957 	kc_format_t kc_format;
958 	uint64_t cur_count = 0;
959 
960 	if (!PE_get_primary_kc_format(&kc_format)) {
961 		panic("kalloc_type_view_parse: wasn't able to determine kc format");
962 	}
963 
964 	if (kc_format == KCFormatStatic) {
965 		/*
966 		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
967 		 * xnu are coalesced.
968 		 */
969 		kalloc_type_view_copy(type,
970 		    kalloc_type_var(type, sec_start),
971 		    kalloc_type_var(type, sec_end),
972 		    &cur_count, false, NULL);
973 	} else if (kc_format == KCFormatFileset) {
974 		/*
975 		 * If kc uses filesets, traverse __kalloc_type section for each
976 		 * macho in the BootKC.
977 		 */
978 		kernel_mach_header_t *kc_mh = NULL;
979 		kernel_mach_header_t *kext_mh = NULL;
980 
981 		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
982 		struct load_command *lc =
983 		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
984 		for (uint32_t i = 0; i < kc_mh->ncmds;
985 		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
986 			if (lc->cmd != LC_FILESET_ENTRY) {
987 				continue;
988 			}
989 			struct fileset_entry_command *fse =
990 			    (struct fileset_entry_command *)(vm_offset_t)lc;
991 			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
992 			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
993 				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
994 			if (sect != NULL) {
995 				unsigned long uuidlen = 0;
996 				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
997 				uuid_string_t kext_uuid_str;
998 				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
999 					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
1000 				}
1001 				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1002 				    &cur_count, false, kext_uuid_str);
1003 			}
1004 		}
1005 	} else if (kc_format == KCFormatKCGEN) {
1006 		/*
1007 		 * Parse __kalloc_type section from xnu
1008 		 */
1009 		kalloc_type_view_copy(type,
1010 		    kalloc_type_var(type, sec_start),
1011 		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1012 
1013 		/*
1014 		 * Parse __kalloc_type section for kexts
1015 		 *
1016 		 * Note: We don't process the kalloc_type_views for kexts on armv7
1017 		 * as this platform has insufficient memory for type based
1018 		 * segregation. kalloc_type_impl_external will direct callsites
1019 		 * based on their size.
1020 		 */
1021 		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1022 		vm_offset_t cur = 0;
1023 		vm_offset_t end = 0;
1024 
1025 		/*
1026 		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1027 		 * and traverse it.
1028 		 */
1029 		kernel_section_t *prelink_sect = getsectbynamefromheader(
1030 			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1031 		assert(prelink_sect);
1032 		cur = prelink_sect->addr;
1033 		end = prelink_sect->addr + prelink_sect->size;
1034 
1035 		while (cur < end) {
1036 			uint64_t kext_text_sz = 0;
1037 			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1038 
1039 			if (kext_mh->magic == 0) {
1040 				/*
1041 				 * Assert that we have processed all kexts and all that is left
1042 				 * is padding
1043 				 */
1044 				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1045 				break;
1046 			} else if (kext_mh->magic != MH_MAGIC_64 &&
1047 			    kext_mh->magic != MH_CIGAM_64) {
1048 				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1049 				    cur);
1050 			}
1051 
1052 			/*
1053 			 * Kext macho found, iterate through its segments
1054 			 */
1055 			struct load_command *lc =
1056 			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1057 			bool isSplitKext = false;
1058 
1059 			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1060 			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1061 				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1062 					isSplitKext = true;
1063 					continue;
1064 				} else if (lc->cmd != LC_SEGMENT_64) {
1065 					continue;
1066 				}
1067 
1068 				kernel_segment_command_t *seg_cmd =
1069 				    (struct segment_command_64 *)(vm_offset_t)lc;
1070 				/*
1071 				 * Parse kalloc_type section
1072 				 */
1073 				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1074 					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1075 					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1076 					if (kt_sect) {
1077 						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1078 						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1079 						    true, NULL);
1080 					}
1081 				}
1082 				/*
1083 				 * If the kext has a __TEXT segment, that is the only thing that
1084 				 * will be in the special __PRELINK_TEXT KC segment, so the next
1085 				 * macho is right after.
1086 				 */
1087 				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1088 					kext_text_sz = seg_cmd->filesize;
1089 				}
1090 			}
1091 			/*
1092 			 * If the kext did not have a __TEXT segment (special xnu kexts with
1093 			 * only a __LINKEDIT segment) then the next macho will be after all the
1094 			 * header commands.
1095 			 */
1096 			if (!kext_text_sz) {
1097 				kext_text_sz = kext_mh->sizeofcmds;
1098 			} else if (!isSplitKext) {
1099 				panic("kalloc_type_view_parse: No support for non-split seg KCs");
1100 				break;
1101 			}
1102 
1103 			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1104 		}
1105 	} else {
1106 		/*
1107 		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1108 		 * parsing kalloc_type_view structs during startup.
1109 		 */
1110 		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1111 		    " for kc_format = %d\n", kc_format);
1112 	}
1113 	return cur_count;
1114 }
1115 
1116 __startup_func
1117 static int
kalloc_type_cmp_fixed(const void * a,const void * b)1118 kalloc_type_cmp_fixed(const void *a, const void *b)
1119 {
1120 	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1121 	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1122 
1123 	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1124 	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1125 	/*
1126 	 * If the kalloc_type_views are in the same kalloc bucket, sort by
1127 	 * signature else sort by size
1128 	 */
1129 	if (idxA == idxB) {
1130 		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1131 		/*
1132 		 * If the kalloc_type_views have the same signature sort by site
1133 		 * name
1134 		 */
1135 		if (result == 0) {
1136 			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1137 		}
1138 		return result;
1139 	}
1140 	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1141 	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1142 	return (int)(sizeA - sizeB);
1143 }
1144 
1145 __startup_func
1146 static int
kalloc_type_cmp_var(const void * a,const void * b)1147 kalloc_type_cmp_var(const void *a, const void *b)
1148 {
1149 	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1150 	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1151 	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1152 	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1153 	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1154 	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1155 	int result = 0;
1156 
1157 	/*
1158 	 * Switched around (B - A) because we want the pointer arrays to be at the
1159 	 * top
1160 	 */
1161 	result = ktB_ptrArray - ktA_ptrArray;
1162 	if (result == 0) {
1163 		result = strcmp(ktA_hdr, ktB_hdr);
1164 		if (result == 0) {
1165 			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1166 			if (result == 0) {
1167 				result = strcmp(ktA->kt_name, ktB->kt_name);
1168 			}
1169 		}
1170 	}
1171 	return result;
1172 }
1173 
1174 __startup_func
1175 static uint16_t *
kalloc_type_create_iterators_fixed(uint16_t * kt_skip_list_start,uint64_t count)1176 kalloc_type_create_iterators_fixed(
1177 	uint16_t           *kt_skip_list_start,
1178 	uint64_t            count)
1179 {
1180 	uint16_t *kt_skip_list = kt_skip_list_start;
1181 	uint16_t p_idx = UINT16_MAX; /* previous size idx */
1182 	uint16_t c_idx = 0; /* current size idx */
1183 	uint16_t unique_sig = 0;
1184 	uint16_t total_sig = 0;
1185 	const char *p_sig = NULL;
1186 	const char *p_name = "";
1187 	const char *c_sig = NULL;
1188 	const char *c_name = NULL;
1189 
1190 	/*
1191 	 * Walk over each kalloc_type_view
1192 	 */
1193 	for (uint16_t i = 0; i < count; i++) {
1194 		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1195 
1196 		c_idx = kalloc_type_get_idx(kt->kt_size);
1197 		c_sig = kt->kt_signature;
1198 		c_name = kt->kt_zv.zv_name;
1199 		/*
1200 		 * When current kalloc_type_view is in a different kalloc size
1201 		 * bucket than the previous, it means we have processed all in
1202 		 * the previous size bucket, so store the accumulated values
1203 		 * and advance the indices.
1204 		 */
1205 		if (p_idx == UINT16_MAX || c_idx != p_idx) {
1206 			/*
1207 			 * Updates for frequency lists
1208 			 */
1209 			if (p_idx != UINT16_MAX) {
1210 				kt_freq_list[p_idx] = unique_sig;
1211 				kt_freq_list_total[p_idx] = total_sig - unique_sig;
1212 			}
1213 			unique_sig = 1;
1214 			total_sig = 1;
1215 
1216 			p_idx = c_idx;
1217 			p_sig = c_sig;
1218 			p_name = c_name;
1219 
1220 			/*
1221 			 * Updates to signature skip list
1222 			 */
1223 			*kt_skip_list = i;
1224 			kt_skip_list++;
1225 
1226 			continue;
1227 		}
1228 
1229 		/*
1230 		 * When current kalloc_type_views is in the kalloc size bucket as
1231 		 * previous, analyze the siganture to see if it is unique.
1232 		 *
1233 		 * Signatures are collapsible if one is a substring of the next.
1234 		 */
1235 		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1236 			/*
1237 			 * Unique signature detected. Update counts and advance index
1238 			 */
1239 			unique_sig++;
1240 			total_sig++;
1241 
1242 			*kt_skip_list = i;
1243 			kt_skip_list++;
1244 			p_sig = c_sig;
1245 			p_name = c_name;
1246 			continue;
1247 		}
1248 		/*
1249 		 * Need this here as we do substring matching for signatures so you
1250 		 * want to track the longer signature seen rather than the substring
1251 		 */
1252 		p_sig = c_sig;
1253 
1254 		/*
1255 		 * Check if current kalloc_type_view corresponds to a new type
1256 		 */
1257 		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1258 			total_sig++;
1259 			p_name = c_name;
1260 		}
1261 	}
1262 	/*
1263 	 * Final update
1264 	 */
1265 	assert(c_idx == p_idx);
1266 	assert(kt_freq_list[c_idx] == 0);
1267 	kt_freq_list[c_idx] = unique_sig;
1268 	kt_freq_list_total[c_idx] = total_sig - unique_sig;
1269 	*kt_skip_list = (uint16_t) count;
1270 
1271 	return ++kt_skip_list;
1272 }
1273 
1274 __startup_func
1275 static uint32_t
kalloc_type_create_iterators_var(uint32_t * kt_skip_list_start,uint32_t buf_start)1276 kalloc_type_create_iterators_var(
1277 	uint32_t           *kt_skip_list_start,
1278 	uint32_t            buf_start)
1279 {
1280 	uint32_t *kt_skip_list = kt_skip_list_start;
1281 	uint32_t n = 0;
1282 
1283 	kt_skip_list[n] = buf_start;
1284 	assert(kt_count > buf_start + 1);
1285 	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
1286 		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1287 		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1288 		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1289 		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1290 		assert(ktA->kt_sig_type != NULL);
1291 		assert(ktB->kt_sig_type != NULL);
1292 		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1293 		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1294 			n++;
1295 			kt_skip_list[n] = i;
1296 		}
1297 	}
1298 	/*
1299 	 * Final update
1300 	 */
1301 	n++;
1302 	kt_skip_list[n] = (uint32_t) kt_count;
1303 	return n;
1304 }
1305 
1306 __startup_func
1307 static uint16_t
kalloc_type_distribute_budget(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget,uint16_t min_zones_per_size)1308 kalloc_type_distribute_budget(
1309 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1310 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)],
1311 	uint16_t            zone_budget,
1312 	uint16_t            min_zones_per_size)
1313 {
1314 	uint16_t total_sig = 0;
1315 	uint16_t min_sig = 0;
1316 	uint16_t assigned_zones = 0;
1317 	uint16_t remaining_zones = zone_budget;
1318 	uint16_t modulo = 0;
1319 
1320 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1321 		uint16_t sig_freq = freq_list[i];
1322 		uint16_t min_zones = min_zones_per_size;
1323 
1324 		if (sig_freq < min_zones_per_size) {
1325 			min_zones = sig_freq;
1326 		}
1327 		total_sig += sig_freq;
1328 		kt_zones[i] = min_zones;
1329 		min_sig += min_zones;
1330 	}
1331 	if (remaining_zones > total_sig) {
1332 		remaining_zones = total_sig;
1333 	}
1334 	assert(remaining_zones >= min_sig);
1335 	remaining_zones -= min_sig;
1336 	total_sig -= min_sig;
1337 	assigned_zones += min_sig;
1338 
1339 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1340 		uint16_t freq = freq_list[i];
1341 
1342 		if (freq < min_zones_per_size) {
1343 			continue;
1344 		}
1345 		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1346 		uint16_t n_zones = (uint16_t) numer / total_sig;
1347 
1348 		/*
1349 		 * Accumulate remainder and increment n_zones when it goes above
1350 		 * denominator
1351 		 */
1352 		modulo += numer % total_sig;
1353 		if (modulo >= total_sig) {
1354 			n_zones++;
1355 			modulo -= total_sig;
1356 		}
1357 
1358 		/*
1359 		 * Cap the total number of zones to the unique signatures
1360 		 */
1361 		if ((n_zones + min_zones_per_size) > freq) {
1362 			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1363 			modulo += (extra_zones * total_sig);
1364 			n_zones -= extra_zones;
1365 		}
1366 		kt_zones[i] += n_zones;
1367 		assigned_zones += n_zones;
1368 	}
1369 
1370 	if (kt_options & KT_OPTIONS_DEBUG) {
1371 		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1372 		    assigned_zones, remaining_zones + min_sig - assigned_zones);
1373 	}
1374 	return remaining_zones + min_sig - assigned_zones;
1375 }
1376 
1377 __startup_func
1378 static int
kalloc_type_cmp_type_zones(const void * a,const void * b)1379 kalloc_type_cmp_type_zones(const void *a, const void *b)
1380 {
1381 	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
1382 	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;
1383 
1384 	return (int)(B.nzones - A.nzones);
1385 }
1386 
1387 __startup_func
1388 static void
kalloc_type_redistribute_budget(uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones[MAX_K_ZONE (kt_zone_cfg)])1389 kalloc_type_redistribute_budget(
1390 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1391 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)])
1392 {
1393 	uint16_t count = 0, cur_count = 0;
1394 	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
1395 	uint16_t top_zone_total = 0;
1396 
1397 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1398 		uint16_t zones = kt_zones[i];
1399 
1400 		/*
1401 		 * If a sizeclass got no zones but has types to divide make a note
1402 		 * of it
1403 		 */
1404 		if (zones == 0 && (freq_total_list[i] != 0)) {
1405 			count++;
1406 		}
1407 
1408 		sorted_zones[i].nzones = kt_zones[i];
1409 		sorted_zones[i].idx = i;
1410 	}
1411 
1412 	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
1413 	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);
1414 
1415 	for (uint16_t i = 0; i < 3; i++) {
1416 		top_zone_total += sorted_zones[i].nzones;
1417 	}
1418 
1419 	/*
1420 	 * Borrow zones from the top 3 sizeclasses and redistribute to those
1421 	 * that didn't get a zone but that types to divide
1422 	 */
1423 	cur_count = count;
1424 	for (uint16_t i = 0; i < 3; i++) {
1425 		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
1426 		uint16_t zone_available = kt_zones[sorted_zones[i].idx];
1427 
1428 		if (zone_borrow > (zone_available / 2)) {
1429 			zone_borrow = zone_available / 2;
1430 		}
1431 		kt_zones[sorted_zones[i].idx] -= zone_borrow;
1432 		cur_count -= zone_borrow;
1433 	}
1434 
1435 	for (uint16_t i = 0; i < 3; i++) {
1436 		if (cur_count == 0) {
1437 			break;
1438 		}
1439 		kt_zones[sorted_zones[i].idx]--;
1440 		cur_count--;
1441 	}
1442 
1443 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1444 		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
1445 		    (count > cur_count)) {
1446 			kt_zones[i]++;
1447 			count--;
1448 		}
1449 	}
1450 }
1451 
1452 static uint16_t
kalloc_type_apply_policy(uint16_t freq_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t freq_total_list[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_sig[MAX_K_ZONE (kt_zone_cfg)],uint16_t kt_zones_type[MAX_K_ZONE (kt_zone_cfg)],uint16_t zone_budget)1453 kalloc_type_apply_policy(
1454 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1455 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1456 	uint16_t            kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
1457 	uint16_t            kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
1458 	uint16_t            zone_budget)
1459 {
1460 	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
1461 	uint16_t zbudget_type = zone_budget - zbudget_sig;
1462 	uint16_t wasted_zones = 0;
1463 
1464 #if DEBUG || DEVELOPMENT
1465 	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1466 		uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1467 
1468 		assert(zone_budget + current_zones <= MAX_ZONES);
1469 	}
1470 #endif
1471 
1472 	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
1473 	    zbudget_sig, 2);
1474 	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
1475 	    kt_zones_type, zbudget_type, 0);
1476 	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1477 
1478 	/*
1479 	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1480 	 */
1481 	if (kt_options & KT_OPTIONS_DEBUG) {
1482 		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
1483 		    "zones_type\n");
1484 		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1485 			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
1486 			    freq_total_list[i] + freq_list[i], freq_list[i],
1487 			    kt_zones_sig[i] + kt_zones_type[i],
1488 			    kt_zones_sig[i], kt_zones_type[i]);
1489 		}
1490 	}
1491 
1492 	return wasted_zones;
1493 }
1494 
1495 
1496 __startup_func
1497 static void
kalloc_type_create_zone_for_size(zone_t * kt_zones_for_size,uint16_t kt_zones,vm_size_t z_size)1498 kalloc_type_create_zone_for_size(
1499 	zone_t             *kt_zones_for_size,
1500 	uint16_t            kt_zones,
1501 	vm_size_t           z_size)
1502 {
1503 	zone_t p_zone = NULL;
1504 	char *z_name = NULL;
1505 	zone_t shared_z = NULL;
1506 
1507 	for (uint16_t i = 0; i < kt_zones; i++) {
1508 		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1509 		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1510 		    (size_t) z_size);
1511 		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1512 		if (i != 0) {
1513 			p_zone->z_kt_next = z;
1514 		}
1515 		p_zone = z;
1516 		kt_zones_for_size[i] = z;
1517 	}
1518 	/*
1519 	 * Create shared zone for sizeclass if it doesn't already exist
1520 	 */
1521 	if (kt_shared_fixed) {
1522 		shared_z = kalloc_zone_for_size(KHEAP_SHARED->kh_zstart, z_size);
1523 		if (zone_elem_inner_size(shared_z) != z_size) {
1524 			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1525 			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
1526 			    (size_t) z_size);
1527 			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
1528 			    ^(zone_t zone){
1529 				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_SHARED;
1530 			});
1531 		}
1532 	}
1533 	kt_zones_for_size[kt_zones] = shared_z;
1534 }
1535 
1536 __startup_func
1537 static uint16_t
kalloc_type_zones_for_type(uint16_t zones_total_type,uint16_t unique_types,uint16_t total_types,bool last_sig)1538 kalloc_type_zones_for_type(
1539 	uint16_t            zones_total_type,
1540 	uint16_t            unique_types,
1541 	uint16_t            total_types,
1542 	bool                last_sig)
1543 {
1544 	uint16_t zones_for_type = 0, n_mod = 0;
1545 
1546 	if (zones_total_type == 0) {
1547 		return 0;
1548 	}
1549 
1550 	zones_for_type = (zones_total_type * unique_types) / total_types;
1551 	n_mod = (zones_total_type * unique_types) % total_types;
1552 	zone_carry += n_mod;
1553 
1554 	/*
1555 	 * Drain carry opportunistically
1556 	 */
1557 	if (((unique_types > 3) && (zone_carry > 0)) ||
1558 	    (zone_carry >= (int) total_types) ||
1559 	    (last_sig && (zone_carry > 0))) {
1560 		zone_carry -= total_types;
1561 		zones_for_type++;
1562 	}
1563 
1564 	if (last_sig) {
1565 		assert(zone_carry == 0);
1566 	}
1567 
1568 	return zones_for_type;
1569 }
1570 
1571 __startup_func
1572 static uint16_t
kalloc_type_build_skip_list(kalloc_type_view_t * start,kalloc_type_view_t * end,uint16_t * kt_skip_list)1573 kalloc_type_build_skip_list(
1574 	kalloc_type_view_t     *start,
1575 	kalloc_type_view_t     *end,
1576 	uint16_t               *kt_skip_list)
1577 {
1578 	kalloc_type_view_t *cur = start;
1579 	kalloc_type_view_t prev = *start;
1580 	uint16_t i = 0, idx = 0;
1581 
1582 	kt_skip_list[idx] = i;
1583 	idx++;
1584 
1585 	while (cur < end) {
1586 		kalloc_type_view_t kt_cur = *cur;
1587 
1588 		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1589 			kt_skip_list[idx] = i;
1590 
1591 			prev = kt_cur;
1592 			idx++;
1593 		}
1594 		i++;
1595 		cur++;
1596 	}
1597 
1598 	/*
1599 	 * Final update
1600 	 */
1601 	kt_skip_list[idx] = i;
1602 	return idx;
1603 }
1604 
1605 __startup_func
1606 static void
kalloc_type_init_sig_eq(zone_t * zones,uint16_t n_zones,zone_t sig_zone)1607 kalloc_type_init_sig_eq(
1608 	zone_t             *zones,
1609 	uint16_t            n_zones,
1610 	zone_t              sig_zone)
1611 {
1612 	for (uint16_t i = 0; i < n_zones; i++) {
1613 		zone_t z = zones[i];
1614 
1615 		assert(!zone_get_sig_eq(z));
1616 		zone_set_sig_eq(z, zone_index(sig_zone));
1617 	}
1618 }
1619 
1620 __startup_func
1621 static uint16_t
kalloc_type_distribute_zone_for_type(kalloc_type_view_t * start,kalloc_type_view_t * end,bool last_sig,uint16_t zones_total_type,uint16_t total_types,uint16_t * kt_skip_list,zone_t kt_zones_for_size[32],uint16_t type_zones_start,zone_t sig_zone,zone_t shared_zone)1622 kalloc_type_distribute_zone_for_type(
1623 	kalloc_type_view_t *start,
1624 	kalloc_type_view_t *end,
1625 	bool                last_sig,
1626 	uint16_t            zones_total_type,
1627 	uint16_t            total_types,
1628 	uint16_t           *kt_skip_list,
1629 	zone_t              kt_zones_for_size[32],
1630 	uint16_t            type_zones_start,
1631 	zone_t              sig_zone,
1632 	zone_t              shared_zone)
1633 {
1634 	uint16_t count = 0, n_zones = 0;
1635 	uint16_t *shuffle_buf = NULL;
1636 	zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1637 
1638 	/*
1639 	 * Assert there is space in buffer
1640 	 */
1641 	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1642 	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1643 	    last_sig);
1644 	shuffle_buf = &kt_skip_list[count + 1];
1645 
1646 	/*
1647 	 * Initalize signature equivalence zone for type zones
1648 	 */
1649 	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1650 
1651 	if (n_zones == 0) {
1652 		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1653 		    shared_zone);
1654 		return n_zones;
1655 	}
1656 
1657 	/*
1658 	 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1659 	 */
1660 	if (count == 1) {
1661 		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1662 		    shared_zone);
1663 		return n_zones;
1664 	}
1665 
1666 	/*
1667 	 * Add the signature based zone to n_zones
1668 	 */
1669 	n_zones++;
1670 
1671 	for (uint16_t i = 0; i < count; i++) {
1672 		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1673 		uint16_t type_start = kt_skip_list[i];
1674 		kalloc_type_view_t *kt_type_start = &start[type_start];
1675 		uint16_t type_end = kt_skip_list[i + 1];
1676 		kalloc_type_view_t *kt_type_end = &start[type_end];
1677 		zone_t zone;
1678 
1679 		if (zidx == 0) {
1680 			kmem_shuffle(shuffle_buf, n_zones);
1681 		}
1682 
1683 		shuffled_zidx = shuffle_buf[zidx];
1684 		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1685 		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1686 		    shared_zone);
1687 	}
1688 
1689 	return n_zones - 1;
1690 }
1691 
1692 __startup_func
1693 static void
kalloc_type_create_zones_fixed(uint16_t * kt_skip_list_start,uint16_t * kt_shuffle_buf)1694 kalloc_type_create_zones_fixed(
1695 	uint16_t           *kt_skip_list_start,
1696 	uint16_t           *kt_shuffle_buf)
1697 {
1698 	uint16_t *kt_skip_list = kt_skip_list_start;
1699 	uint16_t p_j = 0;
1700 	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1701 	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1702 #if DEBUG || DEVELOPMENT
1703 	uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1704 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1705 #endif
1706 	/*
1707 	 * Apply policy to determine how many zones to create for each size
1708 	 * class.
1709 	 */
1710 	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1711 	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1712 
1713 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1714 		uint16_t n_unique_sig = kt_freq_list[i];
1715 		vm_size_t z_size = kt_zone_cfg[i];
1716 		uint16_t n_zones_sig = kt_zones_sig[i];
1717 		uint16_t n_zones_type = kt_zones_type[i];
1718 		uint16_t total_types = kt_freq_list_total[i];
1719 		uint16_t type_zones_used = 0;
1720 
1721 		if (n_unique_sig == 0) {
1722 			continue;
1723 		}
1724 
1725 		zone_carry = 0;
1726 		assert(n_zones_sig + n_zones_type + 1 <= 32);
1727 		zone_t kt_zones_for_size[32] = {};
1728 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1729 		    n_zones_sig + n_zones_type, z_size);
1730 
1731 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1732 		/*
1733 		 * Ensure that there is enough space to shuffle n_unique_sig
1734 		 * indices
1735 		 */
1736 		assert(n_unique_sig < kt_shuffle_count);
1737 
1738 		/*
1739 		 * Get a shuffled set of signature indices
1740 		 */
1741 		*kt_shuffle_buf = 0;
1742 		if (n_unique_sig > 1) {
1743 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1744 		}
1745 
1746 		for (uint16_t j = 0; j < n_zones_sig; j++) {
1747 			zone_t *z_ptr = &kt_zones_for_size[j];
1748 
1749 			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1750 		}
1751 
1752 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1753 			/*
1754 			 * For every size that has unique types
1755 			 */
1756 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1757 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1758 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1759 			zone_t zone = kt_zones_for_size[j % n_zones_sig];
1760 			zone_t shared_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1761 			bool last_sig;
1762 
1763 			last_sig = (j == (n_unique_sig - 1)) ? true : false;
1764 			type_zones_used += kalloc_type_distribute_zone_for_type(
1765 				&kt_buffer[cur].ktv_fixed,
1766 				&kt_buffer[end].ktv_fixed, last_sig,
1767 				n_zones_type, total_types + n_unique_sig,
1768 				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1769 				n_zones_sig + type_zones_used, zone, shared_zone);
1770 		}
1771 		assert(type_zones_used <= n_zones_type);
1772 		p_j += n_unique_sig;
1773 	}
1774 }
1775 
1776 __startup_func
1777 static void
kalloc_type_view_init_fixed(void)1778 kalloc_type_view_init_fixed(void)
1779 {
1780 	kalloc_type_hash_seed = (uint32_t) early_random();
1781 	kalloc_type_build_dlut();
1782 	/*
1783 	 * Parse __kalloc_type sections and build array of pointers to
1784 	 * all kalloc type views in kt_buffer.
1785 	 */
1786 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1787 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1788 
1789 #if DEBUG || DEVELOPMENT
1790 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1791 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1792 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1793 #endif
1794 
1795 	/*
1796 	 * Sort based on size class and signature
1797 	 */
1798 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1799 	    kalloc_type_cmp_fixed);
1800 
1801 	/*
1802 	 * Build a skip list that holds starts of unique signatures and a
1803 	 * frequency list of number of unique and total signatures per kalloc
1804 	 * size class
1805 	 */
1806 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1807 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1808 		kt_skip_list_start, kt_count);
1809 
1810 	/*
1811 	 * Create zones based on signatures
1812 	 */
1813 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1814 }
1815 
1816 __startup_func
1817 static void
kalloc_type_heap_init(void)1818 kalloc_type_heap_init(void)
1819 {
1820 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1821 	char kh_name[MAX_ZONE_NAME];
1822 	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1823 
1824 	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1825 		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1826 		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1827 		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1828 	}
1829 	/*
1830 	 * All variable kalloc type allocations are collapsed into a single
1831 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1832 	 */
1833 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1834 	zone_view_count += 1;
1835 }
1836 
1837 __startup_func
1838 static void
kalloc_type_assign_heap(uint32_t start,uint32_t end,uint32_t heap_id)1839 kalloc_type_assign_heap(
1840 	uint32_t            start,
1841 	uint32_t            end,
1842 	uint32_t            heap_id)
1843 {
1844 	bool use_split = kmem_get_random16(1);
1845 
1846 	if (use_split) {
1847 		heap_id = kt_var_heaps;
1848 	}
1849 	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1850 	    &kt_buffer[end].ktv_var, heap_id);
1851 }
1852 
1853 __startup_func
1854 static void
kalloc_type_split_heap(uint32_t start,uint32_t end,uint32_t heap_id)1855 kalloc_type_split_heap(
1856 	uint32_t            start,
1857 	uint32_t            end,
1858 	uint32_t            heap_id)
1859 {
1860 	uint32_t count = start;
1861 	const char *p_name = NULL;
1862 
1863 	while (count < end) {
1864 		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1865 		const char *c_name = cur->kt_name;
1866 
1867 		if (!p_name) {
1868 			assert(count == start);
1869 			p_name = c_name;
1870 		}
1871 		if (strcmp(c_name, p_name) != 0) {
1872 			kalloc_type_assign_heap(start, count, heap_id);
1873 			start = count;
1874 			p_name = c_name;
1875 		}
1876 		count++;
1877 	}
1878 	kalloc_type_assign_heap(start, end, heap_id);
1879 }
1880 
1881 __startup_func
1882 static void
kalloc_type_view_init_var(void)1883 kalloc_type_view_init_var(void)
1884 {
1885 	uint32_t buf_start = 0, unique_sig = 0;
1886 	uint32_t *kt_skip_list_start;
1887 	uint16_t *shuffle_buf;
1888 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
1889 	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
1890 	/*
1891 	 * Pick a random heap to split
1892 	 */
1893 	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
1894 
1895 	/*
1896 	 * Zones are created prior to parsing the views as zone budget is fixed
1897 	 * per sizeclass and special types identified while parsing are redirected
1898 	 * as they are discovered.
1899 	 */
1900 	kalloc_type_heap_init();
1901 
1902 	/*
1903 	 * Parse __kalloc_var sections and build array of pointers to views that
1904 	 * aren't rediected in kt_buffer.
1905 	 */
1906 	kt_count = kalloc_type_view_parse(KTV_VAR);
1907 	assert(kt_count < UINT32_MAX);
1908 
1909 #if DEBUG || DEVELOPMENT
1910 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
1911 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
1912 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1913 #endif
1914 
1915 	/*
1916 	 * Sort based on size class and signature
1917 	 */
1918 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
1919 	    kalloc_type_cmp_var);
1920 
1921 	buf_start = kalloc_type_handle_parray_var();
1922 
1923 	/*
1924 	 * Build a skip list that holds starts of unique signatures
1925 	 */
1926 	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
1927 	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
1928 	    buf_start);
1929 	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
1930 	/*
1931 	 * If we have only one heap then other elements share heap with pointer
1932 	 * arrays
1933 	 */
1934 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
1935 		panic("kt_var_heaps is too small");
1936 	}
1937 
1938 	kmem_shuffle(shuffle_buf, flex_heap_count);
1939 	/*
1940 	 * The index of the heap we decide to split is placed twice in the shuffle
1941 	 * buffer so that it gets twice the number of signatures that we split
1942 	 * evenly
1943 	 */
1944 	shuffle_buf[flex_heap_count] = split_heap;
1945 	split_heap += (fixed_heaps + 1);
1946 
1947 	for (uint32_t i = 1; i <= unique_sig; i++) {
1948 		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
1949 		    fixed_heaps + 1;
1950 		uint32_t start = kt_skip_list_start[i - 1];
1951 		uint32_t end = kt_skip_list_start[i];
1952 
1953 		assert(heap_id <= kt_var_heaps);
1954 		if (heap_id == split_heap) {
1955 			kalloc_type_split_heap(start, end, heap_id);
1956 			continue;
1957 		}
1958 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1959 		    &kt_buffer[end].ktv_var, heap_id);
1960 	}
1961 }
1962 
1963 __startup_func
1964 static void
kalloc_init(void)1965 kalloc_init(void)
1966 {
1967 	/*
1968 	 * Allocate scratch space to parse kalloc_type_views and create
1969 	 * other structures necessary to process them.
1970 	 */
1971 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
1972 
1973 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
1974 	kalloc_zsize_compute();
1975 
1976 	/* Initialize kalloc data buffers heap */
1977 	kalloc_heap_init(KHEAP_DATA_BUFFERS);
1978 
1979 	/* Initialize kalloc shared buffers heap */
1980 	kalloc_heap_init(KHEAP_SHARED);
1981 
1982 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
1983 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_KALLOC);
1984 
1985 	/*
1986 	 * Handle fixed size views
1987 	 */
1988 	kalloc_type_view_init_fixed();
1989 
1990 	/*
1991 	 * Reset
1992 	 */
1993 	bzero(kt_buffer, kt_scratch_size);
1994 	kt_count = max_count;
1995 
1996 	/*
1997 	 * Handle variable size views
1998 	 */
1999 	kalloc_type_view_init_var();
2000 
2001 	/*
2002 	 * Free resources used
2003 	 */
2004 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2005 }
2006 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2007 
2008 #pragma mark accessors
2009 
/*
 * Half of the span between VM_MIN_KERNEL_AND_KEXT_ADDRESS and
 * VM_MAX_KERNEL_ADDRESS; kfree_ext() panics on sizes above this bound.
 */
#define KFREE_ABSURD_SIZE ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2012 
2013 static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)2014 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2015 {
2016 	thread_t thr = current_thread();
2017 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2018 }
2019 
2020 static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)2021 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2022 {
2023 	thread_t thr = current_thread();
2024 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2025 }
2026 
2027 static kmem_guard_t
kalloc_guard(vm_tag_t tag,uint16_t type_hash,const void * owner)2028 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2029 {
2030 	kmem_guard_t guard = {
2031 		.kmg_atomic      = true,
2032 		.kmg_tag         = tag,
2033 		.kmg_type_hash   = type_hash,
2034 		.kmg_context     = os_hash_kernel_pointer(owner),
2035 	};
2036 
2037 	/*
2038 	 * TODO: this use is really not sufficiently smart.
2039 	 */
2040 
2041 	return guard;
2042 }
2043 
2044 #if __arm64e__ || CONFIG_KERNEL_TAGGING
2045 
2046 #if __arm64e__
2047 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2048 
2049 /*
2050  * Zone encoding is:
2051  *
2052  *   <PAC SIG><1><1><PTR value><5 bits of size class>
2053  *
2054  * VM encoding is:
2055  *
2056  *   <PAC SIG><1><0><PTR value><14 bits of page count>
2057  *
2058  * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2059  * so that PAC authentication extends the proper sign bit.
2060  */
2061 
2062 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2063 #else
2064 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2065 
2066 /*
2067  * Zone encoding is:
2068  *
2069  *   <TBI><1><PTR value><5 bits of size class>
2070  *
2071  * VM encoding is:
2072  *
2073  *   <TBI><0><PTR value><14 bits of page count>
2074  */
2075 
2076 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2077 #endif
2078 
2079 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2080 
2081 __attribute__((always_inline))
2082 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2083 __kalloc_array_decode(vm_address_t ptr)
2084 {
2085 	struct kalloc_result kr;
2086 	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2087 
2088 	if (ptr & zone_mask) {
2089 		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2090 		ptr &= ~0x1full;
2091 	} else if (__probable(ptr)) {
2092 		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2093 		ptr &= ~PAGE_MASK;
2094 		ptr |= zone_mask;
2095 	} else {
2096 		kr.size = 0;
2097 	}
2098 
2099 	kr.addr = (void *)ptr;
2100 	return kr;
2101 }
2102 
2103 static inline void *
__kalloc_array_encode_zone(zone_t z,void * ptr,vm_size_t size __unused)2104 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2105 {
2106 	return (void *)((vm_address_t)ptr | z->z_array_size_class);
2107 }
2108 
2109 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2110 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2111 {
2112 	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2113 
2114 	return addr | atop(size);
2115 }
2116 
2117 #else
2118 
2119 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2120 
2121 /*
2122  * Encoding is:
2123  * bits  0..46: pointer value
2124  * bits 47..47: 0: zones, 1: VM
2125  * bits 48..63: zones: elem size, VM: number of pages
2126  */
2127 
2128 #define KALLOC_ARRAY_TYPE_BIT   47
2129 static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2130 static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2131 
2132 __attribute__((always_inline))
2133 struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)2134 __kalloc_array_decode(vm_address_t ptr)
2135 {
2136 	struct kalloc_result kr;
2137 	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2138 
2139 	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2140 	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2141 		kr.size <<= PAGE_SHIFT;
2142 	}
2143 	/* sign extend, so that it also works with NULL */
2144 	kr.addr = (void *)((long)(ptr << shift) >> shift);
2145 
2146 	return kr;
2147 }
2148 
2149 static inline void *
__kalloc_array_encode_zone(zone_t z __unused,void * ptr,vm_size_t size)2150 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2151 {
2152 	vm_address_t addr = (vm_address_t)ptr;
2153 
2154 	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2155 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2156 
2157 	return (void *)addr;
2158 }
2159 
2160 static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr,vm_size_t size)2161 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2162 {
2163 	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2164 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2165 
2166 	return addr;
2167 }
2168 
2169 #endif
2170 
2171 vm_size_t
kalloc_next_good_size(vm_size_t size,uint32_t period)2172 kalloc_next_good_size(vm_size_t size, uint32_t period)
2173 {
2174 	uint32_t scale = kalloc_log2down((uint32_t)size);
2175 	vm_size_t step, size_class;
2176 
2177 	if (size < KHEAP_STEP_START) {
2178 		return KHEAP_STEP_START;
2179 	}
2180 	if (size < 2 * KHEAP_STEP_START) {
2181 		return 2 * KHEAP_STEP_START;
2182 	}
2183 
2184 	if (size < KHEAP_MAX_SIZE) {
2185 		step = 1ul << (scale - 1);
2186 	} else {
2187 		step = round_page(1ul << (scale - kalloc_log2down(period)));
2188 	}
2189 
2190 	size_class = (size + step) & -step;
2191 #if KASAN_CLASSIC
2192 	if (size > K_SIZE_CLASS(size_class)) {
2193 		return kalloc_next_good_size(size_class, period);
2194 	}
2195 	size_class = K_SIZE_CLASS(size_class);
2196 #endif
2197 	return size_class;
2198 }
2199 
2200 
2201 #pragma mark kalloc
2202 
2203 static inline kalloc_heap_t
kalloc_type_get_heap(kalloc_type_var_view_t kt_view,bool kt_free __unused)2204 kalloc_type_get_heap(kalloc_type_var_view_t kt_view, bool kt_free __unused)
2205 {
2206 	/*
2207 	 * Redirect data-only views
2208 	 */
2209 	if (kalloc_type_is_data(kt_view->kt_flags)) {
2210 		return KHEAP_DATA_BUFFERS;
2211 	}
2212 
2213 	if (kt_view->kt_flags & KT_PROCESSED) {
2214 		return KHEAP_KT_VAR;
2215 	}
2216 
2217 	return KHEAP_DEFAULT;
2218 }
2219 
2220 __attribute__((noinline))
2221 static struct kalloc_result
kalloc_large(kalloc_heap_t kheap,vm_size_t req_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2222 kalloc_large(
2223 	kalloc_heap_t         kheap,
2224 	vm_size_t             req_size,
2225 	zalloc_flags_t        flags,
2226 	uint16_t              kt_hash,
2227 	void                 *owner __unused)
2228 {
2229 	kma_flags_t kma_flags = KMA_KASAN_GUARD | KMA_TAG;
2230 	vm_tag_t tag;
2231 	vm_offset_t addr, size;
2232 
2233 	if (flags & Z_NOFAIL) {
2234 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2235 		    (size_t)req_size);
2236 	}
2237 
2238 	/*
2239 	 * kmem_alloc could block so we return if noblock
2240 	 *
2241 	 * also, reject sizes larger than our address space is quickly,
2242 	 * as kt_size or IOMallocArraySize() expect this.
2243 	 */
2244 	if ((flags & Z_NOWAIT) ||
2245 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2246 		return (struct kalloc_result){ };
2247 	}
2248 
2249 	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2250 		return (struct kalloc_result){ };
2251 	}
2252 
2253 	/*
2254 	 * (73465472) on Intel we didn't use to pass this flag,
2255 	 * which in turned allowed kalloc_large() memory to be shared
2256 	 * with user directly.
2257 	 *
2258 	 * We're bound by this unfortunate ABI.
2259 	 */
2260 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2261 #ifndef __x86_64__
2262 		kma_flags |= KMA_KOBJECT;
2263 #endif
2264 	} else {
2265 		assert(kheap == KHEAP_DATA_BUFFERS);
2266 		kma_flags &= ~KMA_TAG;
2267 	}
2268 	if (flags & Z_NOPAGEWAIT) {
2269 		kma_flags |= KMA_NOPAGEWAIT;
2270 	}
2271 	if (flags & Z_ZERO) {
2272 		kma_flags |= KMA_ZERO;
2273 	}
2274 	if (kheap == KHEAP_DATA_BUFFERS) {
2275 		kma_flags |= KMA_DATA;
2276 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2277 		kma_flags |= KMA_SPRAYQTN;
2278 	}
2279 
2280 
2281 	tag = zalloc_flags_get_tag(flags);
2282 	if (flags & Z_VM_TAG_BT_BIT) {
2283 		tag = vm_tag_bt() ?: tag;
2284 	}
2285 	if (tag == VM_KERN_MEMORY_NONE) {
2286 		tag = kheap->kh_tag;
2287 	}
2288 
2289 	size = round_page(req_size);
2290 	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2291 		req_size = round_page(size);
2292 	}
2293 
2294 	addr = kmem_alloc_guard(kernel_map, req_size, 0,
2295 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2296 
2297 	if (addr != 0) {
2298 		counter_inc(&kalloc_large_count);
2299 		counter_add(&kalloc_large_total, size);
2300 		KALLOC_ZINFO_SALLOC(size);
2301 		if (flags & Z_KALLOC_ARRAY) {
2302 			addr = __kalloc_array_encode_vm(addr, req_size);
2303 		}
2304 	} else {
2305 		addr = 0;
2306 	}
2307 
2308 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2309 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2310 }
2311 
2312 #if KASAN
2313 
2314 static inline void
kalloc_mark_unused_space(void * addr,vm_size_t size,vm_size_t used)2315 kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
2316 {
2317 #if KASAN_CLASSIC
2318 	/*
2319 	 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
2320 	 * tagging of the memory region is performed here.
2321 	 */
2322 	kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
2323 	    __builtin_frame_address(0));
2324 #endif /* KASAN_CLASSIC */
2325 
2326 #if KASAN_TBI
2327 	kasan_tbi_retag_unused_space((vm_offset_t)addr, size, used ? :1);
2328 #endif /* KASAN_TBI */
2329 }
2330 #endif /* KASAN */
2331 
2332 static inline struct kalloc_result
kalloc_zone(zone_t z,zone_stats_t zstats,zalloc_flags_t flags,vm_size_t req_size)2333 kalloc_zone(
2334 	zone_t                  z,
2335 	zone_stats_t            zstats,
2336 	zalloc_flags_t          flags,
2337 	vm_size_t               req_size)
2338 {
2339 	struct kalloc_result kr;
2340 	vm_size_t esize;
2341 
2342 	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2343 	esize = kr.size;
2344 
2345 	if (__probable(kr.addr)) {
2346 		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2347 			req_size = esize;
2348 		} else {
2349 			kr.size = req_size;
2350 		}
2351 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2352 		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2353 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2354 
2355 #if KASAN
2356 		kalloc_mark_unused_space(kr.addr, esize, kr.size);
2357 #endif /* KASAN */
2358 
2359 		if (flags & Z_KALLOC_ARRAY) {
2360 			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2361 		}
2362 	}
2363 
2364 	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2365 	return kr;
2366 }
2367 
2368 static zone_id_t
kalloc_use_shared_heap(kalloc_heap_t kheap,zone_stats_t zstats,zone_id_t zstart,zalloc_flags_t * flags)2369 kalloc_use_shared_heap(
2370 	kalloc_heap_t           kheap,
2371 	zone_stats_t            zstats,
2372 	zone_id_t               zstart,
2373 	zalloc_flags_t         *flags)
2374 {
2375 	if (kheap->kh_heap_id != KHEAP_ID_DATA_BUFFERS) {
2376 		zone_stats_t zstats_cpu = zpercpu_get(zstats);
2377 
2378 		if (os_atomic_load(&zstats_cpu->zs_alloc_not_shared, relaxed) == 0) {
2379 			*flags |= Z_SET_NOTSHARED;
2380 			return KHEAP_SHARED->kh_zstart;
2381 		}
2382 	}
2383 
2384 	return zstart;
2385 }
2386 
2387 #undef kalloc_ext
2388 
2389 struct kalloc_result
kalloc_ext(void * kheap_or_kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2390 kalloc_ext(
2391 	void                   *kheap_or_kt_view,
2392 	vm_size_t               size,
2393 	zalloc_flags_t          flags,
2394 	void                   *owner)
2395 {
2396 	kalloc_type_var_view_t kt_view;
2397 	kalloc_heap_t kheap;
2398 	zone_stats_t zstats = NULL;
2399 	zone_t z;
2400 	uint16_t kt_hash;
2401 	zone_id_t zstart;
2402 
2403 	if (kt_is_var_view(kheap_or_kt_view)) {
2404 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2405 		kheap   = kalloc_type_get_heap(kt_view, false);
2406 		/*
2407 		 * Use stats from view if present, else use stats from kheap.
2408 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2409 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2410 		 * use stats from the respective zones.
2411 		 */
2412 		zstats  = kt_view->kt_stats;
2413 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2414 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
2415 	} else {
2416 		kt_view = NULL;
2417 		kheap   = kheap_or_kt_view;
2418 		kt_hash = kheap->kh_type_hash;
2419 		zstart  = kheap->kh_zstart;
2420 	}
2421 
2422 	if (!zstats) {
2423 		zstats = kheap->kh_stats;
2424 	}
2425 
2426 	zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
2427 	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2428 	if (z) {
2429 		return kalloc_zone(z, zstats, flags, size);
2430 	} else {
2431 		return kalloc_large(kheap, size, flags, kt_hash, owner);
2432 	}
2433 }
2434 
2435 #if XNU_PLATFORM_MacOSX
2436 void *
2437 kalloc_external(vm_size_t size);
2438 void *
kalloc_external(vm_size_t size)2439 kalloc_external(vm_size_t size)
2440 {
2441 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2442 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2443 }
2444 #endif /* XNU_PLATFORM_MacOSX */
2445 
2446 void *
2447 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2448 void *
kalloc_data_external(vm_size_t size,zalloc_flags_t flags)2449 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2450 {
2451 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2452 	return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags);
2453 }
2454 
2455 __abortlike
2456 static void
kalloc_data_require_panic(void * addr,vm_size_t size)2457 kalloc_data_require_panic(void *addr, vm_size_t size)
2458 {
2459 	zone_id_t zid = zone_id_for_element(addr, size);
2460 
2461 	if (zid != ZONE_ID_INVALID) {
2462 		zone_t z = &zone_array[zid];
2463 		zone_security_flags_t zsflags = zone_security_array[zid];
2464 
2465 		if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
2466 			panic("kalloc_data_require failed: address %p in [%s%s]",
2467 			    addr, zone_heap_name(z), zone_name(z));
2468 		}
2469 
2470 		panic("kalloc_data_require failed: address %p in [%s%s], "
2471 		    "size too large %zd > %zd", addr,
2472 		    zone_heap_name(z), zone_name(z),
2473 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2474 	} else {
2475 		panic("kalloc_data_require failed: address %p not in zone native map",
2476 		    addr);
2477 	}
2478 }
2479 
2480 __abortlike
2481 static void
kalloc_non_data_require_panic(void * addr,vm_size_t size)2482 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2483 {
2484 	zone_id_t zid = zone_id_for_element(addr, size);
2485 
2486 	if (zid != ZONE_ID_INVALID) {
2487 		zone_t z = &zone_array[zid];
2488 		zone_security_flags_t zsflags = zone_security_array[zid];
2489 
2490 		switch (zsflags.z_kheap_id) {
2491 		case KHEAP_ID_NONE:
2492 		case KHEAP_ID_DATA_BUFFERS:
2493 		case KHEAP_ID_KT_VAR:
2494 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2495 			    addr, zone_heap_name(z), zone_name(z));
2496 		default:
2497 			break;
2498 		}
2499 
2500 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2501 		    "size too large %zd > %zd", addr,
2502 		    zone_heap_name(z), zone_name(z),
2503 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2504 	} else {
2505 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2506 		    addr);
2507 	}
2508 }
2509 
2510 void
kalloc_data_require(void * addr,vm_size_t size)2511 kalloc_data_require(void *addr, vm_size_t size)
2512 {
2513 	zone_id_t zid = zone_id_for_element(addr, size);
2514 
2515 	if (zid != ZONE_ID_INVALID) {
2516 		zone_t z = &zone_array[zid];
2517 		zone_security_flags_t zsflags = zone_security_array[zid];
2518 		if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS &&
2519 		    size <= zone_elem_inner_size(z)) {
2520 			return;
2521 		}
2522 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2523 	    (vm_address_t)pgz_decode(addr, size), size)) {
2524 		return;
2525 	}
2526 
2527 	kalloc_data_require_panic(addr, size);
2528 }
2529 
2530 void
kalloc_non_data_require(void * addr,vm_size_t size)2531 kalloc_non_data_require(void *addr, vm_size_t size)
2532 {
2533 	zone_id_t zid = zone_id_for_element(addr, size);
2534 
2535 	if (zid != ZONE_ID_INVALID) {
2536 		zone_t z = &zone_array[zid];
2537 		zone_security_flags_t zsflags = zone_security_array[zid];
2538 		switch (zsflags.z_kheap_id) {
2539 		case KHEAP_ID_NONE:
2540 			if (!zsflags.z_kalloc_type) {
2541 				break;
2542 			}
2543 			OS_FALLTHROUGH;
2544 		case KHEAP_ID_KT_VAR:
2545 			if (size < zone_elem_inner_size(z)) {
2546 				return;
2547 			}
2548 			break;
2549 		default:
2550 			break;
2551 		}
2552 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2553 	    (vm_address_t)pgz_decode(addr, size), size)) {
2554 		return;
2555 	}
2556 
2557 	kalloc_non_data_require_panic(addr, size);
2558 }
2559 
2560 void *
kalloc_type_impl_external(kalloc_type_view_t kt_view,zalloc_flags_t flags)2561 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2562 {
2563 	/*
2564 	 * Callsites from a kext that aren't in the BootKC on macOS or
2565 	 * any callsites on armv7 are not processed during startup,
2566 	 * default to using kheap_alloc
2567 	 *
2568 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2569 	 * NULL as we need to use the vm for the allocation
2570 	 *
2571 	 */
2572 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2573 		vm_size_t size = kalloc_type_get_size(kt_view->kt_size);
2574 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2575 		return kalloc_ext(KHEAP_DEFAULT, size, flags, NULL).addr;
2576 	}
2577 
2578 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2579 	return kalloc_type_impl(kt_view, flags);
2580 }
2581 
2582 void *
2583 kalloc_type_var_impl_external(
2584 	kalloc_type_var_view_t  kt_view,
2585 	vm_size_t               size,
2586 	zalloc_flags_t          flags,
2587 	void                   *owner);
2588 void *
kalloc_type_var_impl_external(kalloc_type_var_view_t kt_view,vm_size_t size,zalloc_flags_t flags,void * owner)2589 kalloc_type_var_impl_external(
2590 	kalloc_type_var_view_t  kt_view,
2591 	vm_size_t               size,
2592 	zalloc_flags_t          flags,
2593 	void                   *owner)
2594 {
2595 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2596 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2597 }
2598 
2599 #pragma mark kfree
2600 
2601 __abortlike
2602 static void
kfree_heap_confusion_panic(kalloc_heap_t kheap,void * data,size_t size,zone_t z)2603 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2604 {
2605 	zone_security_flags_t zsflags = zone_security_config(z);
2606 	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2607 
2608 	if (zsflags.z_kalloc_type) {
2609 		panic_include_kalloc_types = true;
2610 		kalloc_type_src_zone = z;
2611 		panic("kfree: addr %p found in kalloc type zone '%s'"
2612 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2613 	}
2614 
2615 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2616 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2617 		    data, size, zone_heap_name(z), z->z_name);
2618 	} else {
2619 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2620 		    data, size, zone_heap_name(z), kheap_name);
2621 	}
2622 }
2623 
2624 __abortlike
2625 static void
kfree_size_confusion_panic(zone_t z,void * data,size_t oob_offs,size_t size,size_t zsize)2626 kfree_size_confusion_panic(zone_t z, void *data,
2627     size_t oob_offs, size_t size, size_t zsize)
2628 {
2629 	if (z) {
2630 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2631 		    "with elem_size %zd",
2632 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2633 	} else {
2634 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2635 		    data, size, oob_offs);
2636 	}
2637 }
2638 
2639 __abortlike
2640 static void
kfree_size_invalid_panic(void * data,size_t size)2641 kfree_size_invalid_panic(void *data, size_t size)
2642 {
2643 	panic("kfree: addr %p trying to free with nonsensical size %zd",
2644 	    data, size);
2645 }
2646 
2647 __abortlike
2648 static void
kfree_size_require_panic(void * data,size_t size,size_t min_size,size_t max_size)2649 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2650     size_t max_size)
2651 {
2652 	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2653 	    data, size, min_size, max_size);
2654 }
2655 
2656 static void
kfree_size_require(kalloc_heap_t kheap,void * addr,vm_size_t min_size,vm_size_t max_size)2657 kfree_size_require(
2658 	kalloc_heap_t kheap,
2659 	void *addr,
2660 	vm_size_t min_size,
2661 	vm_size_t max_size)
2662 {
2663 	assert3u(min_size, <=, max_size);
2664 	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2665 	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2666 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2667 	if (elem_size > max_zone_size || elem_size < min_size) {
2668 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2669 	}
2670 }
2671 
2672 static void
kfree_large(vm_offset_t addr,vm_size_t size,kmf_flags_t flags,void * owner)2673 kfree_large(
2674 	vm_offset_t             addr,
2675 	vm_size_t               size,
2676 	kmf_flags_t             flags,
2677 	void                   *owner)
2678 {
2679 	size = kmem_free_guard(kernel_map, addr, size,
2680 	    flags | KMF_TAG | KMF_KASAN_GUARD,
2681 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2682 
2683 	counter_dec(&kalloc_large_count);
2684 	counter_add(&kalloc_large_total, -(uint64_t)size);
2685 	KALLOC_ZINFO_SFREE(size);
2686 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2687 }
2688 
2689 static void
kfree_zone(void * kheap_or_kt_view __unsafe_indexable,void * data,vm_size_t size,zone_t z,vm_size_t zsize)2690 kfree_zone(
2691 	void                   *kheap_or_kt_view __unsafe_indexable,
2692 	void                   *data,
2693 	vm_size_t               size,
2694 	zone_t                  z,
2695 	vm_size_t               zsize)
2696 {
2697 	zone_security_flags_t zsflags = zone_security_config(z);
2698 	kalloc_type_var_view_t kt_view;
2699 	kalloc_heap_t kheap;
2700 	zone_stats_t zstats = NULL;
2701 
2702 	if (kt_is_var_view(kheap_or_kt_view)) {
2703 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2704 		kheap   = kalloc_type_get_heap(kt_view, true);
2705 		/*
2706 		 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2707 		 * we will end up having incorrect stats. Cross frees may happen on
2708 		 * macOS due to allocation from an unprocessed view and free from
2709 		 * a processed view or vice versa.
2710 		 */
2711 		zstats  = kt_view->kt_stats;
2712 	} else {
2713 		kt_view = NULL;
2714 		kheap   = kheap_or_kt_view;
2715 	}
2716 
2717 	if (!zstats) {
2718 		zstats = kheap->kh_stats;
2719 	}
2720 
2721 	zsflags = zone_security_config(z);
2722 	if (kheap == KHEAP_DATA_BUFFERS) {
2723 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2724 			kfree_heap_confusion_panic(kheap, data, size, z);
2725 		}
2726 	} else {
2727 		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2728 		    (zsflags.z_kheap_id != KHEAP_ID_SHARED)) {
2729 			kfree_heap_confusion_panic(kheap, data, size, z);
2730 		}
2731 	}
2732 
2733 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2734 
2735 	/* needs to be __nosan because the user size might be partial */
2736 	__nosan_bzero(data, zsize);
2737 	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2738 }
2739 
2740 void
kfree_ext(void * kheap_or_kt_view,void * data,vm_size_t size)2741 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2742 {
2743 	vm_size_t bucket_size;
2744 	zone_t z;
2745 
2746 	if (data == NULL) {
2747 		return;
2748 	}
2749 
2750 	if (size > KFREE_ABSURD_SIZE) {
2751 		kfree_size_invalid_panic(data, size);
2752 	}
2753 
2754 	if (size <= KHEAP_MAX_SIZE) {
2755 		vm_size_t oob_offs;
2756 
2757 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2758 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2759 			kfree_size_confusion_panic(z, data,
2760 			    oob_offs, size, bucket_size);
2761 		}
2762 
2763 		data = (char *)data - oob_offs;
2764 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2765 	} else {
2766 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2767 	}
2768 }
2769 
2770 void
kfree_addr_ext(kalloc_heap_t kheap,void * data)2771 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2772 {
2773 	vm_offset_t oob_offs;
2774 	vm_size_t size, usize = 0;
2775 	zone_t z;
2776 
2777 	if (data == NULL) {
2778 		return;
2779 	}
2780 
2781 	size = zone_element_size(data, &z, true, &oob_offs);
2782 	if (size) {
2783 #if KASAN_CLASSIC
2784 		usize = kasan_user_size((vm_offset_t)data);
2785 #endif
2786 		data = (char *)data - oob_offs;
2787 		kfree_zone(kheap, data, usize, z, size);
2788 	} else {
2789 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2790 	}
2791 }
2792 
2793 #if XNU_PLATFORM_MacOSX
2794 void
2795 kfree_external(void *addr, vm_size_t size);
2796 void
kfree_external(void * addr,vm_size_t size)2797 kfree_external(void *addr, vm_size_t size)
2798 {
2799 	kalloc_heap_t kheap = KHEAP_DEFAULT;
2800 
2801 	kfree_ext(kheap, addr, size);
2802 }
2803 #endif /* XNU_PLATFORM_MacOSX */
2804 
2805 void
2806 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2807     vm_size_t min_sz, vm_size_t max_sz)
2808 {
2809 	if (__improbable(addr == NULL)) {
2810 		return;
2811 	}
2812 	kfree_size_require(kheap, addr, min_sz, max_sz);
2813 	kfree_addr_ext(kheap, addr);
2814 }
2815 
2816 void *
kalloc_type_impl_internal(kalloc_type_view_t kt_view,zalloc_flags_t flags)2817 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2818 {
2819 	zone_stats_t zs = kt_view->kt_zv.zv_stats;
2820 	zone_t       z  = kt_view->kt_zv.zv_zone;
2821 	zone_stats_t zs_cpu = zpercpu_get(zs);
2822 
2823 	if ((flags & Z_SET_NOTSHARED) ||
2824 	    os_atomic_load(&zs_cpu->zs_alloc_not_shared, relaxed)) {
2825 		return zalloc_ext(z, zs, flags).addr;
2826 	}
2827 
2828 	assert(zone_security_config(z).z_kheap_id != KHEAP_ID_DATA_BUFFERS);
2829 	return zalloc_ext(kt_view->kt_zshared, zs, flags | Z_SET_NOTSHARED).addr;
2830 }
2831 
2832 void
kfree_type_impl_external(kalloc_type_view_t kt_view,void * ptr)2833 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
2834 {
2835 	/*
2836 	 * If callsite is from a kext that isn't in the BootKC, it wasn't
2837 	 * processed during startup so default to using kheap_alloc
2838 	 *
2839 	 * Additionally when size is greater KHEAP_MAX_SIZE zone is left
2840 	 * NULL as we need to use the vm for the allocation/free
2841 	 */
2842 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
2843 		return kheap_free(KHEAP_DEFAULT, ptr,
2844 		           kalloc_type_get_size(kt_view->kt_size));
2845 	}
2846 	return kfree_type_impl(kt_view, ptr);
2847 }
2848 
2849 void
2850 kfree_type_var_impl_external(
2851 	kalloc_type_var_view_t  kt_view,
2852 	void                   *ptr,
2853 	vm_size_t               size);
2854 void
kfree_type_var_impl_external(kalloc_type_var_view_t kt_view,void * ptr,vm_size_t size)2855 kfree_type_var_impl_external(
2856 	kalloc_type_var_view_t  kt_view,
2857 	void                   *ptr,
2858 	vm_size_t               size)
2859 {
2860 	return kfree_type_var_impl(kt_view, ptr, size);
2861 }
2862 
2863 void
2864 kfree_data_external(void *ptr, vm_size_t size);
2865 void
kfree_data_external(void * ptr,vm_size_t size)2866 kfree_data_external(void *ptr, vm_size_t size)
2867 {
2868 	return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
2869 }
2870 
2871 void
2872 kfree_data_addr_external(void *ptr);
2873 void
kfree_data_addr_external(void * ptr)2874 kfree_data_addr_external(void *ptr)
2875 {
2876 	return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
2877 }
2878 
2879 #pragma mark krealloc
2880 
2881 __abortlike
2882 static void
krealloc_size_invalid_panic(void * data,size_t size)2883 krealloc_size_invalid_panic(void *data, size_t size)
2884 {
2885 	panic("krealloc: addr %p trying to free with nonsensical size %zd",
2886 	    data, size);
2887 }
2888 
2889 __attribute__((noinline))
2890 static struct kalloc_result
krealloc_large(kalloc_heap_t kheap,vm_offset_t addr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags,uint16_t kt_hash,void * owner __unused)2891 krealloc_large(
2892 	kalloc_heap_t         kheap,
2893 	vm_offset_t           addr,
2894 	vm_size_t             old_size,
2895 	vm_size_t             new_size,
2896 	zalloc_flags_t        flags,
2897 	uint16_t              kt_hash,
2898 	void                 *owner __unused)
2899 {
2900 	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_TAG | KMR_KASAN_GUARD;
2901 	vm_size_t new_req_size = new_size;
2902 	vm_size_t old_req_size = old_size;
2903 	uint64_t delta;
2904 	kmem_return_t kmr;
2905 	vm_tag_t tag;
2906 
2907 	if (flags & Z_NOFAIL) {
2908 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2909 		    (size_t)new_req_size);
2910 	}
2911 
2912 	/*
2913 	 * kmem_alloc could block so we return if noblock
2914 	 *
2915 	 * also, reject sizes larger than our address space is quickly,
2916 	 * as kt_size or IOMallocArraySize() expect this.
2917 	 */
2918 	if ((flags & Z_NOWAIT) ||
2919 	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2920 		return (struct kalloc_result){ };
2921 	}
2922 
2923 	/*
2924 	 * (73465472) on Intel we didn't use to pass this flag,
2925 	 * which in turned allowed kalloc_large() memory to be shared
2926 	 * with user directly.
2927 	 *
2928 	 * We're bound by this unfortunate ABI.
2929 	 */
2930 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2931 #ifndef __x86_64__
2932 		kmr_flags |= KMR_KOBJECT;
2933 #endif
2934 	} else {
2935 		assert(kheap == KHEAP_DATA_BUFFERS);
2936 		kmr_flags &= ~KMR_TAG;
2937 	}
2938 	if (flags & Z_NOPAGEWAIT) {
2939 		kmr_flags |= KMR_NOPAGEWAIT;
2940 	}
2941 	if (flags & Z_ZERO) {
2942 		kmr_flags |= KMR_ZERO;
2943 	}
2944 	if (kheap == KHEAP_DATA_BUFFERS) {
2945 		kmr_flags |= KMR_DATA;
2946 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2947 		kmr_flags |= KMR_SPRAYQTN;
2948 	}
2949 	if (flags & Z_REALLOCF) {
2950 		kmr_flags |= KMR_REALLOCF;
2951 	}
2952 
2953 
2954 	tag = zalloc_flags_get_tag(flags);
2955 	if (flags & Z_VM_TAG_BT_BIT) {
2956 		tag = vm_tag_bt() ?: tag;
2957 	}
2958 	if (tag == VM_KERN_MEMORY_NONE) {
2959 		tag = kheap->kh_tag;
2960 	}
2961 
2962 	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
2963 	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
2964 
2965 	new_size = round_page(new_req_size);
2966 	old_size = round_page(old_req_size);
2967 
2968 	if (kmr.kmr_address != 0) {
2969 		delta = (uint64_t)(new_size - old_size);
2970 	} else if (flags & Z_REALLOCF) {
2971 		counter_dec(&kalloc_large_count);
2972 		delta = (uint64_t)(-old_size);
2973 	} else {
2974 		delta = 0;
2975 	}
2976 
2977 	counter_add(&kalloc_large_total, delta);
2978 	KALLOC_ZINFO_SALLOC(delta);
2979 
2980 	if (addr != 0 || (flags & Z_REALLOCF)) {
2981 		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
2982 		    void*, addr);
2983 	}
2984 	if (__improbable(kmr.kmr_address == 0)) {
2985 		return (struct kalloc_result){ };
2986 	}
2987 
2988 	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
2989 	    void*, kmr.kmr_address);
2990 
2991 	if (flags & Z_KALLOC_ARRAY) {
2992 		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
2993 		    new_req_size);
2994 	}
2995 	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
2996 }
2997 
2998 #undef krealloc_ext
2999 
3000 struct kalloc_result
krealloc_ext(void * kheap_or_kt_view __unsafe_indexable,void * addr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags,void * owner)3001 krealloc_ext(
3002 	void                 *kheap_or_kt_view __unsafe_indexable,
3003 	void                 *addr,
3004 	vm_size_t             old_size,
3005 	vm_size_t             new_size,
3006 	zalloc_flags_t        flags,
3007 	void                 *owner)
3008 {
3009 	vm_size_t old_bucket_size, new_bucket_size, min_size;
3010 	kalloc_type_var_view_t kt_view;
3011 	kalloc_heap_t kheap;
3012 	zone_stats_t zstats = NULL;
3013 	struct kalloc_result kr;
3014 	vm_offset_t oob_offs = 0;
3015 	zone_t old_z, new_z;
3016 	uint16_t kt_hash = 0;
3017 	zone_id_t zstart;
3018 
3019 	if (old_size > KFREE_ABSURD_SIZE) {
3020 		krealloc_size_invalid_panic(addr, old_size);
3021 	}
3022 
3023 	if (addr == NULL && new_size == 0) {
3024 		return (struct kalloc_result){ };
3025 	}
3026 
3027 	if (kt_is_var_view(kheap_or_kt_view)) {
3028 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
3029 		kheap   = kalloc_type_get_heap(kt_view, false);
3030 		/*
3031 		 * Similar to kalloc_ext: Use stats from view if present,
3032 		 * else use stats from kheap.
3033 		 *
3034 		 * krealloc_type isn't exposed to kexts, so we don't need to
3035 		 * handle cross frees and can rely on stats from view or kheap.
3036 		 */
3037 		zstats  = kt_view->kt_stats;
3038 		kt_hash = KT_GET_HASH(kt_view->kt_flags);
3039 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
3040 	} else {
3041 		kt_view = NULL;
3042 		kheap   = kheap_or_kt_view;
3043 		kt_hash = kheap->kh_type_hash;
3044 		zstart  = kheap->kh_zstart;
3045 	}
3046 
3047 	if (!zstats) {
3048 		zstats = kheap->kh_stats;
3049 	}
3050 	/*
3051 	 * Find out the size of the bucket in which the new sized allocation
3052 	 * would land. If it matches the bucket of the original allocation,
3053 	 * simply return the same address.
3054 	 */
3055 	if (new_size == 0) {
3056 		new_z = ZONE_NULL;
3057 		new_bucket_size = new_size = 0;
3058 	} else {
3059 		zstart = kalloc_use_shared_heap(kheap, zstats, zstart, &flags);
3060 		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
3061 		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
3062 	}
3063 #if !KASAN_CLASSIC
3064 	if (flags & Z_FULLSIZE) {
3065 		new_size = new_bucket_size;
3066 	}
3067 #endif /* !KASAN_CLASSIC */
3068 
3069 	if (addr == NULL) {
3070 		old_z = ZONE_NULL;
3071 		old_size = old_bucket_size = 0;
3072 	} else if (kheap_size_from_zone(addr, old_size, flags)) {
3073 		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
3074 		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
3075 			kfree_size_confusion_panic(old_z, addr,
3076 			    oob_offs, old_size, old_bucket_size);
3077 		}
3078 		__builtin_assume(old_z != ZONE_NULL);
3079 	} else {
3080 		old_z = ZONE_NULL;
3081 		old_bucket_size = round_page(old_size);
3082 	}
3083 	min_size = MIN(old_size, new_size);
3084 
3085 	if (old_bucket_size == new_bucket_size && old_z) {
3086 		kr.addr = (char *)addr - oob_offs;
3087 		kr.size = new_size;
3088 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
3089 		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
3090 		    new_size, new_bucket_size);
3091 		if (kr.addr != addr) {
3092 			memmove(kr.addr, addr, min_size);
3093 			bzero((char *)kr.addr + min_size,
3094 			    kr.size - min_size);
3095 		}
3096 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3097 #if KASAN
3098 		/*
3099 		 * On KASAN kernels, treat a reallocation effectively as a new
3100 		 * allocation and add a sanity check around the existing one
3101 		 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't account
3102 		 * to much extra work, on KASAN_TBI, assign a new tag both to the
3103 		 * buffer and to the potential free space.
3104 		 */
3105 #if KASAN_CLASSIC
3106 		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
3107 		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
3108 		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
3109 #endif /* KASAN_CLASSIC */
3110 #if KASAN_TBI
3111 		/*
3112 		 * Validate the current buffer, then generate a new tag,
3113 		 * even if the address is stable, it's a "new" allocation.
3114 		 */
3115 		__asan_loadN((vm_offset_t)addr, old_size);
3116 		kr.addr = (void *)vm_memtag_assign_tag((vm_offset_t)kr.addr, kr.size);
3117 		vm_memtag_set_tag((vm_offset_t)kr.addr, kr.size);
3118 		kasan_tbi_retag_unused_space((vm_offset_t)kr.addr, new_bucket_size, kr.size);
3119 #endif /* KASAN_TBI */
3120 #endif /* KASAN */
3121 		goto out_success;
3122 	}
3123 
3124 #if !KASAN
3125 	/*
3126 	 * Fallthrough to krealloc_large() for KASAN,
3127 	 * because we can't use kasan_check_alloc()
3128 	 * on kalloc_large() memory.
3129 	 *
3130 	 * kmem_realloc_guard() will perform all the validations,
3131 	 * and re-tagging.
3132 	 */
3133 	if (old_bucket_size == new_bucket_size) {
3134 		kr.addr = (char *)addr - oob_offs;
3135 		kr.size = new_size;
3136 		goto out_success;
3137 	}
3138 #endif
3139 
3140 	if (addr && !old_z && new_size && !new_z) {
3141 		return krealloc_large(kheap, (vm_offset_t)addr,
3142 		           old_size, new_size, flags, kt_hash, owner);
3143 	}
3144 
3145 	if (!new_size) {
3146 		kr.addr = NULL;
3147 		kr.size = 0;
3148 	} else if (new_z) {
3149 		kr = kalloc_zone(new_z, zstats,
3150 		    flags & ~Z_KALLOC_ARRAY, new_size);
3151 	} else if (old_z || addr == NULL) {
3152 		kr = kalloc_large(kheap, new_size,
3153 		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
3154 	}
3155 
3156 	if (addr && kr.addr) {
3157 		__nosan_memcpy(kr.addr, addr, min_size);
3158 	}
3159 
3160 	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
3161 		if (old_z) {
3162 			kfree_zone(kheap_or_kt_view,
3163 			    (char *)addr - oob_offs, old_size,
3164 			    old_z, old_bucket_size);
3165 		} else {
3166 			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
3167 		}
3168 	}
3169 
3170 	if (__improbable(kr.addr == NULL)) {
3171 		return kr;
3172 	}
3173 
3174 out_success:
3175 	if ((flags & Z_KALLOC_ARRAY) == 0) {
3176 		return kr;
3177 	}
3178 
3179 	if (new_z) {
3180 		kr.addr = __kalloc_array_encode_zone(new_z,
3181 		    kr.addr, kr.size);
3182 	} else {
3183 		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
3184 		    kr.size);
3185 	}
3186 	return kr;
3187 }
3188 
3189 void *
3190 krealloc_data_external(
3191 	void               *ptr,
3192 	vm_size_t           old_size,
3193 	vm_size_t           new_size,
3194 	zalloc_flags_t      flags);
3195 void *
krealloc_data_external(void * ptr,vm_size_t old_size,vm_size_t new_size,zalloc_flags_t flags)3196 krealloc_data_external(
3197 	void               *ptr,
3198 	vm_size_t           old_size,
3199 	vm_size_t           new_size,
3200 	zalloc_flags_t      flags)
3201 {
3202 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
3203 	return krealloc_ext(KHEAP_DATA_BUFFERS, ptr, old_size, new_size, flags, NULL).addr;
3204 }
3205 
3206 __startup_func
3207 static void
kheap_init(kalloc_heap_t parent_heap,kalloc_heap_t kheap)3208 kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
3209 {
3210 	kheap->kh_zstart      = parent_heap->kh_zstart;
3211 	kheap->kh_heap_id     = parent_heap->kh_heap_id;
3212 	kheap->kh_tag         = parent_heap->kh_tag;
3213 	kheap->kh_stats       = zalloc_percpu_permanent_type(struct zone_stats);
3214 	zone_view_count += 1;
3215 }
3216 
3217 __startup_func
3218 static void
kheap_init_data(kalloc_heap_t kheap)3219 kheap_init_data(kalloc_heap_t kheap)
3220 {
3221 	kheap_init(KHEAP_DATA_BUFFERS, kheap);
3222 	kheap->kh_views               = KHEAP_DATA_BUFFERS->kh_views;
3223 	KHEAP_DATA_BUFFERS->kh_views  = kheap;
3224 }
3225 
3226 __startup_func
3227 static void
kheap_init_var(kalloc_heap_t kheap)3228 kheap_init_var(kalloc_heap_t kheap)
3229 {
3230 	uint16_t idx;
3231 	struct kheap_info *parent_heap;
3232 
3233 	kheap_init(KHEAP_KT_VAR, kheap);
3234 	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
3235 	    KT_VAR__FIRST_FLEXIBLE_HEAP;
3236 	parent_heap = &kalloc_type_heap_array[idx];
3237 	kheap->kh_zstart = parent_heap->kh_zstart;
3238 	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
3239 		(uint32_t) early_random(), 0);
3240 	kheap->kh_views       = parent_heap->kh_views;
3241 	parent_heap->kh_views = kheap;
3242 }
3243 
3244 __startup_func
3245 void
kheap_startup_init(kalloc_heap_t kheap)3246 kheap_startup_init(kalloc_heap_t kheap)
3247 {
3248 	switch (kheap->kh_heap_id) {
3249 	case KHEAP_ID_DATA_BUFFERS:
3250 		kheap_init_data(kheap);
3251 		break;
3252 	case KHEAP_ID_KT_VAR:
3253 		kheap_init_var(kheap);
3254 		break;
3255 	default:
3256 		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
3257 		    kheap->kh_heap_id);
3258 	}
3259 }
3260 
3261 #pragma mark IOKit/libkern helpers
3262 
3263 #if XNU_PLATFORM_MacOSX
3264 
3265 void *
3266 kern_os_malloc_external(size_t size);
3267 void *
kern_os_malloc_external(size_t size)3268 kern_os_malloc_external(size_t size)
3269 {
3270 	if (size == 0) {
3271 		return NULL;
3272 	}
3273 
3274 	return kheap_alloc(KERN_OS_MALLOC, size,
3275 	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
3276 }
3277 
3278 void
3279 kern_os_free_external(void *addr);
3280 void
kern_os_free_external(void * addr)3281 kern_os_free_external(void *addr)
3282 {
3283 	kheap_free_addr(KERN_OS_MALLOC, addr);
3284 }
3285 
3286 void *
3287 kern_os_realloc_external(void *addr, size_t nsize);
3288 void *
kern_os_realloc_external(void * addr,size_t nsize)3289 kern_os_realloc_external(void *addr, size_t nsize)
3290 {
3291 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
3292 	vm_size_t osize, oob_offs = 0;
3293 
3294 	if (addr == NULL) {
3295 		return kern_os_malloc_external(nsize);
3296 	}
3297 
3298 	osize = zone_element_size(addr, NULL, false, &oob_offs);
3299 	if (osize == 0) {
3300 		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
3301 		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
3302 #if KASAN_CLASSIC
3303 	} else {
3304 		osize = kasan_user_size((vm_offset_t)addr);
3305 #endif
3306 	}
3307 	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3308 }
3309 
3310 #endif /* XNU_PLATFORM_MacOSX */
3311 
3312 void
kern_os_zfree(zone_t zone,void * addr,vm_size_t size)3313 kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3314 {
3315 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3316 #pragma unused(size)
3317 	zfree(zone, addr);
3318 #else
3319 	if (zone_owns(zone, addr)) {
3320 		zfree(zone, addr);
3321 	} else {
3322 		/*
3323 		 * Third party kexts might not know about the operator new
3324 		 * and be allocated from the default heap
3325 		 */
3326 		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3327 		    zone->z_name);
3328 		kheap_free(KHEAP_DEFAULT, addr, size);
3329 	}
3330 #endif
3331 }
3332 
3333 bool
IOMallocType_from_vm(kalloc_type_view_t ktv)3334 IOMallocType_from_vm(kalloc_type_view_t ktv)
3335 {
3336 	return kalloc_type_from_vm(ktv->kt_flags);
3337 }
3338 
3339 void
kern_os_typed_free(kalloc_type_view_t ktv,void * addr,vm_size_t esize)3340 kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3341 {
3342 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3343 #pragma unused(esize)
3344 #else
3345 	/*
3346 	 * For third party kexts that have been compiled with sdk pre macOS 11,
3347 	 * an allocation of an OSObject that is defined in xnu or first pary
3348 	 * kexts, by directly calling new will lead to using the default heap
3349 	 * as it will call OSObject_operator_new_external. If this object
3350 	 * is freed by xnu, it panics as xnu uses the typed free which
3351 	 * requires the object to have been allocated in a kalloc.type zone.
3352 	 * To workaround this issue, detect if the allocation being freed is
3353 	 * from the default heap and allow freeing to it.
3354 	 */
3355 	zone_id_t zid = zone_id_for_element(addr, esize);
3356 	if (__probable(zid < MAX_ZONES)) {
3357 		zone_security_flags_t zsflags = zone_security_array[zid];
3358 		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
3359 			return kheap_free(KHEAP_DEFAULT, addr, esize);
3360 		}
3361 	}
3362 #endif
3363 	kfree_type_impl_external(ktv, addr);
3364 }
3365 
3366 #pragma mark tests
3367 #if DEBUG || DEVELOPMENT
3368 
3369 #include <sys/random.h>
3370 
3371 /*
3372  * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
3373  *
3374  * Note: Presence of zones with name kalloc.type* is used to
3375  * determine if the feature is on.
3376  */
3377 static int
kalloc_type_feature_on(void)3378 kalloc_type_feature_on(void)
3379 {
3380 	boolean_t zone_found = false;
3381 	const char kalloc_type_str[] = "kalloc.type";
3382 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3383 		zone_t z = kalloc_type_zarray[i];
3384 		while (z != NULL) {
3385 			zone_found = true;
3386 			if (strncmp(z->z_name, kalloc_type_str,
3387 			    strlen(kalloc_type_str)) != 0) {
3388 				return 0;
3389 			}
3390 			z = z->z_kt_next;
3391 		}
3392 	}
3393 
3394 	if (!zone_found) {
3395 		return 0;
3396 	}
3397 
3398 	return 1;
3399 }
3400 
3401 /*
3402  * Ensure that the policy uses the zone budget completely
3403  */
3404 static int
kalloc_type_test_policy(int64_t in)3405 kalloc_type_test_policy(int64_t in)
3406 {
3407 	uint16_t zone_budget = (uint16_t) in;
3408 	uint16_t max_bucket_freq = 25;
3409 	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3410 	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3411 	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
3412 	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
3413 	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
3414 	uint16_t wasted_zone_budget = 0, total_types = 0;
3415 	uint16_t n_zones = 0, n_zones_cal = 0;
3416 	int ret = 0;
3417 
3418 	/*
3419 	 * Need a minimum of 2 zones per size class
3420 	 */
3421 	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
3422 		return ret;
3423 	}
3424 	read_random((void *)&random[0], sizeof(random));
3425 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3426 		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
3427 		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;
3428 
3429 		freq_list[i] = r1 > r2 ? r2 : r1;
3430 		freq_total_list[i] = r1 > r2 ? r1 : r2;
3431 	}
3432 	wasted_zone_budget = kalloc_type_apply_policy(
3433 		freq_list, freq_total_list,
3434 		zones_per_sig, zones_per_type, zone_budget);
3435 
3436 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3437 		total_types += freq_total_list[i];
3438 	}
3439 
3440 	n_zones = kmem_get_random16(total_types);
3441 	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
3442 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3443 		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
3444 		    freq_total_list[i], total_types,
3445 		    (i == MAX_K_ZONE(kt_zone_cfg) - 1) ? true : false);
3446 
3447 		n_zones_cal += n_zones_for_type;
3448 
3449 		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
3450 	}
3451 	printf("-----------------------\n%u\t%u\n", total_types,
3452 	    n_zones_cal);
3453 
3454 	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
3455 		ret = 1;
3456 	}
3457 	return ret;
3458 }
3459 
3460 /*
3461  * Ensure that size of adopters of kalloc_type fit in the zone
3462  * they have been assigned.
3463  */
3464 static int
kalloc_type_check_size(zone_t z)3465 kalloc_type_check_size(zone_t z)
3466 {
3467 	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
3468 
3469 	while (kt_cur != NULL) {
3470 		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
3471 			return 0;
3472 		}
3473 		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3474 	}
3475 
3476 	return 1;
3477 }
3478 
/* Minimal single-int type used by the kalloc_type data-redirect test. */
struct test_kt_data {
	int a;
};
3482 
3483 static int
kalloc_type_test_data_redirect(void)3484 kalloc_type_test_data_redirect(void)
3485 {
3486 	struct kalloc_type_view ktv_data = {
3487 		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
3488 		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
3489 	};
3490 	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
3491 		printf("%s: data redirect failed\n", __func__);
3492 		return 0;
3493 	}
3494 	return 1;
3495 }
3496 
3497 static int
run_kalloc_type_test(int64_t in,int64_t * out)3498 run_kalloc_type_test(int64_t in, int64_t *out)
3499 {
3500 	*out = 0;
3501 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3502 		zone_t z = kalloc_type_zarray[i];
3503 		while (z != NULL) {
3504 			if (!kalloc_type_check_size(z)) {
3505 				printf("%s: size check failed\n", __func__);
3506 				return 0;
3507 			}
3508 			z = z->z_kt_next;
3509 		}
3510 	}
3511 
3512 	if (!kalloc_type_test_policy(in)) {
3513 		printf("%s: policy check failed\n", __func__);
3514 		return 0;
3515 	}
3516 
3517 	if (!kalloc_type_feature_on()) {
3518 		printf("%s: boot-arg is on but feature isn't\n", __func__);
3519 		return 0;
3520 	}
3521 
3522 	if (!kalloc_type_test_data_redirect()) {
3523 		printf("%s: kalloc_type redirect for all data signature failed\n",
3524 		    __func__);
3525 		return 0;
3526 	}
3527 
3528 	printf("%s: test passed\n", __func__);
3529 
3530 	*out = 1;
3531 	return 0;
3532 }
3533 SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
3534 
3535 static vm_size_t
test_bucket_size(kalloc_heap_t kheap,vm_size_t size)3536 test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
3537 {
3538 	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);
3539 
3540 	return z ? zone_elem_inner_size(z) : round_page(size);
3541 }
3542 
3543 static int
run_kalloc_test(int64_t in __unused,int64_t * out)3544 run_kalloc_test(int64_t in __unused, int64_t *out)
3545 {
3546 	*out = 0;
3547 	uint64_t *data_ptr;
3548 	void *strippedp_old, *strippedp_new;
3549 	size_t alloc_size = 0, old_alloc_size = 0;
3550 	struct kalloc_result kr = {};
3551 
3552 	printf("%s: test running\n", __func__);
3553 
3554 	/*
3555 	 * Test size 0: alloc, free, realloc
3556 	 */
3557 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3558 	    NULL).addr;
3559 	if (!data_ptr) {
3560 		printf("%s: kalloc 0 returned null\n", __func__);
3561 		return 0;
3562 	}
3563 	kheap_free(KHEAP_DATA_BUFFERS, data_ptr, alloc_size);
3564 
3565 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3566 	    NULL).addr;
3567 	alloc_size = sizeof(uint64_t) + 1;
3568 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, kr.addr, old_alloc_size,
3569 	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
3570 	if (!data_ptr) {
3571 		printf("%s: krealloc -> old size 0 failed\n", __func__);
3572 		return 0;
3573 	}
3574 	*data_ptr = 0;
3575 
3576 	/*
3577 	 * Test krealloc: same sizeclass, different size classes, 2pgs,
3578 	 * VM (with owner)
3579 	 */
3580 	old_alloc_size = alloc_size;
3581 	alloc_size++;
3582 	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3583 	    Z_WAITOK | Z_NOFAIL, NULL);
3584 
3585 	strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
3586 	strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);
3587 
3588 	if (!kr.addr || (strippedp_old != strippedp_new) ||
3589 	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) !=
3590 	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3591 		printf("%s: krealloc -> same size class failed\n", __func__);
3592 		return 0;
3593 	}
3594 	data_ptr = kr.addr;
3595 	*data_ptr = 0;
3596 
3597 	old_alloc_size = alloc_size;
3598 	alloc_size *= 2;
3599 	kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3600 	    Z_WAITOK | Z_NOFAIL, NULL);
3601 
3602 	strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
3603 	strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);
3604 
3605 	if (!kr.addr || (strippedp_old == strippedp_new) ||
3606 	    (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) ==
3607 	    test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3608 		printf("%s: krealloc -> different size class failed\n", __func__);
3609 		return 0;
3610 	}
3611 	data_ptr = kr.addr;
3612 	*data_ptr = 0;
3613 
3614 	kheap_free(KHEAP_DATA_BUFFERS, kr.addr, alloc_size);
3615 
3616 	alloc_size = 3544;
3617 	data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size,
3618 	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
3619 	if (!data_ptr) {
3620 		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned not null\n",
3621 		    __func__);
3622 		return 0;
3623 	}
3624 	*data_ptr = 0;
3625 
3626 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, alloc_size,
3627 	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3628 	if (!data_ptr) {
3629 		printf("%s: krealloc -> 2pgs returned not null\n", __func__);
3630 		return 0;
3631 	}
3632 	*data_ptr = 0;
3633 
3634 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, PAGE_SIZE * 2,
3635 	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3636 	if (!data_ptr) {
3637 		printf("%s: krealloc -> VM1 returned not null\n", __func__);
3638 		return 0;
3639 	}
3640 	*data_ptr = 0;
3641 
3642 	data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 2,
3643 	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3644 	*data_ptr = 0;
3645 	if (!data_ptr) {
3646 		printf("%s: krealloc -> VM2 returned not null\n", __func__);
3647 		return 0;
3648 	}
3649 
3650 	krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 4,
3651 	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);
3652 
3653 	printf("%s: test passed\n", __func__);
3654 	*out = 1;
3655 	return 0;
3656 }
3657 SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
3658 
3659 #endif
3660