/*
 * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/kalloc.c
 *	Author:	Avadis Tevanian, Jr.
 *	Date:	1985
 *
 *	General kernel memory allocator.  This allocator is designed
 *	to be used by the kernel to manage dynamic memory fast.
 */

#include "mach/vm_types.h"
#include <mach/boolean.h>
#include <mach/sdt.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/counter.h>
#include <kern/zalloc_internal.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/backtrace.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_object_xnu.h>
#include <vm/vm_map.h>
#include <vm/vm_memtag.h>
#include <sys/kdebug.h>

#include <os/hash.h>
#include <san/kasan.h>
#include <libkern/section_keywords.h>
#include <libkern/prelink.h>

#if HAS_MTE
#include <arm64/mte_xnu.h>
#endif /* HAS_MTE */

SCALABLE_COUNTER_DEFINE(kalloc_large_count);
SCALABLE_COUNTER_DEFINE(kalloc_large_total);

#pragma mark initialization

/*
 * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
 * sized zone.  This allocator is built on top of the zone allocator.  A zone
 * is created for each potential size that we are willing to get in small
 * blocks.
 *
 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
 */
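
/*
 * Worked example (a sketch; the exact bucket sizes come from the ladder
 * computed in kalloc_zsize_compute() below): a 100-byte allocation is
 * served by the smallest zone whose element size is >= 100, e.g. the
 * 128-byte zone, while a KHEAP_MAX_SIZE + 1 byte request bypasses the
 * zones entirely and goes to kalloc_large(), i.e. straight to the VM.
 */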

/*
 * The kt_zone_cfg table defines the configuration of zones on various
 * platforms for kalloc_type fixed size allocations.
 */

#if KASAN_CLASSIC
#define K_SIZE_CLASS(size)    \
	(((size) & PAGE_MASK) == 0 ? (size) : \
	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
#else
#define K_SIZE_CLASS(size)    (size)
#endif
static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
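
/*
 * For instance (a sketch; the concrete KASAN_GUARD_SIZE value is
 * configuration dependent): under KASAN_CLASSIC, sizes that are page
 * multiples or <= 1024 are left untouched, so K_SIZE_CLASS(512) == 512,
 * while a larger non page-multiple size gives back room for its redzone,
 * e.g. K_SIZE_CLASS(2048) == 2048 - KASAN_GUARD_SIZE, keeping element
 * plus redzone within the intended bucket.  Without KASAN_CLASSIC the
 * macro is the identity.
 */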

static const uint16_t kt_zone_cfg[] = {
	K_SIZE_CLASS(16),
	K_SIZE_CLASS(32),
	K_SIZE_CLASS(48),
	K_SIZE_CLASS(64),
	K_SIZE_CLASS(80),
	K_SIZE_CLASS(96),
	K_SIZE_CLASS(128),
	K_SIZE_CLASS(160),
	K_SIZE_CLASS(192),
	K_SIZE_CLASS(224),
	K_SIZE_CLASS(256),
	K_SIZE_CLASS(288),
	K_SIZE_CLASS(368),
	K_SIZE_CLASS(400),
	K_SIZE_CLASS(512),
	K_SIZE_CLASS(576),
	K_SIZE_CLASS(768),
	K_SIZE_CLASS(1024),
	K_SIZE_CLASS(1152),
	K_SIZE_CLASS(1280),
	K_SIZE_CLASS(1664),
	K_SIZE_CLASS(2048),
	K_SIZE_CLASS(4096),
	K_SIZE_CLASS(6144),
	K_SIZE_CLASS(8192),
	K_SIZE_CLASS(12288),
	K_SIZE_CLASS(16384),
#if __arm64__
	K_SIZE_CLASS(24576),
	K_SIZE_CLASS(32768),
#endif /* __arm64__ */
};

#define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))

/*
 * kalloc_type callsites are assigned a zone during early boot. They
 * use the dlut[] (direct lookup table), indexed by size normalized
 * to the minimum alignment to find the right zone index quickly.
 */
#define INDEX_ZDLUT(size)       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define KALLOC_DLUT_SIZE        (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
#define MAX_SIZE_ZDLUT          ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
static __startup_data uint8_t   kalloc_type_dlut[KALLOC_DLUT_SIZE];
static __startup_data uint32_t  kheap_zsize[KHEAP_NUM_ZONES];
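
/*
 * Worked example (a sketch, assuming KALLOC_MINALIGN == 16):
 * INDEX_ZDLUT(size) is ceil(size / 16), so a 40-byte type maps to slot
 * (40 + 15) / 16 == 3.  kalloc_type_build_dlut() below fills that slot
 * with the index of the smallest kt_zone_cfg entry that fits, here the
 * 48-byte class at zindex 2.
 */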

#if VM_TAG_SIZECLASSES
static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
#endif

const char * const kalloc_heap_names[] = {
	[KHEAP_ID_NONE]          = "",
	[KHEAP_ID_EARLY]         = "early.",
	[KHEAP_ID_DATA_BUFFERS]  = "data.",
	[KHEAP_ID_DATA_SHARED]   = "data_shared.",
	[KHEAP_ID_KT_VAR]        = "",
};

/*
 * Early heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_EARLY[1] = {
	{
		.kh_name     = "early.kalloc",
		.kh_heap_id  = KHEAP_ID_EARLY,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE,
	}
};

/*
 * Bag of bytes heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
	{
		.kh_name     = "data.kalloc",
		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
	}
};

/*
 * Configuration of variable kalloc type heaps
 */
SECURITY_READ_ONLY_LATE(struct kheap_info)
kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
	{
		.kh_name     = "kalloc.type.var",
		.kh_heap_id  = KHEAP_ID_KT_VAR,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
	}
};

/*
 * Shared heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_SHARED[1] = {
	{
		.kh_name     = "data_shared.kalloc",
		.kh_heap_id  = KHEAP_ID_DATA_SHARED,
		.kh_tag      = VM_KERN_MEMORY_KALLOC_SHARED,
	}
};

KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);

__startup_func
static void
kalloc_zsize_compute(void)
{
	uint32_t step = KHEAP_STEP_START;
	uint32_t size = KHEAP_START_SIZE;

	/*
	 * Manually initialize extra initial zones
	 */
	kheap_zsize[0] = size / 2;
	kheap_zsize[1] = size;
	static_assert(KHEAP_EXTRA_ZONES == 2);

	/*
	 * Compute sizes for remaining zones
	 */
	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;

		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);

		step *= 2;
		size += step;
	}
}
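
/*
 * Worked example (a sketch, assuming KHEAP_START_SIZE == 32 and
 * KHEAP_STEP_START == 16): each step emits size + step and
 * size + 2 * step and then doubles, so the ladder reads
 * 16, 32 | 48, 64 | 96, 128 | 192, 256 | 384, 512 | 768, 1024 |
 * 1536, 2048 | 3072, 4096 | 6144, 8192 | 12288, 16384 | 24576, 32768,
 * ending at KHEAP_MAX_SIZE.
 */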

static zone_t
kalloc_zone_for_size_with_flags(
	zone_id_t               zid,
	vm_size_t               size,
	zalloc_flags_t          flags)
{
	vm_size_t max_size = KHEAP_MAX_SIZE;
	bool forcopyin = flags & Z_MAY_COPYINMAP;
	zone_t zone;

	if (flags & Z_KALLOC_ARRAY) {
		size = roundup(size, KALLOC_ARRAY_GRANULE);
	}

	if (forcopyin) {
#if __x86_64__
		/*
		 * On Intel, the OSData() ABI used to allocate
		 * from the kernel map starting at PAGE_SIZE.
		 *
		 * If only vm_map_copyin() or a wrapper is used,
		 * then everything will work fine because vm_map_copy_t
		 * will perform an actual copy if the data is smaller
		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
		 *
		 * However, if anyone is trying to call mach_vm_remap(),
		 * then bad things (TM) happen.
		 *
		 * Avoid this by preserving the ABI and moving
		 * to kalloc_large() earlier.
		 *
		 * Any recent code really ought to use IOMemoryDescriptor
		 * for this purpose however.
		 */
		max_size = PAGE_SIZE - 1;
#endif
	}

	if (size <= max_size) {
		uint32_t idx;

		if (size <= KHEAP_START_SIZE) {
			zid  += (size > 16);
		} else {
			/*
			 * . log2down(size - 1) is log2up(size) - 1
			 * . (size - 1) >> (log2down(size - 1) - 1)
			 *   is either 0x2 or 0x3
			 */
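			/*
			 * Worked example (a sketch, assuming KHEAP_START_SIZE == 32,
			 * hence KHEAP_START_IDX == 5, and KHEAP_EXTRA_ZONES == 2):
			 * for size == 96, idx = log2down(95) == 6 and
			 * (95 >> 5) == 0x2, giving a bucket offset of
			 * 2 + 2 * (6 - 5) + 2 - 2 == 4, the 96-byte zone;
			 * for size == 97, (96 >> 5) == 0x3 selects offset 5,
			 * the 128-byte zone.
			 */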
			idx   = kalloc_log2down((uint32_t)(size - 1));
			zid  += KHEAP_EXTRA_ZONES +
			    2 * (idx - KHEAP_START_IDX) +
			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
		}

		zone = zone_by_id(zid);
#if KASAN_CLASSIC
		/*
		 * Under kasan classic, certain size classes are a redzone
		 * away from the mathematical formula above, and we need
		 * to "go to the next zone".
		 *
		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
		 * this will never go to an "invalid" zone that doesn't
		 * belong to the kheap.
		 */
		if (size > zone_elem_inner_size(zone)) {
			zone++;
		}
#endif
		return zone;
	}

	return ZONE_NULL;
}

zone_t
kalloc_zone_for_size(zone_id_t zid, size_t size)
{
	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
}

static inline bool
kheap_size_from_zone(
	void                   *addr,
	vm_size_t               size,
	zalloc_flags_t          flags)
{
	vm_size_t max_size = KHEAP_MAX_SIZE;
	bool forcopyin = flags & Z_MAY_COPYINMAP;

#if __x86_64__
	/*
	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
	 * behavior, the element could have a PAGE_SIZE reported size,
	 * yet still be from a zone for Z_MAY_COPYINMAP.
	 */
	if (forcopyin) {
		if (size == PAGE_SIZE &&
		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
			return true;
		}

		max_size = PAGE_SIZE - 1;
	}
#else
#pragma unused(addr, forcopyin)
#endif

	return size <= max_size;
}

/*
 * Data zones should never use the early zone, so set the no-early-alloc
 * bit right after creation.
 */
__startup_func
static void
kalloc_set_no_early_for_data(
	zone_kheap_id_t       kheap_id,
	zone_stats_t          zstats)
{
	if (zone_is_data_kheap(kheap_id)) {
		zpercpu_foreach(zs, zstats) {
			os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
		}
	}
}

__startup_func
static void
kalloc_zone_init(
	const char           *kheap_name,
	zone_kheap_id_t       kheap_id,
	zone_id_t            *kheap_zstart,
	zone_create_flags_t   zc_flags)
{
	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
		zc_flags |= ZC_DATA;
	}

	if (kheap_id == KHEAP_ID_DATA_SHARED) {
		zc_flags |= ZC_SHARED_DATA;
	}

	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
		uint32_t size = kheap_zsize[i];
		char buf[MAX_ZONE_NAME], *z_name;
		int len;

		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
		strlcpy(z_name, buf, len + 1);

		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
#if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
			uint32_t scale = kalloc_log2down(size / 32);

			if (size == 32 << scale) {
			        z->z_array_size_class = scale;
			} else {
			        z->z_array_size_class = scale | 0x10;
			}
#endif
			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
			if (i == 0) {
			        *kheap_zstart = zone_index(z);
			}
			kalloc_set_no_early_for_data(kheap_id, z->z_stats);
		});
	}
}

__startup_func
static void
kalloc_heap_init(struct kalloc_heap *kheap)
{
	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
	    ZC_NONE);
	/*
	 * Count all the "raw" views for zones in the heap.
	 */
	zone_view_count += KHEAP_NUM_ZONES;
}

#define KEXT_ALIGN_SHIFT           6
#define KEXT_ALIGN_BYTES           (1 << KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES - 1)
#define kt_scratch_size            (256ul << 10)
#define KALLOC_TYPE_SECTION(type) \
	(type == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")

/*
 * Enum to specify the kalloc_type variant being used.
 */
__options_decl(kalloc_type_variant_t, uint16_t, {
	KTV_FIXED     = 0x0001,
	KTV_VAR       = 0x0002,
});

/*
 * Macros that generate the appropriate kalloc_type variant (i.e. fixed or
 * variable) of the desired variable/function.
 */
#define kalloc_type_var(type, var)              \
	((type) == KTV_FIXED?                       \
	(vm_offset_t) kalloc_type_##var##_fixed:    \
	(vm_offset_t) kalloc_type_##var##_var)
#define kalloc_type_func(type, func, ...)       \
	((type) == KTV_FIXED?                       \
	kalloc_type_##func##_fixed(__VA_ARGS__):    \
	kalloc_type_##func##_var(__VA_ARGS__))
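
/*
 * Example expansions (a sketch): kalloc_type_func(type, view_sz)
 * evaluates kalloc_type_view_sz_fixed() when type == KTV_FIXED and
 * kalloc_type_view_sz_var() otherwise, while
 * kalloc_type_var(KTV_VAR, sec_start) yields
 * (vm_offset_t) kalloc_type_sec_start_var, the start of the variable
 * views section declared further below.
 */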

TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
    ZSECURITY_CONFIG_KT_VAR_BUDGET);
TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
    ZSECURITY_CONFIG_KT_BUDGET);
TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);


/**
 * @const kexts_enroll_data_shared
 *
 * @brief
 * We have two heaps for data allocations:
 *     - KHEAP_DATA_BUFFERS, which is for allocations that are never shared.
 *     - KHEAP_DATA_SHARED, which is for allocations that need to be shared.
 *
 * This is a control that indicates which heap we expose to kexts via the
 * exported allocation functions.
 */
STATIC_IF_KEY_DEFINE_TRUE(kexts_enroll_data_shared);

/**
 * @const restricted_data_mode
 *
 * @brief
 * This is a control that sets the enforcement mode of mapping policies
 * on data allocations:
 *     - none: the state before the change (no telemetry, no enforcement).
 *     - telemetry: do not enforce, but do emit telemetry.
 *     - enforce: type the KHEAP_DATA_BUFFERS pages as restricted mappings.
 *
 * Combined with kexts_enroll_data_shared, we can create the modes we need
 * for none/telemetry/enforcement on the core kernel and kexts.
 *
 * restricted_data_mode_t is an enum used to specify the mode being used.
 */

__options_decl(restricted_data_mode_t, uint8_t, {
	RESTRICTED_DATA_MODE_NONE      = 0x0000,
	RESTRICTED_DATA_MODE_TELEMETRY = 0x0001,
	RESTRICTED_DATA_MODE_ENFORCE   = 0x0002
});

TUNABLE(restricted_data_mode_t,
    restricted_data_mode,
    "restricted_data_mode",
#if __x86_64__
    RESTRICTED_DATA_MODE_NONE
#else
    RESTRICTED_DATA_MODE_TELEMETRY
#endif /* __x86_64__ */
    );

inline bool
kalloc_is_restricted_data_mode_telemetry(void)
{
	return restricted_data_mode == RESTRICTED_DATA_MODE_TELEMETRY;
}

inline bool
kalloc_is_restricted_data_mode_enforced(void)
{
	return restricted_data_mode == RESTRICTED_DATA_MODE_ENFORCE;
}

inline bool
kmem_needs_data_share_range(void)
{
	/*
	 * The dedicated range is required only for
	 * telemetry reporting, when we need to distinguish
	 * between the two kinds of data via kmem ranges.
	 *
	 * Even though this is strictly the same as checking the telemetry
	 * mode, it's better to have a well-defined abstraction layer for it,
	 * adopted in all the call-sites, to stay flexible w.r.t. future
	 * changes / unrolling.
	 */
	return kalloc_is_restricted_data_mode_telemetry();
}

/*
 * Section start/end for fixed kalloc_type views
 */
extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

/*
 * Section start/end for variable kalloc_type views
 */
extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

__startup_data
static kalloc_type_views_t *kt_buffer = NULL;
__startup_data
static uint64_t kt_count;
__startup_data
uint32_t kalloc_type_hash_seed;

__startup_data
static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
__startup_data
static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];

struct nzones_with_idx {
	uint16_t nzones;
	uint16_t idx;
};
int16_t zone_carry = 0;

_Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
    "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");

/*
 * For use by lldb to iterate over kalloc types
 */
SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];

#define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
    KMEM_DIRECTION_MASK),
    "Insufficient bits to represent range and dir for VM allocations");
static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
    "validate idx mask");
/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)
{
	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
}

static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
{
	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
}
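
/*
 * Packing sketch: a view's kt_size carries the allocation size in its
 * low KALLOC_TYPE_IDX_SHIFT bits and the sizeclass index above them,
 * so kalloc_type_get_idx(kalloc_type_set_idx(size, idx)) == idx and
 * kalloc_type_get_size() recovers the size, provided size stays below
 * 1 << KALLOC_TYPE_IDX_SHIFT.
 */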

static void
kalloc_type_build_dlut(void)
{
	vm_size_t size = 0;
	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
		uint8_t zindex = 0;
		while (kt_zone_cfg[zindex] < size) {
			zindex++;
		}
		kalloc_type_dlut[i] = zindex;
	}
}
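
/*
 * For instance (a sketch, assuming KALLOC_MINALIGN == 16): slot 3
 * corresponds to size 48, and the smallest kt_zone_cfg entry >= 48 is
 * 48 itself at zindex 2, so any request of 33..48 bytes resolves to
 * that zone with a single table load.
 */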

static uint32_t
kalloc_type_idx_for_size(uint32_t size)
{
	assert(size <= KHEAP_MAX_SIZE);
	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
	return kalloc_type_set_idx(size, idx);
}

static void
kalloc_type_assign_zone_fixed(
	kalloc_type_view_t     *cur,
	kalloc_type_view_t     *end,
	zone_t                  z,
	zone_t                  sig_zone,
	zone_t                  early_zone)
{
	/*
	 * Assign the zone created for every kalloc_type_view
	 * of the same unique signature
	 */
	bool need_raw_view = false;

	while (cur < end) {
		kalloc_type_view_t kt = *cur;
		struct zone_view *zv = &kt->kt_zv;
		zv->zv_zone = z;
		kalloc_type_flags_t kt_flags = kt->kt_flags;
		zone_security_flags_t zsflags = zone_security_config(z);

		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
		if (!early_zone) {
			assert(zone_is_data_kheap(zsflags.z_kheap_id));
		}

		if (kt_flags & KT_SLID) {
			kt->kt_signature -= vm_kernel_slide;
			kt->kt_zv.zv_name -= vm_kernel_slide;
		}

		if ((kt_flags & KT_PRIV_ACCT) ||
		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
			zv->zv_stats = zalloc_percpu_permanent_type(
				struct zone_stats);
			need_raw_view = true;
			zone_view_count += 1;
		} else {
			zv->zv_stats = z->z_stats;
		}

		if ((kt_flags & KT_NOEARLY) || !early_zone) {
			if ((kt_flags & KT_NOEARLY) && !(kt_flags & KT_PRIV_ACCT)) {
				panic("KT_NOEARLY used w/o private accounting for view %s",
				    zv->zv_name);
			}

			zpercpu_foreach(zs, zv->zv_stats) {
				os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
			}
		}

		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
			kt->kt_zearly = early_zone;
			kt->kt_zsig = sig_zone;
			/*
			 * If we haven't yet set the signature equivalence then set it,
			 * otherwise validate that the zone has the same signature
			 * equivalence as the sig_zone provided.
			 */
			if (!zone_get_sig_eq(z)) {
				zone_set_sig_eq(z, zone_index(sig_zone));
			} else {
				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
			}
		}
		zv->zv_next = (zone_view_t) z->z_views;
		zv->zv_zone->z_views = (zone_view_t) kt;
		cur++;
	}
	if (need_raw_view) {
		zone_view_count += 1;
	}
}

__startup_func
static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
    kalloc_type_var_view_t *end, uint32_t heap_idx)
{
	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
	while (cur < end) {
		kalloc_type_var_view_t kt = *cur;
		kt->kt_heap_start = cfg->kh_zstart;
		kalloc_type_flags_t kt_flags = kt->kt_flags;

		if (kt_flags & KT_SLID) {
			if (kt->kt_sig_hdr) {
				kt->kt_sig_hdr -= vm_kernel_slide;
			}
			kt->kt_sig_type -= vm_kernel_slide;
			kt->kt_name -= vm_kernel_slide;
		}

		if ((kt_flags & KT_PRIV_ACCT) ||
		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
			zone_view_count += 1;
		}

		kt->kt_next = (zone_view_t) cfg->kt_views;
		cfg->kt_views = kt;
		cur++;
	}
}

__startup_func
static inline void
kalloc_type_slide_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
	ktv->kt_signature += vm_kernel_slide;
	ktv->kt_zv.zv_name += vm_kernel_slide;
	ktv->kt_flags |= KT_SLID;
}

__startup_func
static inline void
kalloc_type_slide_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
	if (ktv->kt_sig_hdr) {
		ktv->kt_sig_hdr += vm_kernel_slide;
	}
	ktv->kt_sig_type += vm_kernel_slide;
	ktv->kt_name += vm_kernel_slide;
	ktv->kt_flags |= KT_SLID;
}

__startup_func
static void
kalloc_type_validate_flags(
	kalloc_type_flags_t   kt_flags,
	const char           *kt_name,
	uuid_string_t         kext_uuid)
{
	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
		    "required xnu headers", kt_name, kext_uuid);
	}
}

static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
	return ktv->kt_flags;
}

static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
	return ktv->kt_flags;
}

/*
 * Check if signature of type is made up of only data and padding,
 * which is meant to never be shared.
 */
static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED);
	return kt_flags & KT_DATA_ONLY;
}

/*
 * Check if signature of type is made up of only pointers
 */
static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED2);
	return kt_flags & KT_PTR_ARRAY;
}

static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED);
	return kt_flags & KT_VM;
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_fixed(void)
{
	return sizeof(struct kalloc_type_view);
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_var(void)
{
	return sizeof(struct kalloc_type_var_view);
}

__startup_func
static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
    vm_offset_t end)
{
	return (end - start) / kalloc_type_func(type, view_sz);
}

__startup_func
static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
}

__startup_func
static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
}

__startup_func
static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
	    cur_data_view->kt_size);
	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
	    NULL);
}

__startup_func
static void
kalloc_type_handle_data_view_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
	zone_t z = kalloc_zone_for_size(KHEAP_DATA_SHARED->kh_zstart,
	    cur_data_view->kt_size);
	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
	    NULL);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_SHARED_HEAP);
}

__startup_func
static uint32_t
kalloc_type_handle_parray_var(void)
{
	uint32_t i = 0;
	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
	const char *p_name = kt->kt_name;

	/*
	 * The sorted list of variable kalloc_type_view has pointer arrays at the
	 * beginning. Walk through them and assign a random pointer heap to each
	 * type detected by typename.
	 */
	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
		const char *c_name = kt->kt_name;
		uint32_t p_i = i;

		while (strcmp(c_name, p_name) == 0) {
			i++;
			kt = kt_buffer[i].ktv_var;
			c_name = kt->kt_name;
		}
		p_name = c_name;
		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
		    &kt_buffer[i].ktv_var, heap_id);
	}

	/*
	 * Returns the index of the first view that isn't a pointer array
	 */
	return i;
}

__startup_func
static uint32_t
kalloc_hash_adjust(uint32_t hash, uint32_t shift)
{
	/*
	 * Limit range_id to ptr ranges
	 */
	uint32_t range_id = kmem_adjust_range_id(hash);
	uint32_t direction = hash & 0x8000;
	return (range_id | KMEM_HASH_SET | direction) << shift;
}

__startup_func
static void
kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
    kalloc_type_flags_t *kt_flags)
{
	uint32_t hash = 0;

	assert(sig_ty != NULL);
	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
	    kalloc_type_hash_seed);
	if (sig_hdr) {
		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
	}
	hash = os_hash_jenkins_finish(hash);
	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);

	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
}
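
/*
 * Layout note (per the static_assert next to KT_GET_HASH above): the
 * masked Jenkins hash lands in bits 16 and up of kt_flags, so
 * KT_GET_HASH() can later recover the kmem range id, the direction
 * bit, and the KMEM_HASH_SET marker consumed by kmem_*_guard.
 */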

__startup_func
static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)
{
	/*
	 * Use backtraces on fixed as we don't have signatures for types that go
	 * to the VM due to rdar://85182551.
	 */
	(void) addr;
}

__startup_func
static void
kalloc_type_set_type_hash_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
	    &ktv->kt_flags);
}

__startup_func
static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_mark_processed_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_update_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
}

__startup_func
static void
kalloc_type_update_view_var(vm_offset_t addr)
{
	(void) addr;
}

__startup_func
static void
kalloc_type_view_copy(
	const kalloc_type_variant_t   type,
	vm_offset_t                   start,
	vm_offset_t                   end,
	uint64_t                     *cur_count,
	bool                          slide,
	uuid_string_t                 kext_uuid)
{
	uint64_t count = kalloc_type_view_count(type, start, end);
	if (count + *cur_count >= kt_count) {
		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
	}
	vm_offset_t cur = start;
	while (cur < end) {
		if (slide) {
			kalloc_type_func(type, slide, cur);
		}
		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
		    kext_uuid);
		kalloc_type_func(type, mark_processed, cur);
		/*
		 * Skip views that go to the VM
		 */
		if (kalloc_type_from_vm(kt_flags)) {
			cur += kalloc_type_func(type, view_sz);
			continue;
		}

		/*
		 * Check if the signature indicates that the entire allocation is data.
		 *
		 * Note that KT_VAR_DATA_HEAP is a fake "data" heap; variable
		 * kalloc_type handles the actual redirection in the entry points
		 * kalloc/kfree_type_var_impl.
		 */
		if (kalloc_type_is_data(kt_flags)) {
			kalloc_type_func(type, handle_data_view, cur);
			cur += kalloc_type_func(type, view_sz);
			continue;
		}

		/*
		 * Set type hash that is used by kmem_*_guard
		 */
		kalloc_type_func(type, set_type_hash, cur);
		kalloc_type_func(type, update_view, cur);
		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
		cur += kalloc_type_func(type, view_sz);
		*cur_count = *cur_count + 1;
	}
}

__startup_func
static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)
{
	kc_format_t kc_format;
	uint64_t cur_count = 0;

	if (!PE_get_primary_kc_format(&kc_format)) {
		panic("kalloc_type_view_parse: wasn't able to determine kc format");
	}

	if (kc_format == KCFormatStatic) {
		/*
		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
		 * xnu are coalesced.
		 */
		kalloc_type_view_copy(type,
		    kalloc_type_var(type, sec_start),
		    kalloc_type_var(type, sec_end),
		    &cur_count, false, NULL);
	} else if (kc_format == KCFormatFileset) {
		/*
		 * If kc uses filesets, traverse __kalloc_type section for each
		 * macho in the BootKC.
		 */
		kernel_mach_header_t *kc_mh = NULL;
		kernel_mach_header_t *kext_mh = NULL;

		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
		struct load_command *lc =
		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
		for (uint32_t i = 0; i < kc_mh->ncmds;
		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
			if (lc->cmd != LC_FILESET_ENTRY) {
				continue;
			}
			struct fileset_entry_command *fse =
			    (struct fileset_entry_command *)(vm_offset_t)lc;
			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
			if (sect != NULL) {
				unsigned long uuidlen = 0;
				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
				uuid_string_t kext_uuid_str;
				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
				}
				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
				    &cur_count, false, kext_uuid_str);
			}
		}
	} else if (kc_format == KCFormatKCGEN) {
		/*
		 * Parse __kalloc_type section from xnu
		 */
		kalloc_type_view_copy(type,
		    kalloc_type_var(type, sec_start),
		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);

#ifndef __BUILDING_XNU_LIB_UNITTEST__ /* no kexts in unit-test */
		/*
		 * Parse __kalloc_type section for kexts
		 *
		 * Note: We don't process the kalloc_type_views for kexts on armv7
		 * as this platform has insufficient memory for type based
		 * segregation. kalloc_type_impl_external will direct callsites
		 * based on their size.
		 */
		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
		vm_offset_t cur = 0;
		vm_offset_t end = 0;

		/*
		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
		 * and traverse it.
		 */
		kernel_section_t *prelink_sect = getsectbynamefromheader(
			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
		assert(prelink_sect);
		cur = prelink_sect->addr;
		end = prelink_sect->addr + prelink_sect->size;

		while (cur < end) {
			uint64_t kext_text_sz = 0;
			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;

			if (kext_mh->magic == 0) {
				/*
				 * Assert that we have processed all kexts and all that is left
				 * is padding
				 */
				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
				break;
			} else if (kext_mh->magic != MH_MAGIC_64 &&
			    kext_mh->magic != MH_CIGAM_64) {
				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
				    cur);
			}

			/*
			 * Kext macho found, iterate through its segments
			 */
			struct load_command *lc =
			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
			bool isSplitKext = false;

			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
					isSplitKext = true;
					continue;
				} else if (lc->cmd != LC_SEGMENT_64) {
					continue;
				}

				kernel_segment_command_t *seg_cmd =
				    (struct segment_command_64 *)(vm_offset_t)lc;
				/*
				 * Parse kalloc_type section
				 */
				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
					if (kt_sect) {
						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
						    true, NULL);
					}
				}
				/*
				 * If the kext has a __TEXT segment, that is the only thing that
				 * will be in the special __PRELINK_TEXT KC segment, so the next
				 * macho is right after.
				 */
				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
					kext_text_sz = seg_cmd->filesize;
				}
			}
			/*
			 * If the kext did not have a __TEXT segment (special xnu kexts with
			 * only a __LINKEDIT segment) then the next macho will be after all the
			 * header commands.
			 */
			if (!kext_text_sz) {
				kext_text_sz = kext_mh->sizeofcmds;
			} else if (!isSplitKext) {
				panic("kalloc_type_view_parse: No support for non-split seg KCs");
				break;
			}

			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
		}
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */
	} else {
		/*
		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
		 * parsing kalloc_type_view structs during startup.
		 */
		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
		    " for kc_format = %d\n", kc_format);
	}
	return cur_count;
}

__startup_func
static int
kalloc_type_cmp_fixed(const void *a, const void *b)
{
	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;

	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
	/*
	 * If the kalloc_type_views are in the same kalloc bucket, sort by
	 * signature else sort by size
	 */
	if (idxA == idxB) {
		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
		/*
		 * If the kalloc_type_views have the same signature sort by site
		 * name
		 */
		if (result == 0) {
			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
		}
		return result;
	}
	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
	return (int)(sizeA - sizeB);
}

__startup_func
static int
kalloc_type_cmp_var(const void *a, const void *b)
{
	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktB->kt_flags);
	int result = 0;

	/*
	 * Switched around (B - A) because we want the pointer arrays to be at the
	 * top
	 */
	result = ktB_ptrArray - ktA_ptrArray;
	if (result == 0) {
		result = strcmp(ktA_hdr, ktB_hdr);
		if (result == 0) {
			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
			if (result == 0) {
				result = strcmp(ktA->kt_name, ktB->kt_name);
			}
		}
	}
	return result;
}

__startup_func
static uint16_t *
kalloc_type_create_iterators_fixed(
	uint16_t           *kt_skip_list_start,
	uint64_t            count)
{
	uint16_t *kt_skip_list = kt_skip_list_start;
	uint16_t p_idx = UINT16_MAX; /* previous size idx */
	uint16_t c_idx = 0; /* current size idx */
	uint16_t unique_sig = 0;
	uint16_t total_sig = 0;
	const char *p_sig = NULL;
	const char *p_name = "";
	const char *c_sig = NULL;
	const char *c_name = NULL;

	/*
	 * Walk over each kalloc_type_view
	 */
	for (uint16_t i = 0; i < count; i++) {
		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;

		c_idx = kalloc_type_get_idx(kt->kt_size);
		c_sig = kt->kt_signature;
		c_name = kt->kt_zv.zv_name;
		/*
		 * When current kalloc_type_view is in a different kalloc size
		 * bucket than the previous, it means we have processed all in
		 * the previous size bucket, so store the accumulated values
		 * and advance the indices.
		 */
		if (p_idx == UINT16_MAX || c_idx != p_idx) {
			/*
			 * Updates for frequency lists
			 */
			if (p_idx != UINT16_MAX) {
				kt_freq_list[p_idx] = unique_sig;
				kt_freq_list_total[p_idx] = total_sig - unique_sig;
			}
			unique_sig = 1;
			total_sig = 1;

			p_idx = c_idx;
			p_sig = c_sig;
			p_name = c_name;

			/*
			 * Updates to signature skip list
			 */
			*kt_skip_list = i;
			kt_skip_list++;

			continue;
		}

		/*
		 * When the current kalloc_type_view is in the same kalloc size
		 * bucket as the previous one, analyze the signature to see if it
		 * is unique.
		 *
		 * Signatures are collapsible if one is a substring of the next.
		 */
		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
			/*
			 * Unique signature detected. Update counts and advance index
			 */
			unique_sig++;
			total_sig++;

			*kt_skip_list = i;
			kt_skip_list++;
			p_sig = c_sig;
			p_name = c_name;
			continue;
		}
		/*
		 * Need this here as we do substring matching for signatures, so we
		 * want to track the longer signature seen rather than the substring
		 */
		p_sig = c_sig;

		/*
		 * Check if current kalloc_type_view corresponds to a new type
		 */
		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
			total_sig++;
			p_name = c_name;
		}
	}
	/*
	 * Final update
	 */
	assert(c_idx == p_idx);
	assert(kt_freq_list[c_idx] == 0);
	kt_freq_list[c_idx] = unique_sig;
	kt_freq_list_total[c_idx] = total_sig - unique_sig;
	*kt_skip_list = (uint16_t) count;

	return ++kt_skip_list;
}

__startup_func
static uint32_t
kalloc_type_create_iterators_var(
	uint32_t           *kt_skip_list_start,
	uint32_t            buf_start)
{
	uint32_t *kt_skip_list = kt_skip_list_start;
	uint32_t n = 0;

	kt_skip_list[n] = buf_start;
	assert(kt_count > buf_start + 1);
	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
		assert(ktA->kt_sig_type != NULL);
		assert(ktB->kt_sig_type != NULL);
		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
			n++;
			kt_skip_list[n] = i;
		}
	}
	/*
	 * Final update
	 */
	n++;
	kt_skip_list[n] = (uint32_t) kt_count;
	return n;
}

__startup_func
static uint16_t
kalloc_type_distribute_budget(
	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            zone_budget,
	uint16_t            min_zones_per_size)
{
	uint16_t total_sig = 0;
	uint16_t min_sig = 0;
	uint16_t assigned_zones = 0;
	uint16_t remaining_zones = zone_budget;
	uint16_t modulo = 0;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t sig_freq = freq_list[i];
		uint16_t min_zones = min_zones_per_size;

		if (sig_freq < min_zones_per_size) {
			min_zones = sig_freq;
		}
		total_sig += sig_freq;
		kt_zones[i] = min_zones;
		min_sig += min_zones;
	}
	if (remaining_zones > total_sig) {
		remaining_zones = total_sig;
	}
	assert(remaining_zones >= min_sig);
	remaining_zones -= min_sig;
	total_sig -= min_sig;
	assigned_zones += min_sig;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t freq = freq_list[i];

		if (freq < min_zones_per_size) {
			continue;
		}
		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
		uint16_t n_zones = (uint16_t)(numer / total_sig);

		/*
		 * Accumulate remainder and increment n_zones when it goes above
		 * denominator
		 */
		modulo += numer % total_sig;
		if (modulo >= total_sig) {
			n_zones++;
			modulo -= total_sig;
		}

		/*
		 * Cap the total number of zones to the unique signatures
		 */
		if ((n_zones + min_zones_per_size) > freq) {
			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
			modulo += (extra_zones * total_sig);
			n_zones -= extra_zones;
		}
		kt_zones[i] += n_zones;
		assigned_zones += n_zones;
	}

	if (kt_options & KT_OPTIONS_DEBUG) {
		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
		    assigned_zones, remaining_zones + min_sig - assigned_zones);
	}
	return remaining_zones + min_sig - assigned_zones;
}
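
/*
 * Worked example of the largest-remainder scheme above (a sketch with
 * made-up numbers): with min_zones_per_size == 0, remaining_zones == 10
 * and adjusted frequencies 8, 5 and 3 (total_sig == 16), the quotients
 * are 80/16 == 5, 50/16 == 3 (rem 2) and 30/16 == 1 (rem 14).  The
 * accumulated remainder reaches 2 + 14 == 16 on the last pass, granting
 * one extra zone, for a final split of 5 + 3 + 2 == 10, nothing wasted.
 */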

__startup_func
static int
kalloc_type_cmp_type_zones(const void *a, const void *b)
{
	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;

	return (int)(B.nzones - A.nzones);
}

__startup_func
static void
kalloc_type_redistribute_budget(
	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)])
{
	uint16_t count = 0, cur_count = 0;
	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t top_zone_total = 0;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t zones = kt_zones[i];

		/*
		 * If a sizeclass got no zones but has types to divide, make a note
		 * of it
		 */
		if (zones == 0 && (freq_total_list[i] != 0)) {
			count++;
		}

		sorted_zones[i].nzones = kt_zones[i];
		sorted_zones[i].idx = i;
	}

	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);

	for (uint16_t i = 0; i < 3; i++) {
		top_zone_total += sorted_zones[i].nzones;
	}

	/*
	 * Borrow zones from the top 3 sizeclasses and redistribute to those
	 * that didn't get a zone but have types to divide
	 */
	cur_count = count;
	for (uint16_t i = 0; i < 3; i++) {
		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
		uint16_t zone_available = kt_zones[sorted_zones[i].idx];

		if (zone_borrow > (zone_available / 2)) {
			zone_borrow = zone_available / 2;
		}
		kt_zones[sorted_zones[i].idx] -= zone_borrow;
		cur_count -= zone_borrow;
	}

	for (uint16_t i = 0; i < 3; i++) {
		if (cur_count == 0) {
			break;
		}
		kt_zones[sorted_zones[i].idx]--;
		cur_count--;
	}

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
		    (count > cur_count)) {
			kt_zones[i]++;
			count--;
		}
	}
}

static uint16_t
kalloc_type_apply_policy(
	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t            zone_budget)
{
	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
	uint16_t zbudget_type = zone_budget - zbudget_sig;
	uint16_t wasted_zones = 0;

#if DEBUG || DEVELOPMENT
	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
		__assert_only uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
		assert(zone_budget + current_zones <= MAX_ZONES);
	}
#endif

	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
	    zbudget_sig, 2);
	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
	    kt_zones_type, zbudget_type, 0);
	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);

	/*
	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
	 */
	if (kt_options & KT_OPTIONS_DEBUG) {
		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
		    "zones_type\n");
		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
			    freq_total_list[i] + freq_list[i], freq_list[i],
			    kt_zones_sig[i] + kt_zones_type[i],
			    kt_zones_sig[i], kt_zones_type[i]);
		}
	}

	return wasted_zones;
}
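
/*
 * For example (a sketch): with zone_budget == 155, the split above
 * reserves zbudget_sig == (7 * 155) / 10 == 108 zones for per-signature
 * buckets and zbudget_type == 155 - 108 == 47 for per-type buckets.
 */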


__startup_func
static void
kalloc_type_create_zone_for_size(
	zone_t             *kt_zones_for_size,
	uint16_t            kt_zones,
	vm_size_t           z_size)
{
	zone_t p_zone = NULL;
	char *z_name = NULL;
	zone_t shared_z = NULL;

	for (uint16_t i = 0; i < kt_zones; i++) {
		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
		    (size_t) z_size);
		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
		if (i != 0) {
			p_zone->z_kt_next = z;
		}
		p_zone = z;
		kt_zones_for_size[i] = z;
	}
	/*
	 * Create shared zone for sizeclass if it doesn't already exist
	 */
	if (kt_shared_fixed) {
		shared_z = kalloc_zone_for_size(KHEAP_EARLY->kh_zstart, z_size);
		if (zone_elem_inner_size(shared_z) != z_size) {
			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
			    (size_t) z_size);
			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
			    ^(zone_t zone){
				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_EARLY;
			});
		}
	}
	kt_zones_for_size[kt_zones] = shared_z;
}
1652 
1653 __startup_func
1654 static uint16_t
kalloc_type_zones_for_type(uint16_t zones_total_type,uint16_t unique_types,uint16_t total_types,bool last_sig)1655 kalloc_type_zones_for_type(
1656 	uint16_t            zones_total_type,
1657 	uint16_t            unique_types,
1658 	uint16_t            total_types,
1659 	bool                last_sig)
1660 {
1661 	uint16_t zones_for_type = 0, n_mod = 0;
1662 
1663 	if (zones_total_type == 0) {
1664 		return 0;
1665 	}
1666 
1667 	zones_for_type = (zones_total_type * unique_types) / total_types;
1668 	n_mod = (zones_total_type * unique_types) % total_types;
1669 	zone_carry += n_mod;
1670 
1671 	/*
1672 	 * Drain carry opportunistically
1673 	 */
1674 	if (((unique_types > 3) && (zone_carry > 0)) ||
1675 	    (zone_carry >= (int) total_types) ||
1676 	    (last_sig && (zone_carry > 0))) {
1677 		zone_carry -= total_types;
1678 		zones_for_type++;
1679 	}
1680 
1681 	if (last_sig) {
1682 		assert(zone_carry == 0);
1683 	}
1684 
1685 	return zones_for_type;
1686 }
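
/*
 * Worked example (illustrative): distributing zones_total_type == 2 zones
 * over total_types == 5 types split into signature groups of
 * unique_types == 2 and 3. The first group gets (2 * 2) / 5 == 0 zones and
 * leaves zone_carry == 4; the last group gets (2 * 3) / 5 == 1 zone,
 * raises zone_carry to 5, and, being last_sig, drains the carry for one
 * extra zone: 2 zones handed out in total, exactly the budget, with
 * zone_carry back at 0 for the assert above.
 */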
1687 
1688 __startup_func
1689 static uint16_t
1690 kalloc_type_build_skip_list(
1691 	kalloc_type_view_t     *start,
1692 	kalloc_type_view_t     *end,
1693 	uint16_t               *kt_skip_list)
1694 {
1695 	kalloc_type_view_t *cur = start;
1696 	kalloc_type_view_t prev = *start;
1697 	uint16_t i = 0, idx = 0;
1698 
1699 	kt_skip_list[idx] = i;
1700 	idx++;
1701 
1702 	while (cur < end) {
1703 		kalloc_type_view_t kt_cur = *cur;
1704 
1705 		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1706 			kt_skip_list[idx] = i;
1707 
1708 			prev = kt_cur;
1709 			idx++;
1710 		}
1711 		i++;
1712 		cur++;
1713 	}
1714 
1715 	/*
1716 	 * Final update
1717 	 */
1718 	kt_skip_list[idx] = i;
1719 	return idx;
1720 }
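
/*
 * Worked example (illustrative, assumed input): for a sorted run of views
 * named [A, A, B, C, C, C], the skip list becomes [0, 2, 3, 6] (the start
 * offset of each name run plus the final end index) and the function
 * returns 3, the number of unique names.
 */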
1721 
1722 __startup_func
1723 static void
1724 kalloc_type_init_sig_eq(
1725 	zone_t             *zones,
1726 	uint16_t            n_zones,
1727 	zone_t              sig_zone)
1728 {
1729 	for (uint16_t i = 0; i < n_zones; i++) {
1730 		zone_t z = zones[i];
1731 
1732 		assert(!zone_get_sig_eq(z));
1733 		zone_set_sig_eq(z, zone_index(sig_zone));
1734 	}
1735 }
1736 
1737 #ifndef __BUILDING_XNU_LIB_UNITTEST__
1738 #define KT_ZONES_FOR_SIZE_SIZE 32
1739 #else /* __BUILDING_XNU_LIB_UNITTEST__ */
1740 /* different init sequence in unit-test requires a bigger buffer in the kalloc zones initialization */
1741 #define KT_ZONES_FOR_SIZE_SIZE 35
1742 #endif /* __BUILDING_XNU_LIB_UNITTEST__ */
1743 
1744 __startup_func
1745 static uint16_t
1746 kalloc_type_distribute_zone_for_type(
1747 	kalloc_type_view_t *start,
1748 	kalloc_type_view_t *end,
1749 	bool                last_sig,
1750 	uint16_t            zones_total_type,
1751 	uint16_t            total_types,
1752 	uint16_t           *kt_skip_list,
1753 	zone_t              kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE],
1754 	uint16_t            type_zones_start,
1755 	zone_t              sig_zone,
1756 	zone_t              early_zone)
1757 {
1758 	uint16_t count = 0, n_zones = 0;
1759 	uint16_t *shuffle_buf = NULL;
1760 	zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1761 
1762 	/*
1763 	 * Assert there is space in the buffer
1764 	 */
1765 	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1766 	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1767 	    last_sig);
1768 	shuffle_buf = &kt_skip_list[count + 1];
1769 
1770 	/*
1771 	 * Initialize signature equivalence zone for type zones
1772 	 */
1773 	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1774 
1775 	if (n_zones == 0) {
1776 		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1777 		    early_zone);
1778 		return n_zones;
1779 	}
1780 
1781 	/*
1782 	 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1783 	 */
1784 	if (count == 1) {
1785 		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1786 		    early_zone);
1787 		return n_zones;
1788 	}
1789 
1790 	/*
1791 	 * Add the signature based zone to n_zones
1792 	 */
1793 	n_zones++;
1794 
1795 	for (uint16_t i = 0; i < count; i++) {
1796 		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1797 		uint16_t type_start = kt_skip_list[i];
1798 		kalloc_type_view_t *kt_type_start = &start[type_start];
1799 		uint16_t type_end = kt_skip_list[i + 1];
1800 		kalloc_type_view_t *kt_type_end = &start[type_end];
1801 		zone_t zone;
1802 
1803 		if (zidx == 0) {
1804 			kmem_shuffle(shuffle_buf, n_zones);
1805 		}
1806 
1807 		shuffled_zidx = shuffle_buf[zidx];
1808 		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1809 		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1810 		    early_zone);
1811 	}
1812 
1813 	return n_zones - 1;
1814 }
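
/*
 * Sketch of the loop above (illustrative): with count == 4 name runs and
 * n_zones == 3 (two type zones plus the signature zone at shuffled
 * index 0), runs 0..2 map to a random permutation of {sig_zone,
 * type_zones[0], type_zones[1]}, and run 3 starts a freshly shuffled
 * permutation, so no run's zone is predictable from its sort position.
 */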
1815 
1816 __startup_func
1817 static void
1818 kalloc_type_create_zones_fixed(
1819 	uint16_t           *kt_skip_list_start,
1820 	uint16_t           *kt_shuffle_buf)
1821 {
1822 	uint16_t *kt_skip_list = kt_skip_list_start;
1823 	uint16_t p_j = 0;
1824 	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1825 	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1826 #if DEBUG || DEVELOPMENT
1827 	__assert_only uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1828 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1829 #endif
1830 	/*
1831 	 * Apply policy to determine how many zones to create for each size
1832 	 * class.
1833 	 */
1834 	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1835 	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1836 
1837 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1838 		uint16_t n_unique_sig = kt_freq_list[i];
1839 		vm_size_t z_size = kt_zone_cfg[i];
1840 		uint16_t n_zones_sig = kt_zones_sig[i];
1841 		uint16_t n_zones_type = kt_zones_type[i];
1842 		uint16_t total_types = kt_freq_list_total[i];
1843 		uint16_t type_zones_used = 0;
1844 
1845 		if (n_unique_sig == 0) {
1846 			continue;
1847 		}
1848 
1849 		zone_carry = 0;
1850 		assert(n_zones_sig + n_zones_type + 1 <= KT_ZONES_FOR_SIZE_SIZE);
1851 		zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE] = {};
1852 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1853 		    n_zones_sig + n_zones_type, z_size);
1854 
1855 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1856 		/*
1857 		 * Ensure that there is enough space to shuffle n_unique_sig
1858 		 * indices
1859 		 */
1860 		assert(n_unique_sig < kt_shuffle_count);
1861 
1862 		/*
1863 		 * Get a shuffled set of signature indices
1864 		 */
1865 		*kt_shuffle_buf = 0;
1866 		if (n_unique_sig > 1) {
1867 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1868 		}
1869 
1870 		for (uint16_t j = 0; j < n_zones_sig; j++) {
1871 			zone_t *z_ptr = &kt_zones_for_size[j];
1872 
1873 			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1874 		}
1875 
1876 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1877 			/*
1878 			 * For every size that has unique types
1879 			 */
1880 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1881 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1882 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1883 			zone_t zone = kt_zones_for_size[j % n_zones_sig];
1884 			zone_t early_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1885 			bool last_sig;
1886 
1887 			last_sig = (j == (n_unique_sig - 1)) ? true : false;
1888 			type_zones_used += kalloc_type_distribute_zone_for_type(
1889 				&kt_buffer[cur].ktv_fixed,
1890 				&kt_buffer[end].ktv_fixed, last_sig,
1891 				n_zones_type, total_types + n_unique_sig,
1892 				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1893 				n_zones_sig + type_zones_used, zone, early_zone);
1894 		}
1895 		assert(type_zones_used <= n_zones_type);
1896 		p_j += n_unique_sig;
1897 	}
1898 }
1899 
1900 __startup_func
1901 static void
1902 kalloc_type_view_init_fixed(void)
1903 {
1904 	kalloc_type_hash_seed = (uint32_t) early_random();
1905 	kalloc_type_build_dlut();
1906 	/*
1907 	 * Parse __kalloc_type sections and build array of pointers to
1908 	 * all kalloc type views in kt_buffer.
1909 	 */
1910 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1911 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1912 
1913 #if MACH_ASSERT
1914 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1915 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1916 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1917 #endif
1918 
1919 	/*
1920 	 * Sort based on size class and signature
1921 	 */
1922 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1923 	    kalloc_type_cmp_fixed);
1924 
1925 	/*
1926 	 * Build a skip list that holds starts of unique signatures and a
1927 	 * frequency list of number of unique and total signatures per kalloc
1928 	 * size class
1929 	 */
1930 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1931 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1932 		kt_skip_list_start, kt_count);
1933 
1934 	/*
1935 	 * Create zones based on signatures
1936 	 */
1937 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1938 }
1939 
1940 __startup_func
1941 static void
1942 kalloc_type_heap_init(void)
1943 {
1944 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1945 	char kh_name[MAX_ZONE_NAME];
1946 	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1947 
1948 	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1949 		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1950 		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1951 		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1952 	}
1953 	/*
1954 	 * All variable kalloc type allocations are collapsed into a single
1955 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1956 	 */
1957 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1958 	zone_view_count += 1;
1959 }
1960 
1961 __startup_func
1962 static void
1963 kalloc_type_assign_heap(
1964 	uint32_t            start,
1965 	uint32_t            end,
1966 	uint32_t            heap_id)
1967 {
1968 	bool use_split = kmem_get_random16(1);
1969 
1970 	if (use_split) {
1971 		heap_id = kt_var_heaps;
1972 	}
1973 	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1974 	    &kt_buffer[end].ktv_var, heap_id);
1975 }
1976 
1977 __startup_func
1978 static void
1979 kalloc_type_split_heap(
1980 	uint32_t            start,
1981 	uint32_t            end,
1982 	uint32_t            heap_id)
1983 {
1984 	uint32_t count = start;
1985 	const char *p_name = NULL;
1986 
1987 	while (count < end) {
1988 		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1989 		const char *c_name = cur->kt_name;
1990 
1991 		if (!p_name) {
1992 			assert(count == start);
1993 			p_name = c_name;
1994 		}
1995 		if (strcmp(c_name, p_name) != 0) {
1996 			kalloc_type_assign_heap(start, count, heap_id);
1997 			start = count;
1998 			p_name = c_name;
1999 		}
2000 		count++;
2001 	}
2002 	kalloc_type_assign_heap(start, end, heap_id);
2003 }
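
/*
 * Illustrative example (assumed input): given sorted views named
 * [A, A, B, B, B] destined for heap_id H, each run of same-named views is
 * handed to kalloc_type_assign_heap() separately, and each run
 * independently flips a random bit between H and the last heap
 * (kt_var_heaps), so A and B may end up in different heaps.
 */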
2004 
2005 __startup_func
2006 static void
2007 kalloc_type_view_init_var(void)
2008 {
2009 	uint32_t buf_start = 0, unique_sig = 0;
2010 	uint32_t *kt_skip_list_start;
2011 	uint16_t *shuffle_buf;
2012 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
2013 	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
2014 	/*
2015 	 * Pick a random heap to split
2016 	 */
2017 	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
2018 
2019 	/*
2020 	 * Zones are created prior to parsing the views, since the zone budget
2021 	 * is fixed per size class; special types identified while parsing are
2022 	 * redirected as they are discovered.
2023 	 */
2024 	kalloc_type_heap_init();
2025 
2026 	/*
2027 	 * Parse __kalloc_var sections and build array of pointers to views that
2028 	 * aren't redirected in kt_buffer.
2029 	 */
2030 	kt_count = kalloc_type_view_parse(KTV_VAR);
2031 	assert(kt_count < UINT32_MAX);
2032 
2033 #if MACH_ASSERT
2034 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
2035 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
2036 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
2037 #endif
2038 
2039 	/*
2040 	 * Sort based on size class and signature
2041 	 */
2042 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
2043 	    kalloc_type_cmp_var);
2044 
2045 	buf_start = kalloc_type_handle_parray_var();
2046 
2047 	/*
2048 	 * Build a skip list that holds starts of unique signatures
2049 	 */
2050 	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
2051 	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
2052 	    buf_start);
2053 	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
2054 	/*
2055 	 * If we have only one heap, then the other elements share a heap with
2056 	 * the pointer arrays
2057 	 */
2058 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
2059 		panic("kt_var_heaps is too small");
2060 	}
2061 
2062 	kmem_shuffle(shuffle_buf, flex_heap_count);
2063 	/*
2064 	 * The index of the heap we decided to split is placed twice in the
2065 	 * shuffle buffer so that it gets twice the number of signatures, which
2066 	 * we then split evenly in kalloc_type_split_heap()
2067 	 */
2068 	shuffle_buf[flex_heap_count] = split_heap;
2069 	split_heap += (fixed_heaps + 1);
2070 
2071 	for (uint32_t i = 1; i <= unique_sig; i++) {
2072 		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
2073 		    fixed_heaps + 1;
2074 		uint32_t start = kt_skip_list_start[i - 1];
2075 		uint32_t end = kt_skip_list_start[i];
2076 
2077 		assert(heap_id <= kt_var_heaps);
2078 		if (heap_id == split_heap) {
2079 			kalloc_type_split_heap(start, end, heap_id);
2080 			continue;
2081 		}
2082 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
2083 		    &kt_buffer[end].ktv_var, heap_id);
2084 	}
2085 }
2086 
2087 __startup_func
2088 static void
2089 kalloc_init(void)
2090 {
2091 	/*
2092 	 * Allocate scratch space to parse kalloc_type_views and create
2093 	 * other structures necessary to process them.
2094 	 */
2095 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
2096 
2097 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
2098 	kalloc_zsize_compute();
2099 
2100 	/* Initialize kalloc data buffers heap */
2101 	kalloc_heap_init(KHEAP_DATA_BUFFERS);
2102 
2103 	/* Initialize kalloc shared data buffers heap */
2104 	kalloc_heap_init(KHEAP_DATA_SHARED);
2105 
2106 	/* Initialize kalloc early heap */
2107 	kalloc_heap_init(KHEAP_EARLY);
2108 
2109 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
2110 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT | KMA_SPRAYQTN, VM_KERN_MEMORY_KALLOC);
2111 
2112 	/*
2113 	 * Handle fixed size views
2114 	 */
2115 	kalloc_type_view_init_fixed();
2116 
2117 	/*
2118 	 * Reset
2119 	 */
2120 	bzero(kt_buffer, kt_scratch_size);
2121 	kt_count = max_count;
2122 
2123 	/*
2124 	 * Handle variable size views
2125 	 */
2126 	kalloc_type_view_init_var();
2127 
2128 	/*
2129 	 * Free resources used
2130 	 */
2131 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2132 }
2133 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2134 
2135 #pragma mark accessors
2136 
2137 #define KFREE_ABSURD_SIZE \
2138 	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
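
/*
 * i.e., any requested free size larger than half the kernel's address
 * span is rejected as nonsensical by kfree_ext() / krealloc_ext() below.
 */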
2139 
2140 static void
2141 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2142 {
2143 	thread_t thr = current_thread();
2144 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2145 }
2146 
2147 static void
2148 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2149 {
2150 	thread_t thr = current_thread();
2151 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2152 }
2153 
2154 static kmem_guard_t
2155 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2156 {
2157 	kmem_guard_t guard = {
2158 		.kmg_atomic      = true,
2159 		.kmg_tag         = tag,
2160 		.kmg_type_hash   = type_hash,
2161 		.kmg_context     = os_hash_kernel_pointer(owner),
2162 	};
2163 
2164 	/*
2165 	 * TODO: this use is really not sufficiently smart.
2166 	 */
2167 
2168 	return guard;
2169 }
2170 
2171 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
2172 
2173 #if __arm64e__
2174 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2175 
2176 /*
2177  * Zone encoding is:
2178  *
2179  *   <PAC SIG><1><1><PTR value><5 bits of size class>
2180  *
2181  * VM encoding is:
2182  *
2183  *   <PAC SIG><1><0><PTR value><14 bits of page count>
2184  *
2185  * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2186  * so that PAC authentication extends the proper sign bit.
2187  */
2188 
2189 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2190 #else /* __arm64e__ */
2191 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2192 
2193 /*
2194  * Zone encoding is:
2195  *
2196  *   <TBI><1><PTR value><5 bits of size class>
2197  *
2198  * VM encoding is:
2199  *
2200  *   <TBI><0><PTR value><14 bits of page count>
2201  */
2202 
2203 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2204 #endif /* __arm64e__ */
2205 
2206 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2207 
2208 __attribute__((always_inline))
2209 struct kalloc_result
2210 __kalloc_array_decode(vm_address_t ptr)
2211 {
2212 	struct kalloc_result kr;
2213 	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2214 
2215 	if (ptr & zone_mask) {
2216 		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2217 		ptr &= ~0x1full;
2218 	} else if (__probable(ptr)) {
2219 		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2220 		ptr &= ~PAGE_MASK;
2221 		ptr |= zone_mask;
2222 	} else {
2223 		kr.size = 0;
2224 	}
2225 
2226 	kr.addr = (void *)ptr;
2227 	return kr;
2228 }
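
/*
 * Worked example (illustrative): with the zone bit set, the low 5 bits
 * encode the size class as (32 + (ptr & 0x10)) << (ptr & 0xf), walking the
 * {32 << n, 48 << n} kalloc ladder: 0x00 -> 32, 0x10 -> 48, 0x01 -> 64,
 * 0x11 -> 96, 0x13 -> 384. With the zone bit clear, the low PAGE_MASK bits
 * hold the page count, so a 3-page VM allocation decodes to
 * 3 << PAGE_SHIFT bytes.
 */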
2229 
2230 static inline void *
2231 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2232 {
2233 	return (void *)((vm_address_t)ptr | z->z_array_size_class);
2234 }
2235 
2236 static inline vm_address_t
2237 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2238 {
2239 	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2240 
2241 	return addr | atop(size);
2242 }
2243 
2244 #else /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2245 
2246 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2247 
2248 /*
2249  * Encoding is:
2250  * bits  0..46: pointer value
2251  * bits 47..47: 0: zones, 1: VM
2252  * bits 48..63: zones: elem size, VM: number of pages
2253  */
2254 
2255 #define KALLOC_ARRAY_TYPE_BIT   47
2256 static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2257 static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2258 
2259 __attribute__((always_inline))
2260 struct kalloc_result
2261 __kalloc_array_decode(vm_address_t ptr)
2262 {
2263 	struct kalloc_result kr;
2264 	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2265 
2266 	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2267 	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2268 		kr.size <<= PAGE_SHIFT;
2269 	}
2270 	/* sign extend, so that it also works with NULL */
2271 	kr.addr = (void *)((long)(ptr << shift) >> shift);
2272 
2273 	return kr;
2274 }
2275 
2276 static inline void *
2277 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2278 {
2279 	vm_address_t addr = (vm_address_t)ptr;
2280 
2281 	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2282 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2283 
2284 	return (void *)addr;
2285 }
2286 
2287 static inline vm_address_t
2288 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2289 {
2290 	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2291 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2292 
2293 	return addr;
2294 }
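
/*
 * Round-trip sketch (illustrative): encoding a 128-byte zone element at
 * kernel address A stores 128 in bits 48..63 and clears bit 47, so
 * __kalloc_array_decode() returns { .addr = A, .size = 128 }. Encoding a
 * 2-page VM allocation keeps bit 47 (kernel addresses are sign-extended)
 * and stores atop(2 * PAGE_SIZE) == 2 in bits 48..63, which the decoder
 * shifts back up by PAGE_SHIFT.
 */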
2295 
2296 #endif /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2297 
2298 vm_size_t
2299 kalloc_next_good_size(vm_size_t size, uint32_t period)
2300 {
2301 	uint32_t scale = kalloc_log2down((uint32_t)size);
2302 	vm_size_t step, size_class;
2303 
2304 	if (size < KHEAP_STEP_START) {
2305 		return KHEAP_STEP_START;
2306 	}
2307 	if (size < 2 * KHEAP_STEP_START) {
2308 		return 2 * KHEAP_STEP_START;
2309 	}
2310 
2311 	if (size < KHEAP_MAX_SIZE) {
2312 		step = 1ul << (scale - 1);
2313 	} else {
2314 		step = round_page(1ul << (scale - kalloc_log2down(period)));
2315 	}
2316 
2317 	size_class = (size + step) & -step;
2318 #if KASAN_CLASSIC
2319 	if (size > K_SIZE_CLASS(size_class)) {
2320 		return kalloc_next_good_size(size_class, period);
2321 	}
2322 	size_class = K_SIZE_CLASS(size_class);
2323 #endif
2324 	return size_class;
2325 }
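
/*
 * Worked example (illustrative): for size == 600, scale == 9 and
 * step == 256, so (600 + 256) & -256 returns 768; repeated calls walk the
 * {1.0, 1.5} * 2^n ladder of kalloc size classes (..., 512, 768, 1024, ...)
 * up to KHEAP_MAX_SIZE, after which steps are rounded to whole pages.
 */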
2326 
2327 
2328 #pragma mark kalloc
2329 
2330 static inline kalloc_heap_t
2331 kalloc_type_get_heap(kalloc_type_flags_t kt_flags)
2332 {
2333 	/*
2334 	 * Redirect data-only views
2335 	 */
2336 	if (kalloc_type_is_data(kt_flags)) {
2337 		/*
2338 		 * There are kexts that allocate arrays of data types (uint8_t etc.)
2339 		 * and use krealloc_data / kfree_data to free them; therefore,
2340 		 * until adoption lands, we need to use the shared heap for now.
2341 		 */
2342 		return GET_KEXT_KHEAP_DATA();
2343 	}
2344 
2345 	if (kt_flags & KT_PROCESSED) {
2346 		return KHEAP_KT_VAR;
2347 	}
2348 
2349 	return KHEAP_DEFAULT;
2350 }
2351 
2352 
2353 __attribute__((noinline))
2354 static struct kalloc_result
2355 kalloc_large(
2356 	kalloc_heap_t         kheap,
2357 	vm_size_t             req_size,
2358 	zalloc_flags_t        flags,
2359 	uint16_t              kt_hash,
2360 	void                 *owner __unused)
2361 {
2362 	kma_flags_t kma_flags = KMA_KASAN_GUARD;
2363 	vm_tag_t tag;
2364 	vm_offset_t addr, size;
2365 
2366 	if (flags & Z_NOFAIL) {
2367 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2368 		    (size_t)req_size);
2369 	}
2370 
2371 	/*
2372 	 * kmem_alloc could block, so return failure if Z_NOWAIT is set.
2373 	 *
2374 	 * Also, quickly reject sizes larger than our address space, as
2375 	 * kt_size or IOMallocArraySize() expect this.
2376 	 */
2377 	if ((flags & Z_NOWAIT) ||
2378 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2379 		return (struct kalloc_result){ };
2380 	}
2381 
2382 	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2383 		return (struct kalloc_result){ };
2384 	}
2385 
2386 	/*
2387 	 * (73465472) on Intel we didn't use to pass this flag,
2388 	 * which in turn allowed kalloc_large() memory to be shared
2389 	 * with user directly.
2390 	 *
2391 	 * We're bound by this unfortunate ABI.
2392 	 */
2393 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2394 #ifndef __x86_64__
2395 		kma_flags |= KMA_KOBJECT;
2396 #endif
2397 	} else {
2398 		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
2399 	}
2400 	if (flags & Z_NOPAGEWAIT) {
2401 		kma_flags |= KMA_NOPAGEWAIT;
2402 	}
2403 	if (flags & Z_ZERO) {
2404 		kma_flags |= KMA_ZERO;
2405 	}
2406 	if (kheap == KHEAP_DATA_BUFFERS) {
2407 		kma_flags |= KMA_DATA;
2408 	} else if (kheap == KHEAP_DATA_SHARED) {
2409 		kma_flags |= KMA_DATA_SHARED;
2410 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2411 		kma_flags |= KMA_SPRAYQTN;
2412 	}
2413 	if (flags & Z_NOSOFTLIMIT) {
2414 		kma_flags |= KMA_NOSOFTLIMIT;
2415 	}
2416 
2417 
2418 	tag = zalloc_flags_get_tag(flags);
2419 	if (flags & Z_VM_TAG_BT_BIT) {
2420 		tag = vm_tag_bt() ?: tag;
2421 	}
2422 	if (tag == VM_KERN_MEMORY_NONE) {
2423 		tag = kheap->kh_tag;
2424 	}
2425 
2426 	size = round_page(req_size);
2427 	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2428 		req_size = round_page(size);
2429 	}
2430 
2431 	addr = kmem_alloc_guard(kernel_map, req_size, 0,
2432 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2433 
2434 	if (addr != 0) {
2435 		counter_inc(&kalloc_large_count);
2436 		counter_add(&kalloc_large_total, size);
2437 		KALLOC_ZINFO_SALLOC(size);
2438 		if (flags & Z_KALLOC_ARRAY) {
2439 			addr = __kalloc_array_encode_vm(addr, req_size);
2440 		}
2441 	} else {
2442 		addr = 0;
2443 	}
2444 
2445 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2446 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2447 }
2448 
2449 #if KASAN
2450 
2451 static inline void
2452 kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
2453 {
2454 #if KASAN_CLASSIC
2455 	/*
2456 	 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
2457 	 * tagging of the memory region is performed here.
2458 	 */
2459 	kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
2460 	    __builtin_frame_address(0));
2461 #endif /* KASAN_CLASSIC */
2462 
2463 #if KASAN_TBI
2464 	kasan_tbi_retag_unused_space(addr, size, used ?: 1);
2465 #endif /* KASAN_TBI */
2466 }
2467 #endif /* KASAN */
2468 
2469 static inline struct kalloc_result
2470 kalloc_zone(
2471 	zone_t                  z,
2472 	zone_stats_t            zstats,
2473 	zalloc_flags_t          flags,
2474 	vm_size_t               req_size)
2475 {
2476 	struct kalloc_result kr;
2477 	vm_size_t esize;
2478 
2479 	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2480 	esize = kr.size;
2481 
2482 	if (__probable(kr.addr)) {
2483 		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2484 			req_size = esize;
2485 		} else {
2486 			kr.size = req_size;
2487 		}
2488 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2489 		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2490 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2491 
2492 #if KASAN
2493 		kalloc_mark_unused_space(kr.addr, esize, kr.size);
2494 #endif /* KASAN */
2495 
2496 		if (flags & Z_KALLOC_ARRAY) {
2497 			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2498 		}
2499 	}
2500 
2501 	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2502 	return kr;
2503 }
2504 
2505 static zone_id_t
2506 kalloc_use_early_heap(
2507 	kalloc_heap_t           kheap,
2508 	zone_stats_t            zstats,
2509 	zone_id_t               zstart,
2510 	zalloc_flags_t         *flags)
2511 {
2512 	if (!zone_is_data_kheap(kheap->kh_heap_id)) {
2513 		zone_stats_t zstats_cpu = zpercpu_get(zstats);
2514 
2515 		if (os_atomic_load(&zstats_cpu->zs_alloc_not_early, relaxed) == 0) {
2516 			*flags |= Z_SET_NOTEARLY;
2517 			return KHEAP_EARLY->kh_zstart;
2518 		}
2519 	}
2520 
2521 	return zstart;
2522 }
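
/*
 * Summary (illustrative): as long as this stats bucket has never recorded
 * a non-early allocation (zs_alloc_not_early == 0 on the current CPU),
 * non-data allocations are redirected to KHEAP_EARLY's zone range and
 * Z_SET_NOTEARLY is added to the flags; once zs_alloc_not_early becomes
 * nonzero, the caller's own zstart is used unchanged.
 */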
2523 
2524 #undef kalloc_ext
2525 
2526 __mockable struct kalloc_result
2527 kalloc_ext(
2528 	void                   *kheap_or_kt_view,
2529 	vm_size_t               size,
2530 	zalloc_flags_t          flags,
2531 	void                   *owner)
2532 {
2533 	kalloc_type_var_view_t kt_view;
2534 	kalloc_heap_t kheap;
2535 	zone_stats_t zstats = NULL;
2536 	zone_t z;
2537 	uint16_t kt_hash;
2538 	zone_id_t zstart;
2539 
2540 	if (kt_is_var_view(kheap_or_kt_view)) {
2541 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2542 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2543 		/*
2544 		 * Use stats from view if present, else use stats from kheap.
2545 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2546 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2547 		 * use stats from the respective zones.
2548 		 */
2549 		zstats  = kt_view->kt_stats;
2550 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2551 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
2552 	} else {
2553 		kt_view = NULL;
2554 		kheap   = kheap_or_kt_view;
2555 		kt_hash = kheap->kh_type_hash;
2556 		zstart  = kheap->kh_zstart;
2557 	}
2558 
2559 	if (!zstats) {
2560 		zstats = kheap->kh_stats;
2561 	}
2562 
2563 	zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
2564 	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2565 	if (z) {
2566 		return kalloc_zone(z, zstats, flags, size);
2567 	} else {
2568 		return kalloc_large(kheap, size, flags, kt_hash, owner);
2569 	}
2570 }
2571 
2572 #if XNU_PLATFORM_MacOSX
2573 void *
2574 kalloc_external(vm_size_t size);
2575 void *
2576 kalloc_external(vm_size_t size)
2577 {
2578 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2579 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2580 }
2581 #endif /* XNU_PLATFORM_MacOSX */
2582 
2583 void *
2584 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2585 void *
2586 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2587 {
2588 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2589 	return kheap_alloc(GET_KEXT_KHEAP_DATA(), size, flags);
2590 }
2591 
2592 void *
2593 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags);
2594 void *
2595 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags)
2596 {
2597 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
2598 	return kheap_alloc(KHEAP_DATA_SHARED, size, flags);
2599 }
2600 
2601 __abortlike
2602 static void
2603 kalloc_data_require_panic(void *addr, vm_size_t size)
2604 {
2605 	zone_id_t zid = zone_id_for_element(addr, size);
2606 
2607 	if (zid != ZONE_ID_INVALID) {
2608 		zone_t z = &zone_array[zid];
2609 		zone_security_flags_t zsflags = zone_security_array[zid];
2610 
2611 		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
2612 			panic("kalloc_data_require failed: address %p in [%s%s]",
2613 			    addr, zone_heap_name(z), zone_name(z));
2614 		}
2615 
2616 		panic("kalloc_data_require failed: address %p in [%s%s], "
2617 		    "size too large %zd > %zd", addr,
2618 		    zone_heap_name(z), zone_name(z),
2619 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2620 	} else {
2621 		panic("kalloc_data_require failed: address %p not in zone native map",
2622 		    addr);
2623 	}
2624 }
2625 
2626 __abortlike
2627 static void
2628 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2629 {
2630 	zone_id_t zid = zone_id_for_element(addr, size);
2631 
2632 	if (zid != ZONE_ID_INVALID) {
2633 		zone_t z = &zone_array[zid];
2634 		zone_security_flags_t zsflags = zone_security_array[zid];
2635 
2636 		switch (zsflags.z_kheap_id) {
2637 		case KHEAP_ID_NONE:
2638 		case KHEAP_ID_DATA_BUFFERS:
2639 		case KHEAP_ID_DATA_SHARED:
2640 		case KHEAP_ID_KT_VAR:
2641 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2642 			    addr, zone_heap_name(z), zone_name(z));
2643 		default:
2644 			break;
2645 		}
2646 
2647 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2648 		    "size too large %zd > %zd", addr,
2649 		    zone_heap_name(z), zone_name(z),
2650 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2651 	} else {
2652 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2653 		    addr);
2654 	}
2655 }
2656 
2657 void
2658 kalloc_data_require(void *addr, vm_size_t size)
2659 {
2660 	zone_id_t zid = zone_id_for_element(addr, size);
2661 
2662 	if (zid != ZONE_ID_INVALID) {
2663 		zone_t z = &zone_array[zid];
2664 		zone_security_flags_t zsflags = zone_security_array[zid];
2665 		if (zone_is_data_kheap(zsflags.z_kheap_id) &&
2666 		    size <= zone_elem_inner_size(z)) {
2667 			return;
2668 		}
2669 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2670 	    (vm_address_t)addr, size)) {
2671 		return;
2672 	} else if (kmem_needs_data_share_range() &&
2673 	    kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2674 	    (vm_address_t)addr, size)) {
2675 		return;
2676 	}
2677 
2678 	kalloc_data_require_panic(addr, size);
2679 }
2680 
2681 void
2682 kalloc_non_data_require(void *addr, vm_size_t size)
2683 {
2684 	zone_id_t zid = zone_id_for_element(addr, size);
2685 
2686 	if (zid != ZONE_ID_INVALID) {
2687 		zone_t z = &zone_array[zid];
2688 		zone_security_flags_t zsflags = zone_security_array[zid];
2689 		switch (zsflags.z_kheap_id) {
2690 		case KHEAP_ID_NONE:
2691 			if (!zsflags.z_kalloc_type) {
2692 				break;
2693 			}
2694 			OS_FALLTHROUGH;
2695 		case KHEAP_ID_KT_VAR:
2696 			if (size < zone_elem_inner_size(z)) {
2697 				return;
2698 			}
2699 			break;
2700 		default:
2701 			break;
2702 		}
2703 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2704 	    (vm_address_t)addr, size)) {
2705 		return;
2706 	} else if (kmem_needs_data_share_range() &&
2707 	    !kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2708 	    (vm_address_t)addr, size)) {
2709 		return;
2710 	}
2711 
2712 	kalloc_non_data_require_panic(addr, size);
2713 }
2714 
2715 bool
2716 kalloc_is_data_buffers(void *addr, vm_size_t size)
2717 {
2718 	zone_id_t zid = zone_id_for_element(addr, size);
2719 
2720 	/*
2721 	 * If we do not use a dedicated data share range,
2722 	 * there is no way to fully distinguish between
2723 	 * shared and buffers heaps.
2724 	 *
2725 	 * When kmem_needs_data_share_range() == true, the
2726 	 * KMEM_RANGE_ID_DATA range is strictly for DATA_BUFFERS,
2727 	 * and KMEM_RANGE_ID_DATA_SHARED is strictly for DATA_SHARED.
2728 	 */
2729 	assert(kmem_needs_data_share_range());
2730 
2731 	if (zid != ZONE_ID_INVALID) {
2732 		zone_t z = &zone_array[zid];
2733 		zone_security_flags_t zsflags = zone_security_array[zid];
2734 		if (zone_is_data_buffers_kheap(zsflags.z_kheap_id) &&
2735 		    size <= zone_elem_inner_size(z)) {
2736 			return true;
2737 		}
2738 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2739 	    (vm_address_t)addr, size)) {
2740 		return true;
2741 	}
2742 
2743 	return false;
2744 }
2745 
2746 __mockable void *
2747 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2748 {
2749 	/*
2750 	 * Callsites from a kext that aren't in the BootKC on macOS or
2751 	 * any callsites on armv7 are not processed during startup, so we
2752 	 * default to using kheap_alloc.
2753 	 *
2754 	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the zone
2755 	 * is left NULL as we need to use the VM for the allocation.
2756 	 *
2757 	 */
2758 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2759 		kalloc_heap_t kheap;
2760 		vm_size_t size;
2761 
2762 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2763 		size  = kalloc_type_get_size(kt_view->kt_size);
2764 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
2765 		return kalloc_ext(kheap, size, flags, NULL).addr;
2766 	}
2767 
2768 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2769 	return kalloc_type_impl(kt_view, flags);
2770 }
2771 
2772 void *
2773 kalloc_type_var_impl_external(
2774 	kalloc_type_var_view_t  kt_view,
2775 	vm_size_t               size,
2776 	zalloc_flags_t          flags,
2777 	void                   *owner);
2778 void *
2779 kalloc_type_var_impl_external(
2780 	kalloc_type_var_view_t  kt_view,
2781 	vm_size_t               size,
2782 	zalloc_flags_t          flags,
2783 	void                   *owner)
2784 {
2785 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2786 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2787 }
2788 
2789 #pragma mark kfree
2790 
2791 __abortlike
2792 static void
2793 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2794 {
2795 	zone_security_flags_t zsflags = zone_security_config(z);
2796 	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2797 
2798 	if (zsflags.z_kalloc_type) {
2799 		panic_include_kalloc_types = true;
2800 		kalloc_type_src_zone = z;
2801 		panic("kfree: addr %p found in kalloc type zone '%s' "
2802 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2803 	}
2804 
2805 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2806 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2807 		    data, size, zone_heap_name(z), z->z_name);
2808 	} else {
2809 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2810 		    data, size, zone_heap_name(z), kheap_name);
2811 	}
2812 }
2813 
2814 __abortlike
2815 static void
2816 kfree_size_confusion_panic(zone_t z, void *data,
2817     size_t oob_offs, size_t size, size_t zsize)
2818 {
2819 	if (z) {
2820 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2821 		    "with elem_size %zd",
2822 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2823 	} else {
2824 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2825 		    data, size, oob_offs);
2826 	}
2827 }
2828 
2829 __abortlike
2830 static void
2831 kfree_size_invalid_panic(void *data, size_t size)
2832 {
2833 	panic("kfree: addr %p trying to free with nonsensical size %zd",
2834 	    data, size);
2835 }
2836 
2837 __abortlike
2838 static void
2839 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2840     size_t max_size)
2841 {
2842 	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2843 	    data, size, min_size, max_size);
2844 }
2845 
2846 static void
2847 kfree_size_require(
2848 	kalloc_heap_t kheap,
2849 	void *addr,
2850 	vm_size_t min_size,
2851 	vm_size_t max_size)
2852 {
2853 	assert3u(min_size, <=, max_size);
2854 	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2855 	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2856 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2857 	if (elem_size > max_zone_size || elem_size < min_size) {
2858 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2859 	}
2860 }
2861 
2862 static void
2863 kfree_large(
2864 	vm_offset_t             addr,
2865 	vm_size_t               size,
2866 	kmf_flags_t             flags,
2867 	void                   *owner)
2868 {
2869 	size = kmem_free_guard(kernel_map, addr, size,
2870 	    flags | KMF_TAG | KMF_KASAN_GUARD,
2871 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2872 
2873 	counter_dec(&kalloc_large_count);
2874 	counter_add(&kalloc_large_total, -(uint64_t)size);
2875 	KALLOC_ZINFO_SFREE(size);
2876 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2877 }
2878 
2879 static void
2880 kfree_zone(
2881 	void                   *kheap_or_kt_view __unsafe_indexable,
2882 	void                   *data,
2883 	vm_size_t               size,
2884 	zone_t                  z,
2885 	vm_size_t               zsize)
2886 {
2887 	zone_security_flags_t zsflags = zone_security_config(z);
2888 	kalloc_type_var_view_t kt_view;
2889 	kalloc_heap_t kheap;
2890 	zone_stats_t zstats = NULL;
2891 
2892 	if (kt_is_var_view(kheap_or_kt_view)) {
2893 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2894 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2895 		/*
2896 		 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2897 		 * we will end up having incorrect stats. Cross frees may happen on
2898 		 * macOS due to allocation from an unprocessed view and free from
2899 		 * a processed view or vice versa.
2900 		 */
2901 		zstats  = kt_view->kt_stats;
2902 	} else {
2903 		kt_view = NULL;
2904 		kheap   = kheap_or_kt_view;
2905 	}
2906 
2907 	if (!zstats) {
2908 		zstats = kheap->kh_stats;
2909 	}
2910 
2911 	zsflags = zone_security_config(z);
2912 	if (kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED) {
2913 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2914 			kfree_heap_confusion_panic(kheap, data, size, z);
2915 		}
2916 	} else {
2917 		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2918 		    (zsflags.z_kheap_id != KHEAP_ID_EARLY)) {
2919 			kfree_heap_confusion_panic(kheap, data, size, z);
2920 		}
2921 	}
2922 
2923 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2924 
2925 	/* needs to be __nosan because the user size might be partial */
2926 	__nosan_bzero(data, zsize);
2927 	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2928 }
2929 
2930 __mockable void
2931 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2932 {
2933 	vm_size_t bucket_size;
2934 	zone_t z;
2935 
2936 	if (data == NULL) {
2937 		return;
2938 	}
2939 
2940 	if (size > KFREE_ABSURD_SIZE) {
2941 		kfree_size_invalid_panic(data, size);
2942 	}
2943 
2944 	if (size <= KHEAP_MAX_SIZE) {
2945 		vm_size_t oob_offs;
2946 
2947 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2948 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2949 			kfree_size_confusion_panic(z, data,
2950 			    oob_offs, size, bucket_size);
2951 		}
2952 
2953 		data = (char *)data - oob_offs;
2954 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2955 	} else {
2956 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2957 	}
2958 }
2959 
2960 void
2961 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2962 {
2963 	vm_offset_t oob_offs;
2964 	vm_size_t size, usize = 0;
2965 	zone_t z;
2966 
2967 	if (data == NULL) {
2968 		return;
2969 	}
2970 
2971 	size = zone_element_size(data, &z, true, &oob_offs);
2972 	if (size) {
2973 #if KASAN_CLASSIC
2974 		usize = kasan_user_size((vm_offset_t)data);
2975 #endif
2976 		data = (char *)data - oob_offs;
2977 		kfree_zone(kheap, data, usize, z, size);
2978 	} else {
2979 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2980 	}
2981 }
2982 
2983 #if XNU_PLATFORM_MacOSX
2984 void
2985 kfree_external(void *addr, vm_size_t size);
2986 void
2987 kfree_external(void *addr, vm_size_t size)
2988 {
2989 	kalloc_heap_t kheap = KHEAP_DEFAULT;
2990 
2991 	kfree_ext(kheap, addr, size);
2992 }
2993 #endif /* XNU_PLATFORM_MacOSX */
2994 
2995 void
2996 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2997     vm_size_t min_sz, vm_size_t max_sz)
2998 {
2999 	if (__improbable(addr == NULL)) {
3000 		return;
3001 	}
3002 	kfree_size_require(kheap, addr, min_sz, max_sz);
3003 	kfree_addr_ext(kheap, addr);
3004 }
3005 
3006 __mockable void *
3007 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
3008 {
3009 	zone_stats_t zs = kt_view->kt_zv.zv_stats;
3010 	zone_t       z  = kt_view->kt_zv.zv_zone;
3011 	zone_stats_t zs_cpu = zpercpu_get(zs);
3012 
3013 	if ((flags & Z_SET_NOTEARLY) ||
3014 	    os_atomic_load(&zs_cpu->zs_alloc_not_early, relaxed)) {
3015 		return zalloc_ext(z, zs, flags).addr;
3016 	}
3017 
3018 	assert(!zone_is_data_kheap(zone_security_config(z).z_kheap_id));
3019 	return zalloc_ext(kt_view->kt_zearly, zs, flags | Z_SET_NOTEARLY).addr;
3020 }
3021 
3022 void
3023 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
3024 {
3025 	/*
3026 	 * If the callsite is from a kext that isn't in the BootKC, it wasn't
3027 	 * processed during startup, so default to the kheap path.
3028 	 *
3029 	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the zone
3030 	 * is left NULL as we need to use the VM for the allocation/free.
3031 	 */
3032 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
3033 		kalloc_heap_t kheap;
3034 		vm_size_t size;
3035 
3036 		size  = kalloc_type_get_size(kt_view->kt_size);
3037 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
3038 		return kheap_free(kheap, ptr, size);
3039 	}
3040 	return kfree_type_impl(kt_view, ptr);
3041 }
3042 
3043 void
3044 kfree_type_var_impl_external(
3045 	kalloc_type_var_view_t  kt_view,
3046 	void                   *ptr,
3047 	vm_size_t               size);
3048 void
3049 kfree_type_var_impl_external(
3050 	kalloc_type_var_view_t  kt_view,
3051 	void                   *ptr,
3052 	vm_size_t               size)
3053 {
3054 	return kfree_type_var_impl(kt_view, ptr, size);
3055 }
3056 
3057 void
3058 kfree_data_external(void *ptr, vm_size_t size);
3059 void
3060 kfree_data_external(void *ptr, vm_size_t size)
3061 {
3062 	return kheap_free(GET_KEXT_KHEAP_DATA(), ptr, size);
3063 }
3064 
3065 void
3066 kfree_data_addr_external(void *ptr);
3067 void
3068 kfree_data_addr_external(void *ptr)
3069 {
3070 	return kheap_free_addr(GET_KEXT_KHEAP_DATA(), ptr);
3071 }
3072 
3073 void
3074 kfree_shared_data_external(void *ptr, vm_size_t size);
3075 void
3076 kfree_shared_data_external(void *ptr, vm_size_t size)
3077 {
3078 	return kheap_free(KHEAP_DATA_SHARED, ptr, size);
3079 }
3080 
3081 void
3082 kfree_shared_data_addr_external(void *ptr);
3083 void
3084 kfree_shared_data_addr_external(void *ptr)
3085 {
3086 	return kheap_free_addr(KHEAP_DATA_SHARED, ptr);
3087 }
3088 
3089 #pragma mark krealloc
3090 
3091 __abortlike
3092 static void
3093 krealloc_size_invalid_panic(void *data, size_t size)
3094 {
3095 	panic("krealloc: addr %p trying to free with nonsensical size %zd",
3096 	    data, size);
3097 }
3098 
3099 
3100 __attribute__((noinline))
3101 static struct kalloc_result
3102 krealloc_large(
3103 	kalloc_heap_t         kheap,
3104 	vm_offset_t           addr,
3105 	vm_size_t             old_size,
3106 	vm_size_t             new_size,
3107 	zalloc_flags_t        flags,
3108 	uint16_t              kt_hash,
3109 	void                 *owner __unused)
3110 {
3111 	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_KASAN_GUARD;
3112 	vm_size_t new_req_size = new_size;
3113 	vm_size_t old_req_size = old_size;
3114 	uint64_t delta;
3115 	kmem_return_t kmr;
3116 	vm_tag_t tag;
3117 
3118 	if (flags & Z_NOFAIL) {
3119 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
3120 		    (size_t)new_req_size);
3121 	}
3122 
3123 	/*
3124 	 * kmem_alloc could block, so return failure if Z_NOWAIT is set.
3125 	 *
3126 	 * Also, quickly reject sizes larger than our address space, as
3127 	 * kt_size or IOMallocArraySize() expect this.
3128 	 */
3129 	if ((flags & Z_NOWAIT) ||
3130 	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
3131 		return (struct kalloc_result){ };
3132 	}
3133 
3134 	/*
3135 	 * (73465472) on Intel we didn't use to pass this flag,
3136 	 * which in turn allowed kalloc_large() memory to be shared
3137 	 * with user directly.
3138 	 *
3139 	 * We're bound by this unfortunate ABI.
3140 	 */
3141 	if ((flags & Z_MAY_COPYINMAP) == 0) {
3142 #ifndef __x86_64__
3143 		kmr_flags |= KMR_KOBJECT;
3144 #endif
3145 	} else {
3146 		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
3147 	}
3148 	if (flags & Z_NOPAGEWAIT) {
3149 		kmr_flags |= KMR_NOPAGEWAIT;
3150 	}
3151 	if (flags & Z_ZERO) {
3152 		kmr_flags |= KMR_ZERO;
3153 	}
3154 	if (kheap == KHEAP_DATA_BUFFERS) {
3155 		kmr_flags |= KMR_DATA;
3156 	} else if (kheap == KHEAP_DATA_SHARED) {
3157 		kmr_flags |= KMR_DATA_SHARED;
3158 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
3159 		kmr_flags |= KMR_SPRAYQTN;
3160 	}
3161 	if (flags & Z_REALLOCF) {
3162 		kmr_flags |= KMR_REALLOCF;
3163 	}
3164 
3165 #if ZSECURITY_CONFIG(ZONE_TAGGING)
3166 	krealloc_enforce_large_tagging_policy(&kmr_flags, kheap);
3167 #endif /* ZSECURITY_CONFIG(ZONE_TAGGING) */
3168 
3169 	tag = zalloc_flags_get_tag(flags);
3170 	if (flags & Z_VM_TAG_BT_BIT) {
3171 		tag = vm_tag_bt() ?: tag;
3172 	}
3173 	if (tag == VM_KERN_MEMORY_NONE) {
3174 		tag = kheap->kh_tag;
3175 	}
3176 
3177 	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
3178 	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
3179 
3180 	new_size = round_page(new_req_size);
3181 	old_size = round_page(old_req_size);
3182 
3183 	if (kmr.kmr_address != 0) {
3184 		delta = (uint64_t)(new_size - old_size);
3185 	} else if (flags & Z_REALLOCF) {
3186 		counter_dec(&kalloc_large_count);
3187 		delta = (uint64_t)(-old_size);
3188 	} else {
3189 		delta = 0;
3190 	}
3191 
3192 	counter_add(&kalloc_large_total, delta);
3193 	KALLOC_ZINFO_SALLOC(delta);
3194 
3195 	if (addr != 0 || (flags & Z_REALLOCF)) {
3196 		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
3197 		    void*, addr);
3198 	}
3199 	if (__improbable(kmr.kmr_address == 0)) {
3200 		return (struct kalloc_result){ };
3201 	}
3202 
3203 	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
3204 	    void*, kmr.kmr_address);
3205 
3206 	if (flags & Z_KALLOC_ARRAY) {
3207 		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
3208 		    new_req_size);
3209 	}
3210 	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
3211 }
3212 
3213 #undef krealloc_ext
3214 
3215 struct kalloc_result
3216 krealloc_ext(
3217 	void                 *kheap_or_kt_view __unsafe_indexable,
3218 	void                 *addr,
3219 	vm_size_t             old_size,
3220 	vm_size_t             new_size,
3221 	zalloc_flags_t        flags,
3222 	void                 *owner)
3223 {
3224 	vm_size_t old_bucket_size, new_bucket_size, min_size;
3225 	kalloc_type_var_view_t kt_view;
3226 	kalloc_heap_t kheap;
3227 	zone_stats_t zstats = NULL;
3228 	struct kalloc_result kr;
3229 	vm_offset_t oob_offs = 0;
3230 	zone_t old_z, new_z;
3231 	uint16_t kt_hash = 0;
3232 	zone_id_t zstart;
3233 
3234 	if (old_size > KFREE_ABSURD_SIZE) {
3235 		krealloc_size_invalid_panic(addr, old_size);
3236 	}
3237 
3238 	if (addr == NULL && new_size == 0) {
3239 		return (struct kalloc_result){ };
3240 	}
3241 
3242 	if (kt_is_var_view(kheap_or_kt_view)) {
3243 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
3244 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
3245 		/*
3246 		 * Similar to kalloc_ext: Use stats from view if present,
3247 		 * else use stats from kheap.
3248 		 *
3249 		 * krealloc_type isn't exposed to kexts, so we don't need to
3250 		 * handle cross frees and can rely on stats from view or kheap.
3251 		 */
3252 		zstats  = kt_view->kt_stats;
3253 		kt_hash = KT_GET_HASH(kt_view->kt_flags);
3254 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
3255 	} else {
3256 		kt_view = NULL;
3257 		kheap   = kheap_or_kt_view;
3258 		kt_hash = kheap->kh_type_hash;
3259 		zstart  = kheap->kh_zstart;
3260 	}
3261 
3262 	if (!zstats) {
3263 		zstats = kheap->kh_stats;
3264 	}
3265 	/*
3266 	 * Find out the size of the bucket in which the new sized allocation
3267 	 * would land. If it matches the bucket of the original allocation,
3268 	 * simply return the same address.
3269 	 */
3270 	if (new_size == 0) {
3271 		new_z = ZONE_NULL;
3272 		new_bucket_size = new_size = 0;
3273 	} else {
3274 		zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
3275 		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
3276 		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
3277 	}
3278 #if !KASAN_CLASSIC
3279 	if (flags & Z_FULLSIZE) {
3280 		new_size = new_bucket_size;
3281 	}
3282 #endif /* !KASAN_CLASSIC */
3283 
3284 	if (addr == NULL) {
3285 		old_z = ZONE_NULL;
3286 		old_size = old_bucket_size = 0;
3287 	} else if (kheap_size_from_zone(addr, old_size, flags)) {
3288 		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
3289 		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
3290 			kfree_size_confusion_panic(old_z, addr,
3291 			    oob_offs, old_size, old_bucket_size);
3292 		}
3293 		__builtin_assume(old_z != ZONE_NULL);
3294 	} else {
3295 		old_z = ZONE_NULL;
3296 		old_bucket_size = round_page(old_size);
3297 	}
3298 	min_size = MIN(old_size, new_size);
3299 
3300 	if (old_bucket_size == new_bucket_size && old_z) {
3301 		kr.addr = (char *)addr - oob_offs;
3302 		kr.size = new_size;
3303 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
3304 		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
3305 		    new_size, new_bucket_size);
3306 		if (kr.addr != addr) {
3307 			memmove(kr.addr, addr, min_size);
3308 			bzero((char *)kr.addr + min_size,
3309 			    kr.size - min_size);
3310 		}
3311 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3312 #if KASAN
3313 		/*
3314 		 * On KASAN kernels, treat a reallocation effectively as a new
3315 		 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't amount
3316 		 * to much extra work; on KASAN_TBI, assign a new tag both to the
3317 		 * to much extra work, on KASAN_TBI, assign a new tag both to the
3318 		 * buffer and to the potential free space.
3319 		 */
3320 #if KASAN_CLASSIC
3321 		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
3322 		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
3323 		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
3324 #endif /* KASAN_CLASSIC */
3325 #if KASAN_TBI
3326 		/*
3327 		 * Validate the current buffer, then generate a new tag;
3328 		 * even if the address is stable, it's a "new" allocation.
3329 		 */
3330 		__asan_loadN((vm_offset_t)addr, old_size);
3331 		kr.addr = vm_memtag_generate_and_store_tag(kr.addr, kr.size);
3332 		kasan_tbi_retag_unused_space(kr.addr, new_bucket_size, kr.size);
3333 #endif /* KASAN_TBI */
3334 #endif /* KASAN */
3335 		goto out_success;
3336 	}
3337 
3338 #if !KASAN
3339 	/*
3340 	 * Fallthrough to krealloc_large() for KASAN,
3341 	 * because we can't use kasan_check_alloc()
3342 	 * on kalloc_large() memory.
3343 	 *
3344 	 * kmem_realloc_guard() will perform all the validations,
3345 	 * and re-tagging.
3346 	 */
3347 	if (old_bucket_size == new_bucket_size) {
3348 		kr.addr = (char *)addr - oob_offs;
3349 		kr.size = new_size;
3350 		goto out_success;
3351 	}
3352 #endif
3353 
3354 	if (addr && !old_z && new_size && !new_z) {
3355 		return krealloc_large(kheap, (vm_offset_t)addr,
3356 		           old_size, new_size, flags, kt_hash, owner);
3357 	}
3358 
	if (!new_size) {
		kr.addr = NULL;
		kr.size = 0;
	} else if (new_z) {
		kr = kalloc_zone(new_z, zstats,
		    flags & ~Z_KALLOC_ARRAY, new_size);
	} else if (old_z || addr == NULL) {
		kr = kalloc_large(kheap, new_size,
		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
	}

	if (addr && kr.addr) {
		__nosan_memcpy(kr.addr, addr, min_size);
	}

	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
		if (old_z) {
			kfree_zone(kheap_or_kt_view,
			    (char *)addr - oob_offs, old_size,
			    old_z, old_bucket_size);
		} else {
			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
		}
	}

	if (__improbable(kr.addr == NULL)) {
		return kr;
	}

out_success:
	if ((flags & Z_KALLOC_ARRAY) == 0) {
		return kr;
	}

	if (new_z) {
		kr.addr = __kalloc_array_encode_zone(new_z,
		    kr.addr, kr.size);
	} else {
		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
		    kr.size);
	}
	return kr;
}
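
/*
 * Illustrative sketch (not compiled, not part of this file's API
 * surface): how a caller might grow a buffer with krealloc_ext().
 * Z_REALLOCF means the old allocation is freed even if the resize
 * fails, so the old pointer must not be reused afterwards. The
 * helper name `grow_buffer' and its arguments are hypothetical.
 */
#if 0
static void *
grow_buffer(kalloc_heap_t heap, void *buf, vm_size_t old_size,
    vm_size_t new_size)
{
	struct kalloc_result kr;

	kr = krealloc_ext(heap, buf, old_size, new_size,
	    Z_REALLOCF | Z_WAITOK, NULL);
	/* on failure, Z_REALLOCF has already freed `buf' */
	return kr.addr;
}
#endif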

void *
krealloc_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags);
void *
krealloc_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

void *
krealloc_shared_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags);
void *
krealloc_shared_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

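/*
 * Startup initialization of kalloc heap views: a child heap inherits
 * its parent's zone start, heap id and VM tag but gets its own per-CPU
 * statistics; each kheap_init_*() variant below then pushes the new
 * heap onto its parent's kh_views singly-linked list.
 */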
__startup_func
static void
kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
{
	kheap->kh_zstart      = parent_heap->kh_zstart;
	kheap->kh_heap_id     = parent_heap->kh_heap_id;
	kheap->kh_tag         = parent_heap->kh_tag;
	kheap->kh_stats       = zalloc_percpu_permanent_type(struct zone_stats);
	zone_view_count += 1;
}

__startup_func
static void
kheap_init_data(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_BUFFERS, kheap);
	kheap->kh_views               = KHEAP_DATA_BUFFERS->kh_views;
	KHEAP_DATA_BUFFERS->kh_views  = kheap;
}

__startup_func
static void
kheap_init_data_shared(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_SHARED, kheap);
	kheap->kh_views               = KHEAP_DATA_SHARED->kh_views;
	KHEAP_DATA_SHARED->kh_views   = kheap;
}

__startup_func
static void
kheap_init_var(kalloc_heap_t kheap)
{
	uint16_t idx;
	struct kheap_info *parent_heap;

	kheap_init(KHEAP_KT_VAR, kheap);
	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
	    KT_VAR__FIRST_FLEXIBLE_HEAP;
	parent_heap = &kalloc_type_heap_array[idx];
	kheap->kh_zstart = parent_heap->kh_zstart;
	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
		(uint32_t) early_random(), 0);
	kheap->kh_views       = parent_heap->kh_views;
	parent_heap->kh_views = kheap;
}

__startup_func
void
kheap_startup_init(kalloc_heap_t kheap)
{
	switch (kheap->kh_heap_id) {
	case KHEAP_ID_DATA_BUFFERS:
		kheap_init_data(kheap);
		break;
	case KHEAP_ID_DATA_SHARED:
		kheap_init_data_shared(kheap);
		break;
	case KHEAP_ID_KT_VAR:
		kheap_init_var(kheap);
		break;
	default:
		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
		    kheap->kh_heap_id);
	}
}
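
/*
 * A sketch, assuming the KALLOC_HEAP_DEFINE() convenience macro from
 * kalloc.h: heaps that reach kheap_startup_init() are typically
 * declared along the lines of
 *
 *	KALLOC_HEAP_DEFINE(KHEAP_MY_VIEW, "my view", KHEAP_ID_DATA_BUFFERS);
 *
 * which registers the view with the startup subsystem so that it gets
 * initialized here.
 */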

#pragma mark IOKit/libkern helpers

#if XNU_PLATFORM_MacOSX

void *
kern_os_malloc_external(size_t size);
void *
kern_os_malloc_external(size_t size)
{
	if (size == 0) {
		return NULL;
	}

	return kheap_alloc(KERN_OS_MALLOC, size,
	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
}

void
kern_os_free_external(void *addr);
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}

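/*
 * kern_os_realloc_external() must rediscover the old allocation size:
 * zone_element_size() answers for zone-backed memory, and a result of
 * 0 means the memory came from the VM (kalloc_large()), in which case
 * kmem_size_guard() is consulted instead. On KASAN_CLASSIC kernels the
 * zone answer is replaced with the user-visible size so that redzones
 * are not treated as payload.
 */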
void *
kern_os_realloc_external(void *addr, size_t nsize);
void *
kern_os_realloc_external(void *addr, size_t nsize)
{
	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
	vm_size_t osize, oob_offs = 0;

	if (addr == NULL) {
		return kern_os_malloc_external(nsize);
	}

	osize = zone_element_size(addr, NULL, false, &oob_offs);
	if (osize == 0) {
		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
#if KASAN_CLASSIC
	} else {
		osize = kasan_user_size((vm_offset_t)addr);
#endif
	}
	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
}

#endif /* XNU_PLATFORM_MacOSX */

void
kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(size)
	zfree(zone, addr);
#else
	if (zone_owns(zone, addr)) {
		zfree(zone, addr);
	} else {
		/*
		 * Third party kexts might not know about operator new
		 * overrides, so the object may have been allocated from
		 * the default heap rather than this zone.
		 */
		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
		    zone->z_name);
		kheap_free(KHEAP_DEFAULT, addr, size);
	}
#endif
}

bool
IOMallocType_from_vm(kalloc_type_view_t ktv)
{
	return kalloc_type_from_vm(ktv->kt_flags);
}

void
kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(esize)
#else
	/*
	 * For third party kexts compiled against an SDK predating macOS 11,
	 * allocating an OSObject that is defined in xnu or a first party
	 * kext by calling new directly goes through
	 * OSObject_operator_new_external and therefore uses the default
	 * heap. If xnu later frees that object, it does so with the typed
	 * free path, which panics unless the object was allocated from a
	 * kalloc.type zone. To work around this, detect when the
	 * allocation being freed came from the default heap and allow
	 * freeing back to it.
	 */
	zone_id_t zid = zone_id_for_element(addr, esize);
	if (__probable(zid < MAX_ZONES)) {
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
			return kheap_free(KHEAP_DEFAULT, addr, esize);
		}
	}
#endif
	kfree_type_impl_external(ktv, addr);
}

#pragma mark tests
#if DEBUG || DEVELOPMENT

#include <sys/random.h>

/*
 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
 *
 * Note: the presence of zones named kalloc.type* is used to
 * determine whether the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!zone_found) {
		return 0;
	}

	return 1;
}

/*
 * Ensure that the policy uses the zone budget completely.
 */
static int
kalloc_type_test_policy(int64_t in)
{
	uint16_t zone_budget = (uint16_t) in;
	uint16_t max_bucket_freq = 25;
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
	uint16_t wasted_zone_budget = 0, total_types = 0;
	uint16_t n_zones = 0, n_zones_cal = 0;
	int ret = 0;

	/*
	 * Need a minimum of 2 zones per size class.
	 */
	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
		return ret;
	}
	read_random((void *)&random[0], sizeof(random));
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;

		freq_list[i] = r1 > r2 ? r2 : r1;
		freq_total_list[i] = r1 > r2 ? r1 : r2;
	}
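
	/*
	 * freq_list[i] now holds the per-signature frequency (the
	 * smaller draw) and freq_total_list[i] the per-type frequency
	 * (the larger draw) for size class i. The policy below must
	 * hand out the entire budget, and the per-type split of a
	 * random zone count must sum back exactly.
	 */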
	wasted_zone_budget = kalloc_type_apply_policy(
		freq_list, freq_total_list,
		zones_per_sig, zones_per_type, zone_budget);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		total_types += freq_total_list[i];
	}

	n_zones = kmem_get_random16(total_types);
	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
		    freq_total_list[i], total_types,
		    (i == MAX_K_ZONE(kt_zone_cfg) - 1) ? true : false);

		n_zones_cal += n_zones_for_type;

		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
	}
	printf("-----------------------\n%u\t%u\n", total_types,
	    n_zones_cal);

	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
		ret = 1;
	}
	return ret;
}

/*
 * Ensure that the sizes of kalloc_type adopters fit within the zones
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;

	while (kt_cur != NULL) {
		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
			return 0;
		}
		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
	}

	return 1;
}

struct test_kt_data {
	int a;
};

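/*
 * A type whose layout is all plain data (a single int here) should
 * have its kalloc_type view flagged as data and be redirected to the
 * data heap; kalloc_type_test_data_redirect() checks exactly that.
 */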
static int
kalloc_type_test_data_redirect(void)
{
	struct kalloc_type_view ktv_data = {
		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
	};
	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
		printf("%s: data redirect failed\n", __func__);
		return 0;
	}
	return 1;
}

static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all-data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
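
/*
 * A sketch, assuming the debug.test sysctl namespace that
 * SYSCTL_TEST_REGISTER() is understood to use on DEBUG/DEVELOPMENT
 * kernels: the test above would then be driven from user space with
 * something like
 *
 *	sysctl debug.test.kalloc_type=<zone budget>
 *
 * where the written value becomes the `in' argument (the zone budget
 * handed to kalloc_type_test_policy()).
 */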

static vm_size_t
test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
{
	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);

	return z ? zone_elem_inner_size(z) : round_page(size);
}

static int
run_kalloc_test_kheap(kalloc_heap_t kheap)
{
	uint64_t *data_ptr;
	void *strippedp_old, *strippedp_new;
	size_t alloc_size = 0, old_alloc_size = 0;
	struct kalloc_result kr = {};

	printf("%s: %s test running\n", __func__, kheap->kh_name);

	/*
	 * Test size 0: alloc, free, realloc
	 */
	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	if (!data_ptr) {
		printf("%s: kalloc 0 returned null\n", __func__);
		return 1;
	}
	kheap_free(kheap, data_ptr, alloc_size);

	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	alloc_size = sizeof(uint64_t) + 1;
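	/*
	 * kr is still zero-initialized at this point, so this exercises
	 * the NULL / old-size-0 realloc path, which behaves like a
	 * fresh allocation.
	 */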
	data_ptr = krealloc_ext(kheap, kr.addr, old_alloc_size,
	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> old size 0 failed\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	/*
	 * Test krealloc: same size class, different size classes,
	 * 2 pages, VM (with owner)
	 */
	old_alloc_size = alloc_size;
	alloc_size++;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old != strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) !=
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> same size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	old_alloc_size = alloc_size;
	alloc_size *= 2;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old == strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) ==
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> different size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	kheap_free(kheap, kr.addr, alloc_size);

	alloc_size = 3544;
	data_ptr = kalloc_ext(kheap, alloc_size,
	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
		    __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, alloc_size,
	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> 2pgs returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, PAGE_SIZE * 2,
	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM1 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 2,
	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM2 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

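	/* reallocating to size 0 simply frees the allocation */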
	krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 4,
	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);

	printf("%s: test passed\n", __func__);
	return 0;
}

static int
run_kalloc_test(int64_t in __unused, int64_t *out)
{
	*out = 1;

	if (run_kalloc_test_kheap(KHEAP_DATA_BUFFERS) != 0 ||
	    run_kalloc_test_kheap(KHEAP_DATA_SHARED) != 0) {
		*out = 0;
	}

	return 0;
}
SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);

#endif /* DEBUG || DEVELOPMENT */
