xref: /xnu-12377.1.9/osfmk/kern/kalloc.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/kalloc.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	General kernel memory allocator.  This allocator is designed
64  *	to be used by the kernel to manage dynamic memory fast.
65  */
66 
67 #include "mach/vm_types.h"
68 #include <mach/boolean.h>
69 #include <mach/sdt.h>
70 #include <mach/machine/vm_types.h>
71 #include <mach/vm_param.h>
72 #include <kern/misc_protos.h>
73 #include <kern/counter.h>
74 #include <kern/zalloc_internal.h>
75 #include <kern/kalloc.h>
76 #include <kern/ledger.h>
77 #include <kern/backtrace.h>
78 #include <vm/vm_kern_internal.h>
79 #include <vm/vm_object_xnu.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_memtag.h>
82 #include <sys/kdebug.h>
83 
84 #include <os/hash.h>
85 #include <san/kasan.h>
86 #include <libkern/section_keywords.h>
87 #include <libkern/prelink.h>
88 
89 
90 SCALABLE_COUNTER_DEFINE(kalloc_large_count);
91 SCALABLE_COUNTER_DEFINE(kalloc_large_total);
92 
93 #pragma mark initialization
94 
95 /*
96  * All allocations of size less than KHEAP_MAX_SIZE are rounded up to the nearest
97  * sized zone.  This allocator is built on top of the zone allocator.  A zone
98  * is created for each potential size that we are willing to get in small
99  * blocks.
100  *
101  * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
102  */
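/*
 * Example (illustrative sketch, not part of the original file): with size
 * classes {16, 32, 48, 64, ...}, a request is served by the first zone
 * whose element size can hold it:
 *
 *     kalloc(20)                 -> 32-byte zone  (rounded up)
 *     kalloc(48)                 -> 48-byte zone  (exact fit)
 *     kalloc(KHEAP_MAX_SIZE + 1) -> kalloc_large(), backed by the VM
 */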
103 
104 /*
105  * The kt_zone_cfg table defines the configuration of zones on various
106  * platforms for kalloc_type fixed size allocations.
107  */
108 
109 #if KASAN_CLASSIC
110 #define K_SIZE_CLASS(size)    \
111 	(((size) & PAGE_MASK) == 0 ? (size) : \
112 	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
113 #else
114 #define K_SIZE_CLASS(size)    (size)
115 #endif
116 static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
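/*
 * Worked example (illustrative): page-multiple sizes and sizes <= 1024
 * keep their nominal value, while larger sub-page classes shed a redzone
 * under KASAN_CLASSIC:
 *
 *     K_SIZE_CLASS(512)   == 512                       (<= 1024)
 *     K_SIZE_CLASS(2048)  == 2048 - KASAN_GUARD_SIZE   (sub-page, > 1024)
 *     K_SIZE_CLASS(16384) == 16384                     (page multiple)
 */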
117 
118 static const uint16_t kt_zone_cfg[] = {
119 	K_SIZE_CLASS(16),
120 	K_SIZE_CLASS(32),
121 	K_SIZE_CLASS(48),
122 	K_SIZE_CLASS(64),
123 	K_SIZE_CLASS(80),
124 	K_SIZE_CLASS(96),
125 	K_SIZE_CLASS(128),
126 	K_SIZE_CLASS(160),
127 	K_SIZE_CLASS(192),
128 	K_SIZE_CLASS(224),
129 	K_SIZE_CLASS(256),
130 	K_SIZE_CLASS(288),
131 	K_SIZE_CLASS(368),
132 	K_SIZE_CLASS(400),
133 	K_SIZE_CLASS(512),
134 	K_SIZE_CLASS(576),
135 	K_SIZE_CLASS(768),
136 	K_SIZE_CLASS(1024),
137 	K_SIZE_CLASS(1152),
138 	K_SIZE_CLASS(1280),
139 	K_SIZE_CLASS(1664),
140 	K_SIZE_CLASS(2048),
141 	K_SIZE_CLASS(4096),
142 	K_SIZE_CLASS(6144),
143 	K_SIZE_CLASS(8192),
144 	K_SIZE_CLASS(12288),
145 	K_SIZE_CLASS(16384),
146 #if __arm64__
147 	K_SIZE_CLASS(24576),
148 	K_SIZE_CLASS(32768),
149 #endif /* __arm64__ */
150 };
151 
152 #define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))
153 
154 /*
155  * kalloc_type callsites are assigned a zone during early boot. They
156  * use the dlut[] (direct lookup table), indexed by size normalized
157  * to the minimum alignment to find the right zone index quickly.
158  */
159 #define INDEX_ZDLUT(size)       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
160 #define KALLOC_DLUT_SIZE        (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
161 #define MAX_SIZE_ZDLUT          ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
162 static __startup_data uint8_t   kalloc_type_dlut[KALLOC_DLUT_SIZE];
163 static __startup_data uint32_t  kheap_zsize[KHEAP_NUM_ZONES];
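/*
 * Worked example (assuming KALLOC_MINALIGN == 16, typical for 64-bit): a
 * 72-byte request normalizes to slot INDEX_ZDLUT(72) == (72 + 15) / 16 == 5,
 * and kalloc_type_dlut[5] holds the index of the smallest kt_zone_cfg entry
 * (80 here) that fits every size in that slot.
 */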
164 
165 #if VM_TAG_SIZECLASSES
166 static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
167 #endif
168 
169 const char * const kalloc_heap_names[] = {
170 	[KHEAP_ID_NONE]          = "",
171 	[KHEAP_ID_EARLY]         = "early.",
172 	[KHEAP_ID_DATA_BUFFERS]  = "data.",
173 	[KHEAP_ID_DATA_SHARED]   = "data_shared.",
174 	[KHEAP_ID_KT_VAR]        = "",
175 };
176 
177 /*
178  * Early heap configuration
179  */
180 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_EARLY[1] = {
181 	{
182 		.kh_name     = "early.kalloc",
183 		.kh_heap_id  = KHEAP_ID_EARLY,
184 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE,
185 	}
186 };
187 
188 /*
189  * Bag of bytes heap configuration
190  */
191 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
192 	{
193 		.kh_name     = "data.kalloc",
194 		.kh_heap_id  = KHEAP_ID_DATA_BUFFERS,
195 		.kh_tag      = VM_KERN_MEMORY_KALLOC_DATA,
196 	}
197 };
198 
199 /*
200  * Configuration of variable kalloc type heaps
201  */
202 SECURITY_READ_ONLY_LATE(struct kheap_info)
203 kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
204 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
205 	{
206 		.kh_name     = "kalloc.type.var",
207 		.kh_heap_id  = KHEAP_ID_KT_VAR,
208 		.kh_tag      = VM_KERN_MEMORY_KALLOC_TYPE
209 	}
210 };
211 
212 /*
213  * Shared heap configuration
214  */
215 SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_SHARED[1] = {
216 	{
217 		.kh_name     = "data_shared.kalloc",
218 		.kh_heap_id  = KHEAP_ID_DATA_SHARED,
219 		.kh_tag      = VM_KERN_MEMORY_KALLOC_SHARED,
220 	}
221 };
222 
223 KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
224 
225 __startup_func
226 static void
227 kalloc_zsize_compute(void)
228 {
229 	uint32_t step = KHEAP_STEP_START;
230 	uint32_t size = KHEAP_START_SIZE;
231 
232 	/*
233 	 * Manually initialize extra initial zones
234 	 */
235 	kheap_zsize[0] = size / 2;
236 	kheap_zsize[1] = size;
237 	static_assert(KHEAP_EXTRA_ZONES == 2);
238 
239 	/*
240 	 * Compute sizes for remaining zones
241 	 */
242 	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
243 		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
244 
245 		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
246 		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);
247 
248 		step *= 2;
249 		size += step;
250 	}
251 }
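/*
 * Worked example (assuming KHEAP_START_SIZE == 32 and KHEAP_STEP_START
 * == 16): each loop iteration emits the pair (size + step, size + 2 * step)
 * and then doubles the step, so the table reads
 *     kheap_zsize[] = { 16, 32, 48, 64, 96, 128, 192, 256, 384, 512,
 *                       768, 1024, 1536, 2048, ... }
 * up to KHEAP_MAX_SIZE.
 */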
252 
253 static zone_t
254 kalloc_zone_for_size_with_flags(
255 	zone_id_t               zid,
256 	vm_size_t               size,
257 	zalloc_flags_t          flags)
258 {
259 	vm_size_t max_size = KHEAP_MAX_SIZE;
260 	bool forcopyin = flags & Z_MAY_COPYINMAP;
261 	zone_t zone;
262 
263 	if (flags & Z_KALLOC_ARRAY) {
264 		size = roundup(size, KALLOC_ARRAY_GRANULE);
265 	}
266 
267 	if (forcopyin) {
268 #if __x86_64__
269 		/*
270 		 * On Intel, the OSData() ABI used to allocate
271 		 * from the kernel map starting at PAGE_SIZE.
272 		 *
273 		 * If only vm_map_copyin() or a wrapper is used,
274 		 * then everything will work fine because vm_map_copy_t
275 		 * will perform an actual copy if the data is smaller
276 		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
277 		 *
278 		 * However, if anyone is trying to call mach_vm_remap(),
279 		 * then bad things (TM) happen.
280 		 *
281 		 * Avoid this by preserving the ABI and moving
282 		 * to kalloc_large() earlier.
283 		 *
284 		 * Any recent code really ought to use IOMemoryDescriptor
285 		 * for this purpose however.
286 		 */
287 		max_size = PAGE_SIZE - 1;
288 #endif
289 	}
290 
291 	if (size <= max_size) {
292 		uint32_t idx;
293 
294 		if (size <= KHEAP_START_SIZE) {
295 			zid  += (size > 16);
296 		} else {
297 			/*
298 			 * . log2down(size - 1) is log2up(size) - 1
299 			 * . (size - 1) >> (log2down(size - 1) - 1)
300 			 *   is either 0x2 or 0x3
301 			 */
302 			idx   = kalloc_log2down((uint32_t)(size - 1));
303 			zid  += KHEAP_EXTRA_ZONES +
304 			    2 * (idx - KHEAP_START_IDX) +
305 			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
306 		}
307 
308 		zone = zone_by_id(zid);
309 #if KASAN_CLASSIC
310 		/*
311 		 * Under kasan classic, certain size classes are a redzone
312 		 * away from the mathematical formula above, and we need
313 		 * to "go to the next zone".
314 		 *
315 		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
316 		 * this will never go to an "invalid" zone that doesn't
317 		 * belong to the kheap.
318 		 */
319 		if (size > zone_elem_inner_size(zone)) {
320 			zone++;
321 		}
322 #endif
323 		return zone;
324 	}
325 
326 	return ZONE_NULL;
327 }
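/*
 * Worked example (assuming KHEAP_EXTRA_ZONES == 2 per the static_assert
 * above, and KHEAP_START_IDX == log2(KHEAP_START_SIZE) == 5): for
 * size == 1000, idx = kalloc_log2down(999) == 9, so
 *     zid += 2 + 2 * (9 - 5) + (999 >> 8) - 2 == 2 + 8 + 3 - 2 == 11,
 * which lands on the 1024-byte zone of the sketch above
 * (kheap_zsize[11] == 1024).
 */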
328 
329 zone_t
330 kalloc_zone_for_size(zone_id_t zid, size_t size)
331 {
332 	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
333 }
334 
335 static inline bool
336 kheap_size_from_zone(
337 	void                   *addr,
338 	vm_size_t               size,
339 	zalloc_flags_t          flags)
340 {
341 	vm_size_t max_size = KHEAP_MAX_SIZE;
342 	bool forcopyin = flags & Z_MAY_COPYINMAP;
343 
344 #if __x86_64__
345 	/*
346 	 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
347 	 * behavior, the element could have a PAGE_SIZE reported size,
348 	 * yet still be from a zone for Z_MAY_COPYINMAP.
349 	 */
350 	if (forcopyin) {
351 		if (size == PAGE_SIZE &&
352 		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
353 			return true;
354 		}
355 
356 		max_size = PAGE_SIZE - 1;
357 	}
358 #else
359 #pragma unused(addr, forcopyin)
360 #endif
361 
362 	return size <= max_size;
363 }
364 
365 /*
366  * Data zones shouldn't use the early zone, so set the no-early-alloc
367  * bit right after creation.
368  */
369 __startup_func
370 static void
371 kalloc_set_no_early_for_data(
372 	zone_kheap_id_t       kheap_id,
373 	zone_stats_t          zstats)
374 {
375 	if (zone_is_data_kheap(kheap_id)) {
376 		zpercpu_foreach(zs, zstats) {
377 			os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
378 		}
379 	}
380 }
381 
382 __startup_func
383 static void
384 kalloc_zone_init(
385 	const char           *kheap_name,
386 	zone_kheap_id_t       kheap_id,
387 	zone_id_t            *kheap_zstart,
388 	zone_create_flags_t   zc_flags)
389 {
390 	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
391 		zc_flags |= ZC_DATA;
392 	}
393 
394 	if (kheap_id == KHEAP_ID_DATA_SHARED) {
395 		zc_flags |= ZC_SHARED_DATA;
396 	}
397 
398 	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
399 		uint32_t size = kheap_zsize[i];
400 		char buf[MAX_ZONE_NAME], *z_name;
401 		int len;
402 
403 		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
404 		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
405 		strlcpy(z_name, buf, len + 1);
406 
407 		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
408 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
409 			uint32_t scale = kalloc_log2down(size / 32);
410 
411 			if (size == 32 << scale) {
412 			        z->z_array_size_class = scale;
413 			} else {
414 			        z->z_array_size_class = scale | 0x10;
415 			}
416 #endif
417 			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
418 			if (i == 0) {
419 			        *kheap_zstart = zone_index(z);
420 			}
421 			kalloc_set_no_early_for_data(kheap_id, z->z_stats);
422 		});
423 	}
424 }
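/*
 * Example (illustrative): kalloc_heap_init() below passes kheap_name
 * "kalloc", so the loop above materializes one zone per kheap_zsize[]
 * entry, named "kalloc.16", "kalloc.32", ..., "kalloc.<KHEAP_MAX_SIZE>".
 */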
425 
426 __startup_func
427 static void
428 kalloc_heap_init(struct kalloc_heap *kheap)
429 {
430 	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
431 	    ZC_NONE);
432 	/*
433 	 * Count all the "raw" views for zones in the heap.
434 	 */
435 	zone_view_count += KHEAP_NUM_ZONES;
436 }
437 
438 #define KEXT_ALIGN_SHIFT           6
439 #define KEXT_ALIGN_BYTES           (1<< KEXT_ALIGN_SHIFT)
440 #define KEXT_ALIGN_MASK            (KEXT_ALIGN_BYTES-1)
441 #define kt_scratch_size            (256ul << 10)
442 #define KALLOC_TYPE_SECTION(type) \
443 	(type == KTV_FIXED? "__kalloc_type": "__kalloc_var")
444 
445 /*
446  * Enum to specify the kalloc_type variant being used.
447  */
448 __options_decl(kalloc_type_variant_t, uint16_t, {
449 	KTV_FIXED     = 0x0001,
450 	KTV_VAR       = 0x0002,
451 });
452 
453 /*
454  * Macros that generate the appropriate kalloc_type variant (i.e. fixed or
455  * variable) of the desired variable/function.
456  */
457 #define kalloc_type_var(type, var)              \
458 	((type) == KTV_FIXED?                       \
459 	(vm_offset_t) kalloc_type_##var##_fixed:    \
460 	(vm_offset_t) kalloc_type_##var##_var)
461 #define kalloc_type_func(type, func, ...)       \
462 	((type) == KTV_FIXED?                       \
463 	kalloc_type_##func##_fixed(__VA_ARGS__):    \
464 	kalloc_type_##func##_var(__VA_ARGS__))
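/*
 * Example (illustrative): kalloc_type_func(KTV_FIXED, view_sz) expands to
 * kalloc_type_view_sz_fixed(), and kalloc_type_var(KTV_VAR, sec_start)
 * evaluates to (vm_offset_t)kalloc_type_sec_start_var, which lets the
 * parsing code below share one implementation across both variants.
 */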
465 
466 TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
467 TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
468     ZSECURITY_CONFIG_KT_VAR_BUDGET);
469 TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
470     ZSECURITY_CONFIG_KT_BUDGET);
471 TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
472 static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
473 
474 
475 /**
476  * @const kexts_enroll_data_shared
477  *
478  * @brief
479  * We have two heaps for data allocations:
480  *     - KHEAP_DATA_BUFFERS, which is for allocations that are never shared.
481  *     - KHEAP_DATA_SHARED, which is for allocations that need to be shared.
482  *
483  * This is a control that indicates which heap we expose to kexts via the
484  * exported allocation functions.
485  */
486 STATIC_IF_KEY_DEFINE_TRUE(kexts_enroll_data_shared);
487 
488 /**
489  * @const restricted_data_mode
490  *
491  * @brief
492  * This is a control that sets the mode of mapping policies
493  * enforcement on data allocations:
494  *     - none: the state before the change (no telemetry, no enforcement).
495  *     - telemetry: do not enforce, but do emit telemetry.
496  *     - enforce: type the KHEAP_DATA_BUFFERS pages as restricted mappings.
497  *
498  * Combined with kexts_enroll_data_shared, we can create the modes we need
499  * for none/telemetry/enforcement on core kernel/kexts.
500  *
501  * restricted_data_mode_t is an enum used to specify the mode being used.
502  */
503 
504 __options_decl(restricted_data_mode_t, uint8_t, {
505 	RESTRICTED_DATA_MODE_NONE      = 0x0000,
506 	RESTRICTED_DATA_MODE_TELEMETRY = 0x0001,
507 	RESTRICTED_DATA_MODE_ENFORCE   = 0x0002
508 });
509 
510 TUNABLE(restricted_data_mode_t,
511     restricted_data_mode,
512     "restricted_data_mode",
513 #if __x86_64__
514     RESTRICTED_DATA_MODE_NONE
515 #else
516     RESTRICTED_DATA_MODE_TELEMETRY
517 #endif /* __x86_64__ */
518     );
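/*
 * Example (illustrative): as a TUNABLE this default can be overridden from
 * the boot command line, e.g. restricted_data_mode=2 requests
 * RESTRICTED_DATA_MODE_ENFORCE on a platform that defaults to telemetry.
 */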
519 
520 inline bool
521 kalloc_is_restricted_data_mode_telemetry(void)
522 {
523 	return restricted_data_mode == RESTRICTED_DATA_MODE_TELEMETRY;
524 }
525 
526 inline bool
527 kalloc_is_restricted_data_mode_enforced(void)
528 {
529 	return restricted_data_mode == RESTRICTED_DATA_MODE_ENFORCE;
530 }
531 
532 inline bool
533 kmem_needs_data_share_range(void)
534 {
535 	/*
536 	 * The dedicated range is required only for
537 	 * telemetry reporting, when we need to distinguish
538 	 * between the two kinds of data via kmem ranges.
539 	 *
540 	 * Even though this is strictly equivalent to checking the
541 	 * telemetry mode, it's better to have a well-defined abstraction
542 	 * layer for that, adopted at all the call-sites, to stay flexible
543 	 * w.r.t. future changes / unrolling.
544 	 */
545 	return kalloc_is_restricted_data_mode_telemetry();
546 }
547 
548 /*
549  * Section start/end for fixed kalloc_type views
550  */
551 extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
552 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
553 
554 extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
555 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
556 
557 /*
558  * Section start/end for variable kalloc_type views
559  */
560 extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
561 __SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
562 
563 extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
564 __SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
565 
566 __startup_data
567 static kalloc_type_views_t *kt_buffer = NULL;
568 __startup_data
569 static uint64_t kt_count;
570 __startup_data
571 uint32_t kalloc_type_hash_seed;
572 
573 __startup_data
574 static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
575 __startup_data
576 static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];
577 
578 struct nzones_with_idx {
579 	uint16_t nzones;
580 	uint16_t idx;
581 };
582 int16_t zone_carry = 0;
583 
584 _Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
585     "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");
586 
587 /*
588  * For use by lldb to iterate over kalloc types
589  */
590 SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
591 SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
592 SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];
593 
594 #define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
595 static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
596     KMEM_DIRECTION_MASK),
597     "Insufficient bits to represent range and dir for VM allocations");
598 static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
599     "validate idx mask");
600 /* qsort routines */
601 typedef int (*cmpfunc_t)(const void *a, const void *b);
602 extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
603 
604 static inline uint16_t
605 kalloc_type_get_idx(uint32_t kt_size)
606 {
607 	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
608 }
609 
610 static inline uint32_t
611 kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
612 {
613 	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
614 }
615 
616 static void
617 kalloc_type_build_dlut(void)
618 {
619 	vm_size_t size = 0;
620 	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
621 		uint8_t zindex = 0;
622 		while (kt_zone_cfg[zindex] < size) {
623 			zindex++;
624 		}
625 		kalloc_type_dlut[i] = zindex;
626 	}
627 }
628 
629 static uint32_t
630 kalloc_type_idx_for_size(uint32_t size)
631 {
632 	assert(size <= KHEAP_MAX_SIZE);
633 	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
634 	return kalloc_type_set_idx(size, idx);
635 }
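/*
 * Worked example (assuming KALLOC_MINALIGN == 16): for size == 100,
 * INDEX_ZDLUT(100) == 7 and kalloc_type_dlut[7] selects the 128-byte size
 * class; kalloc_type_set_idx() then packs that zone index into the upper
 * bits of kt_size so that kalloc_type_get_idx()/kalloc_type_get_size() can
 * recover both halves later.
 */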
636 
637 static void
638 kalloc_type_assign_zone_fixed(
639 	kalloc_type_view_t     *cur,
640 	kalloc_type_view_t     *end,
641 	zone_t                  z,
642 	zone_t                  sig_zone,
643 	zone_t                  early_zone)
644 {
645 	/*
646 	 * Assign the zone created for every kalloc_type_view
647 	 * of the same unique signature
648 	 */
649 	bool need_raw_view = false;
650 
651 	while (cur < end) {
652 		kalloc_type_view_t kt = *cur;
653 		struct zone_view *zv = &kt->kt_zv;
654 		zv->zv_zone = z;
655 		kalloc_type_flags_t kt_flags = kt->kt_flags;
656 		zone_security_flags_t zsflags = zone_security_config(z);
657 
658 		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
659 		if (!early_zone) {
660 			assert(zone_is_data_kheap(zsflags.z_kheap_id));
661 		}
662 
663 		if (kt_flags & KT_SLID) {
664 			kt->kt_signature -= vm_kernel_slide;
665 			kt->kt_zv.zv_name -= vm_kernel_slide;
666 		}
667 
668 		if ((kt_flags & KT_PRIV_ACCT) ||
669 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
670 			zv->zv_stats = zalloc_percpu_permanent_type(
671 				struct zone_stats);
672 			need_raw_view = true;
673 			zone_view_count += 1;
674 		} else {
675 			zv->zv_stats = z->z_stats;
676 		}
677 
678 		if ((kt_flags & KT_NOEARLY) || !early_zone) {
679 			if ((kt_flags & KT_NOEARLY) && !(kt_flags & KT_PRIV_ACCT)) {
680 				panic("KT_NOEARLY used w/o private accounting for view %s",
681 				    zv->zv_name);
682 			}
683 
684 			zpercpu_foreach(zs, zv->zv_stats) {
685 				os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
686 			}
687 		}
688 
689 		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
690 			kt->kt_zearly = early_zone;
691 			kt->kt_zsig = sig_zone;
692 			/*
693 			 * If we haven't yet set the signature equivalence, set it;
694 			 * otherwise validate that the zone has the same signature
695 			 * equivalence as the sig_zone provided.
696 			 */
697 			if (!zone_get_sig_eq(z)) {
698 				zone_set_sig_eq(z, zone_index(sig_zone));
699 			} else {
700 				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
701 			}
702 		}
703 		zv->zv_next = (zone_view_t) z->z_views;
704 		zv->zv_zone->z_views = (zone_view_t) kt;
705 		cur++;
706 	}
707 	if (need_raw_view) {
708 		zone_view_count += 1;
709 	}
710 }
711 
712 __startup_func
713 static void
714 kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
715     kalloc_type_var_view_t *end, uint32_t heap_idx)
716 {
717 	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
718 	while (cur < end) {
719 		kalloc_type_var_view_t kt = *cur;
720 		kt->kt_heap_start = cfg->kh_zstart;
721 		kalloc_type_flags_t kt_flags = kt->kt_flags;
722 
723 		if (kt_flags & KT_SLID) {
724 			if (kt->kt_sig_hdr) {
725 				kt->kt_sig_hdr -= vm_kernel_slide;
726 			}
727 			kt->kt_sig_type -= vm_kernel_slide;
728 			kt->kt_name -= vm_kernel_slide;
729 		}
730 
731 		if ((kt_flags & KT_PRIV_ACCT) ||
732 		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
733 			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
734 			zone_view_count += 1;
735 		}
736 
737 		kt->kt_next = (zone_view_t) cfg->kt_views;
738 		cfg->kt_views = kt;
739 		cur++;
740 	}
741 }
742 
743 __startup_func
744 static inline void
745 kalloc_type_slide_fixed(vm_offset_t addr)
746 {
747 	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
748 	ktv->kt_signature += vm_kernel_slide;
749 	ktv->kt_zv.zv_name += vm_kernel_slide;
750 	ktv->kt_flags |= KT_SLID;
751 }
752 
753 __startup_func
754 static inline void
755 kalloc_type_slide_var(vm_offset_t addr)
756 {
757 	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
758 	if (ktv->kt_sig_hdr) {
759 		ktv->kt_sig_hdr += vm_kernel_slide;
760 	}
761 	ktv->kt_sig_type += vm_kernel_slide;
762 	ktv->kt_name += vm_kernel_slide;
763 	ktv->kt_flags |= KT_SLID;
764 }
765 
766 __startup_func
767 static void
768 kalloc_type_validate_flags(
769 	kalloc_type_flags_t   kt_flags,
770 	const char           *kt_name,
771 	uuid_string_t         kext_uuid)
772 {
773 	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
774 		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
775 		    "required xnu headers", kt_name, kext_uuid);
776 	}
777 }
778 
779 static kalloc_type_flags_t
780 kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
781 {
782 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
783 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
784 	return ktv->kt_flags;
785 }
786 
787 static kalloc_type_flags_t
788 kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
789 {
790 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
791 	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
792 	return ktv->kt_flags;
793 }
794 
795 /*
796  * Check if signature of type is made up of only data and padding,
797  * which is meant to never be shared.
798  */
799 static bool
800 kalloc_type_is_data(kalloc_type_flags_t kt_flags)
801 {
802 	assert(kt_flags & KT_CHANGED);
803 	return kt_flags & KT_DATA_ONLY;
804 }
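/*
 * Example (illustrative sketch; the signature encoding itself is emitted
 * by the kalloc_type macros at compile time): a type such as
 *
 *     struct pkt_hdr { uint32_t len; char tag[12]; };    (hypothetical)
 *
 * holds only data and padding, so its views carry KT_DATA_ONLY and are
 * redirected to a data heap, whereas
 *
 *     struct node { struct node *next; uint32_t val; };  (hypothetical)
 *
 * embeds a pointer and stays in the typed kalloc.type zones.
 */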
805 
806 /*
807  * Check if signature of type is made up of only pointers
808  */
809 static bool
810 kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
811 {
812 	assert(kt_flags & KT_CHANGED2);
813 	return kt_flags & KT_PTR_ARRAY;
814 }
815 
816 static bool
817 kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
818 {
819 	assert(kt_flags & KT_CHANGED);
820 	return kt_flags & KT_VM;
821 }
822 
823 __startup_func
824 static inline vm_size_t
825 kalloc_type_view_sz_fixed(void)
826 {
827 	return sizeof(struct kalloc_type_view);
828 }
829 
830 __startup_func
831 static inline vm_size_t
832 kalloc_type_view_sz_var(void)
833 {
834 	return sizeof(struct kalloc_type_var_view);
835 }
836 
837 __startup_func
838 static inline uint64_t
839 kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
840     vm_offset_t end)
841 {
842 	return (end - start) / kalloc_type_func(type, view_sz);
843 }
844 
845 __startup_func
846 static inline void
847 kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
848 {
849 	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
850 }
851 
852 __startup_func
853 static inline void
854 kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
855 {
856 	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
857 }
858 
859 __startup_func
860 static void
861 kalloc_type_handle_data_view_fixed(vm_offset_t addr)
862 {
863 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
864 	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
865 	    cur_data_view->kt_size);
866 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
867 	    NULL);
868 }
869 
870 __startup_func
871 static void
kalloc_type_handle_data_view_var(vm_offset_t addr)872 kalloc_type_handle_data_view_var(vm_offset_t addr)
873 {
874 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
875 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
876 }
877 
878 __startup_func
879 static void
880 kalloc_type_handle_data_shared_view_fixed(vm_offset_t addr)
881 {
882 	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
883 	zone_t z = kalloc_zone_for_size(KHEAP_DATA_SHARED->kh_zstart,
884 	    cur_data_view->kt_size);
885 	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
886 	    NULL);
887 }
888 
889 __startup_func
890 static void
kalloc_type_handle_data_shared_view_var(vm_offset_t addr)891 kalloc_type_handle_data_shared_view_var(vm_offset_t addr)
892 {
893 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
894 	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_SHARED_HEAP);
895 }
896 
897 __startup_func
898 static uint32_t
899 kalloc_type_handle_parray_var(void)
900 {
901 	uint32_t i = 0;
902 	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
903 	const char *p_name = kt->kt_name;
904 
905 	/*
906 	 * The sorted list of variable kalloc_type_views has pointer arrays at the
907 	 * beginning. Walk through them and assign a random pointer heap to each
908 	 * type detected by typename.
909 	 */
910 	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
911 		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
912 		const char *c_name = kt->kt_name;
913 		uint32_t p_i = i;
914 
915 		while (strcmp(c_name, p_name) == 0) {
916 			i++;
917 			kt = kt_buffer[i].ktv_var;
918 			c_name = kt->kt_name;
919 		}
920 		p_name = c_name;
921 		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
922 		    &kt_buffer[i].ktv_var, heap_id);
923 	}
924 
925 	/*
926 	 * Returns the index of the first view that isn't a pointer array
927 	 */
928 	return i;
929 }
930 
931 __startup_func
932 static uint32_t
933 kalloc_hash_adjust(uint32_t hash, uint32_t shift)
934 {
935 	/*
936 	 * Limit range_id to ptr ranges
937 	 */
938 	uint32_t range_id = kmem_adjust_range_id(hash);
939 	uint32_t direction = hash & 0x8000;
940 	return (range_id | KMEM_HASH_SET | direction) << shift;
941 }
942 
943 __startup_func
944 static void
945 kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
946     kalloc_type_flags_t *kt_flags)
947 {
948 	uint32_t hash = 0;
949 
950 	assert(sig_ty != NULL);
951 	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
952 	    kalloc_type_hash_seed);
953 	if (sig_hdr) {
954 		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
955 	}
956 	hash = os_hash_jenkins_finish(hash);
957 	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
958 
959 	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
960 }
961 
962 __startup_func
963 static void
964 kalloc_type_set_type_hash_fixed(vm_offset_t addr)
965 {
966 	/*
967 	 * Use backtraces on fixed as we don't have signatures for types that go
968 	 * to the VM due to rdar://85182551.
969 	 */
970 	(void) addr;
971 }
972 
973 __startup_func
974 static void
975 kalloc_type_set_type_hash_var(vm_offset_t addr)
976 {
977 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
978 	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
979 	    &ktv->kt_flags);
980 }
981 
982 __startup_func
983 static void
984 kalloc_type_mark_processed_fixed(vm_offset_t addr)
985 {
986 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
987 	ktv->kt_flags |= KT_PROCESSED;
988 }
989 
990 __startup_func
991 static void
992 kalloc_type_mark_processed_var(vm_offset_t addr)
993 {
994 	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
995 	ktv->kt_flags |= KT_PROCESSED;
996 }
997 
998 __startup_func
999 static void
1000 kalloc_type_update_view_fixed(vm_offset_t addr)
1001 {
1002 	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
1003 	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
1004 }
1005 
1006 __startup_func
1007 static void
1008 kalloc_type_update_view_var(vm_offset_t addr)
1009 {
1010 	(void) addr;
1011 }
1012 
1013 __startup_func
1014 static void
1015 kalloc_type_view_copy(
1016 	const kalloc_type_variant_t   type,
1017 	vm_offset_t                   start,
1018 	vm_offset_t                   end,
1019 	uint64_t                     *cur_count,
1020 	bool                          slide,
1021 	uuid_string_t                 kext_uuid)
1022 {
1023 	uint64_t count = kalloc_type_view_count(type, start, end);
1024 	if (count + *cur_count >= kt_count) {
1025 		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
1026 	}
1027 	vm_offset_t cur = start;
1028 	while (cur < end) {
1029 		if (slide) {
1030 			kalloc_type_func(type, slide, cur);
1031 		}
1032 		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
1033 		    kext_uuid);
1034 		kalloc_type_func(type, mark_processed, cur);
1035 		/*
1036 		 * Skip views that go to the VM
1037 		 */
1038 		if (kalloc_type_from_vm(kt_flags)) {
1039 			cur += kalloc_type_func(type, view_sz);
1040 			continue;
1041 		}
1042 
1043 		/*
1044 		 * Check if the signature indicates that the entire allocation is data.
1045 		 *
1046 		 * Note that KT_VAR_DATA_HEAP is a fake "data" heap; variable kalloc_type handles
1047 		 * the actual redirection in the entry points kalloc/kfree_type_var_impl.
1048 		 */
1049 		if (kalloc_type_is_data(kt_flags)) {
1050 			kalloc_type_func(type, handle_data_view, cur);
1051 			cur += kalloc_type_func(type, view_sz);
1052 			continue;
1053 		}
1054 
1055 		/*
1056 		 * Set type hash that is used by kmem_*_guard
1057 		 */
1058 		kalloc_type_func(type, set_type_hash, cur);
1059 		kalloc_type_func(type, update_view, cur);
1060 		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
1061 		cur += kalloc_type_func(type, view_sz);
1062 		*cur_count = *cur_count + 1;
1063 	}
1064 }
1065 
1066 __startup_func
1067 static uint64_t
1068 kalloc_type_view_parse(const kalloc_type_variant_t type)
1069 {
1070 	kc_format_t kc_format;
1071 	uint64_t cur_count = 0;
1072 
1073 	if (!PE_get_primary_kc_format(&kc_format)) {
1074 		panic("kalloc_type_view_parse: wasn't able to determine kc format");
1075 	}
1076 
1077 	if (kc_format == KCFormatStatic) {
1078 		/*
1079 		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
1080 		 * xnu are coalesced.
1081 		 */
1082 		kalloc_type_view_copy(type,
1083 		    kalloc_type_var(type, sec_start),
1084 		    kalloc_type_var(type, sec_end),
1085 		    &cur_count, false, NULL);
1086 	} else if (kc_format == KCFormatFileset) {
1087 		/*
1088 		 * If kc uses filesets, traverse __kalloc_type section for each
1089 		 * macho in the BootKC.
1090 		 */
1091 		kernel_mach_header_t *kc_mh = NULL;
1092 		kernel_mach_header_t *kext_mh = NULL;
1093 
1094 		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
1095 		struct load_command *lc =
1096 		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
1097 		for (uint32_t i = 0; i < kc_mh->ncmds;
1098 		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1099 			if (lc->cmd != LC_FILESET_ENTRY) {
1100 				continue;
1101 			}
1102 			struct fileset_entry_command *fse =
1103 			    (struct fileset_entry_command *)(vm_offset_t)lc;
1104 			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
1105 			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
1106 				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1107 			if (sect != NULL) {
1108 				unsigned long uuidlen = 0;
1109 				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
1110 				uuid_string_t kext_uuid_str;
1111 				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
1112 					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
1113 				}
1114 				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1115 				    &cur_count, false, kext_uuid_str);
1116 			}
1117 		}
1118 	} else if (kc_format == KCFormatKCGEN) {
1119 		/*
1120 		 * Parse __kalloc_type section from xnu
1121 		 */
1122 		kalloc_type_view_copy(type,
1123 		    kalloc_type_var(type, sec_start),
1124 		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1125 
1126 #ifndef __BUILDING_XNU_LIB_UNITTEST__ /* no kexts in unit-test */
1127 		/*
1128 		 * Parse __kalloc_type section for kexts
1129 		 *
1130 		 * Note: We don't process the kalloc_type_views for kexts on armv7
1131 		 * as this platform has insufficient memory for type based
1132 		 * segregation. kalloc_type_impl_external will direct callsites
1133 		 * based on their size.
1134 		 */
1135 		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1136 		vm_offset_t cur = 0;
1137 		vm_offset_t end = 0;
1138 
1139 		/*
1140 		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1141 		 * and traverse it.
1142 		 */
1143 		kernel_section_t *prelink_sect = getsectbynamefromheader(
1144 			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1145 		assert(prelink_sect);
1146 		cur = prelink_sect->addr;
1147 		end = prelink_sect->addr + prelink_sect->size;
1148 
1149 		while (cur < end) {
1150 			uint64_t kext_text_sz = 0;
1151 			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1152 
1153 			if (kext_mh->magic == 0) {
1154 				/*
1155 				 * Assert that we have processed all kexts and all that is left
1156 				 * is padding
1157 				 */
1158 				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1159 				break;
1160 			} else if (kext_mh->magic != MH_MAGIC_64 &&
1161 			    kext_mh->magic != MH_CIGAM_64) {
1162 				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1163 				    cur);
1164 			}
1165 
1166 			/*
1167 			 * Kext macho found, iterate through its segments
1168 			 */
1169 			struct load_command *lc =
1170 			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1171 			bool isSplitKext = false;
1172 
1173 			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1174 			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1175 				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1176 					isSplitKext = true;
1177 					continue;
1178 				} else if (lc->cmd != LC_SEGMENT_64) {
1179 					continue;
1180 				}
1181 
1182 				kernel_segment_command_t *seg_cmd =
1183 				    (struct segment_command_64 *)(vm_offset_t)lc;
1184 				/*
1185 				 * Parse kalloc_type section
1186 				 */
1187 				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1188 					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1189 					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1190 					if (kt_sect) {
1191 						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1192 						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1193 						    true, NULL);
1194 					}
1195 				}
1196 				/*
1197 				 * If the kext has a __TEXT segment, that is the only thing that
1198 				 * will be in the special __PRELINK_TEXT KC segment, so the next
1199 				 * macho is right after.
1200 				 */
1201 				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1202 					kext_text_sz = seg_cmd->filesize;
1203 				}
1204 			}
1205 			/*
1206 			 * If the kext did not have a __TEXT segment (special xnu kexts with
1207 			 * only a __LINKEDIT segment) then the next macho will be after all the
1208 			 * header commands.
1209 			 */
1210 			if (!kext_text_sz) {
1211 				kext_text_sz = kext_mh->sizeofcmds;
1212 			} else if (!isSplitKext) {
1213 				panic("kalloc_type_view_parse: No support for non-split seg KCs");
1214 				break;
1215 			}
1216 
1217 			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1218 		}
1219 #endif /* __BUILDING_XNU_LIB_UNITTEST__ */
1220 	} else {
1221 		/*
1222 		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1223 		 * parsing kalloc_type_view structs during startup.
1224 		 */
1225 		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1226 		    " for kc_format = %d\n", kc_format);
1227 	}
1228 	return cur_count;
1229 }
1230 
1231 __startup_func
1232 static int
1233 kalloc_type_cmp_fixed(const void *a, const void *b)
1234 {
1235 	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1236 	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1237 
1238 	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1239 	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1240 	/*
1241 	 * If the kalloc_type_views are in the same kalloc bucket, sort by
1242 	 * signature; otherwise sort by size
1243 	 */
1244 	if (idxA == idxB) {
1245 		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1246 		/*
1247 		 * If the kalloc_type_views have the same signature sort by site
1248 		 * name
1249 		 */
1250 		if (result == 0) {
1251 			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1252 		}
1253 		return result;
1254 	}
1255 	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1256 	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1257 	return (int)(sizeA - sizeB);
1258 }
1259 
1260 __startup_func
1261 static int
1262 kalloc_type_cmp_var(const void *a, const void *b)
1263 {
1264 	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1265 	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1266 	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1267 	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1268 	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1269 	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktB->kt_flags);
1270 	int result = 0;
1271 
1272 	/*
1273 	 * Switched around (B - A) because we want the pointer arrays to be at the
1274 	 * top
1275 	 */
1276 	result = ktB_ptrArray - ktA_ptrArray;
1277 	if (result == 0) {
1278 		result = strcmp(ktA_hdr, ktB_hdr);
1279 		if (result == 0) {
1280 			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1281 			if (result == 0) {
1282 				result = strcmp(ktA->kt_name, ktB->kt_name);
1283 			}
1284 		}
1285 	}
1286 	return result;
1287 }
1288 
1289 __startup_func
1290 static uint16_t *
1291 kalloc_type_create_iterators_fixed(
1292 	uint16_t           *kt_skip_list_start,
1293 	uint64_t            count)
1294 {
1295 	uint16_t *kt_skip_list = kt_skip_list_start;
1296 	uint16_t p_idx = UINT16_MAX; /* previous size idx */
1297 	uint16_t c_idx = 0; /* current size idx */
1298 	uint16_t unique_sig = 0;
1299 	uint16_t total_sig = 0;
1300 	const char *p_sig = NULL;
1301 	const char *p_name = "";
1302 	const char *c_sig = NULL;
1303 	const char *c_name = NULL;
1304 
1305 	/*
1306 	 * Walk over each kalloc_type_view
1307 	 */
1308 	for (uint16_t i = 0; i < count; i++) {
1309 		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1310 
1311 		c_idx = kalloc_type_get_idx(kt->kt_size);
1312 		c_sig = kt->kt_signature;
1313 		c_name = kt->kt_zv.zv_name;
1314 		/*
1315 		 * When the current kalloc_type_view is in a different kalloc size
1316 		 * bucket than the previous one, we have processed everything in
1317 		 * the previous size bucket, so store the accumulated values
1318 		 * and advance the indices.
1319 		 */
1320 		if (p_idx == UINT16_MAX || c_idx != p_idx) {
1321 			/*
1322 			 * Updates for frequency lists
1323 			 */
1324 			if (p_idx != UINT16_MAX) {
1325 				kt_freq_list[p_idx] = unique_sig;
1326 				kt_freq_list_total[p_idx] = total_sig - unique_sig;
1327 			}
1328 			unique_sig = 1;
1329 			total_sig = 1;
1330 
1331 			p_idx = c_idx;
1332 			p_sig = c_sig;
1333 			p_name = c_name;
1334 
1335 			/*
1336 			 * Updates to signature skip list
1337 			 */
1338 			*kt_skip_list = i;
1339 			kt_skip_list++;
1340 
1341 			continue;
1342 		}
1343 
1344 		/*
1345 		 * When the current kalloc_type_view is in the same kalloc size bucket
1346 		 * as the previous one, analyze the signature to see if it is unique.
1347 		 *
1348 		 * Signatures are collapsible if one is a substring of the next.
1349 		 */
1350 		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1351 			/*
1352 			 * Unique signature detected. Update counts and advance index
1353 			 */
1354 			unique_sig++;
1355 			total_sig++;
1356 
1357 			*kt_skip_list = i;
1358 			kt_skip_list++;
1359 			p_sig = c_sig;
1360 			p_name = c_name;
1361 			continue;
1362 		}
1363 		/*
1364 		 * Needed here because we do substring matching for signatures, so we
1365 		 * want to track the longer signature seen rather than the substring
1366 		 */
1367 		p_sig = c_sig;
1368 
1369 		/*
1370 		 * Check if current kalloc_type_view corresponds to a new type
1371 		 */
1372 		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1373 			total_sig++;
1374 			p_name = c_name;
1375 		}
1376 	}
1377 	/*
1378 	 * Final update
1379 	 */
1380 	assert(c_idx == p_idx);
1381 	assert(kt_freq_list[c_idx] == 0);
1382 	kt_freq_list[c_idx] = unique_sig;
1383 	kt_freq_list_total[c_idx] = total_sig - unique_sig;
1384 	*kt_skip_list = (uint16_t) count;
1385 
1386 	return ++kt_skip_list;
1387 }
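/*
 * Worked example (illustrative signatures): given four sorted views with
 * (size idx, signature, site) =
 *     (3, "12", a), (3, "12", b), (3, "122", c), (4, "1", d)
 * "12" collapses into "122" (prefix match), so bucket 3 ends up with
 * kt_freq_list[3] == 1 unique signature and kt_freq_list_total[3] == 2
 * additional types, and the returned skip list is { 0, 3, 4 }.
 */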
1388 
1389 __startup_func
1390 static uint32_t
1391 kalloc_type_create_iterators_var(
1392 	uint32_t           *kt_skip_list_start,
1393 	uint32_t            buf_start)
1394 {
1395 	uint32_t *kt_skip_list = kt_skip_list_start;
1396 	uint32_t n = 0;
1397 
1398 	kt_skip_list[n] = buf_start;
1399 	assert(kt_count > buf_start + 1);
1400 	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
1401 		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1402 		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1403 		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1404 		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1405 		assert(ktA->kt_sig_type != NULL);
1406 		assert(ktB->kt_sig_type != NULL);
1407 		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1408 		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1409 			n++;
1410 			kt_skip_list[n] = i;
1411 		}
1412 	}
1413 	/*
1414 	 * Final update
1415 	 */
1416 	n++;
1417 	kt_skip_list[n] = (uint32_t) kt_count;
1418 	return n;
1419 }
1420 
1421 __startup_func
1422 static uint16_t
1423 kalloc_type_distribute_budget(
1424 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1425 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)],
1426 	uint16_t            zone_budget,
1427 	uint16_t            min_zones_per_size)
1428 {
1429 	uint16_t total_sig = 0;
1430 	uint16_t min_sig = 0;
1431 	uint16_t assigned_zones = 0;
1432 	uint16_t remaining_zones = zone_budget;
1433 	uint16_t modulo = 0;
1434 
1435 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1436 		uint16_t sig_freq = freq_list[i];
1437 		uint16_t min_zones = min_zones_per_size;
1438 
1439 		if (sig_freq < min_zones_per_size) {
1440 			min_zones = sig_freq;
1441 		}
1442 		total_sig += sig_freq;
1443 		kt_zones[i] = min_zones;
1444 		min_sig += min_zones;
1445 	}
1446 	if (remaining_zones > total_sig) {
1447 		remaining_zones = total_sig;
1448 	}
1449 	assert(remaining_zones >= min_sig);
1450 	remaining_zones -= min_sig;
1451 	total_sig -= min_sig;
1452 	assigned_zones += min_sig;
1453 
1454 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1455 		uint16_t freq = freq_list[i];
1456 
1457 		if (freq < min_zones_per_size) {
1458 			continue;
1459 		}
1460 		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1461 		uint16_t n_zones = (uint16_t)(numer / total_sig);
1462 
1463 		/*
1464 		 * Accumulate remainder and increment n_zones when it goes above
1465 		 * denominator
1466 		 */
1467 		modulo += numer % total_sig;
1468 		if (modulo >= total_sig) {
1469 			n_zones++;
1470 			modulo -= total_sig;
1471 		}
1472 
1473 		/*
1474 		 * Cap the total number of zones to the unique signatures
1475 		 */
1476 		if ((n_zones + min_zones_per_size) > freq) {
1477 			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1478 			modulo += (extra_zones * total_sig);
1479 			n_zones -= extra_zones;
1480 		}
1481 		kt_zones[i] += n_zones;
1482 		assigned_zones += n_zones;
1483 	}
1484 
1485 	if (kt_options & KT_OPTIONS_DEBUG) {
1486 		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1487 		    assigned_zones, remaining_zones + min_sig - assigned_zones);
1488 	}
1489 	return remaining_zones + min_sig - assigned_zones;
1490 }
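/*
 * Worked example: with freq_list = { 4, 1, 10 }, zone_budget = 10 and
 * min_zones_per_size = 2, the guaranteed minimums are { 2, 1, 2 }
 * (min_sig = 5), leaving 5 zones for total_sig = 10 remaining signatures.
 * The proportional pass adds (freq - min) * 5 / 10 = { 1, 0, 4 }, so
 * kt_zones = { 3, 1, 6 } and no zone of the budget is wasted.
 */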
1491 
1492 __startup_func
1493 static int
1494 kalloc_type_cmp_type_zones(const void *a, const void *b)
1495 {
1496 	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
1497 	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;
1498 
1499 	return (int)(B.nzones - A.nzones);
1500 }
1501 
1502 __startup_func
1503 static void
1504 kalloc_type_redistribute_budget(
1505 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1506 	uint16_t            kt_zones[MAX_K_ZONE(kt_zone_cfg)])
1507 {
1508 	uint16_t count = 0, cur_count = 0;
1509 	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
1510 	uint16_t top_zone_total = 0;
1511 
1512 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1513 		uint16_t zones = kt_zones[i];
1514 
1515 		/*
1516 		 * If a sizeclass got no zones but has types to divide, make a note
1517 		 * of it
1518 		 */
1519 		if (zones == 0 && (freq_total_list[i] != 0)) {
1520 			count++;
1521 		}
1522 
1523 		sorted_zones[i].nzones = kt_zones[i];
1524 		sorted_zones[i].idx = i;
1525 	}
1526 
1527 	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
1528 	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);
1529 
1530 	for (uint16_t i = 0; i < 3; i++) {
1531 		top_zone_total += sorted_zones[i].nzones;
1532 	}
1533 
1534 	/*
1535 	 * Borrow zones from the top 3 sizeclasses and redistribute to those
1536 	 * that didn't get a zone but have types to divide
1537 	 */
1538 	cur_count = count;
1539 	for (uint16_t i = 0; i < 3; i++) {
1540 		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
1541 		uint16_t zone_available = kt_zones[sorted_zones[i].idx];
1542 
1543 		if (zone_borrow > (zone_available / 2)) {
1544 			zone_borrow = zone_available / 2;
1545 		}
1546 		kt_zones[sorted_zones[i].idx] -= zone_borrow;
1547 		cur_count -= zone_borrow;
1548 	}
1549 
1550 	for (uint16_t i = 0; i < 3; i++) {
1551 		if (cur_count == 0) {
1552 			break;
1553 		}
1554 		kt_zones[sorted_zones[i].idx]--;
1555 		cur_count--;
1556 	}
1557 
1558 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1559 		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
1560 		    (count > cur_count)) {
1561 			kt_zones[i]++;
1562 			count--;
1563 		}
1564 	}
1565 }
1566 
1567 static uint16_t
1568 kalloc_type_apply_policy(
1569 	uint16_t            freq_list[MAX_K_ZONE(kt_zone_cfg)],
1570 	uint16_t            freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1571 	uint16_t            kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
1572 	uint16_t            kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
1573 	uint16_t            zone_budget)
1574 {
1575 	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
1576 	uint16_t zbudget_type = zone_budget - zbudget_sig;
1577 	uint16_t wasted_zones = 0;
1578 
1579 #if DEBUG || DEVELOPMENT
1580 	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1581 		__assert_only uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1582 		assert(zone_budget + current_zones <= MAX_ZONES);
1583 	}
1584 #endif
1585 
1586 	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
1587 	    zbudget_sig, 2);
1588 	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
1589 	    kt_zones_type, zbudget_type, 0);
1590 	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1591 
1592 	/*
1593 	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1594 	 */
1595 	if (kt_options & KT_OPTIONS_DEBUG) {
1596 		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
1597 		    "zones_type\n");
1598 		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1599 			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
1600 			    freq_total_list[i] + freq_list[i], freq_list[i],
1601 			    kt_zones_sig[i] + kt_zones_type[i],
1602 			    kt_zones_sig[i], kt_zones_type[i]);
1603 		}
1604 	}
1605 
1606 	return wasted_zones;
1607 }
1608 
1609 
1610 __startup_func
1611 static void
1612 kalloc_type_create_zone_for_size(
1613 	zone_t             *kt_zones_for_size,
1614 	uint16_t            kt_zones,
1615 	vm_size_t           z_size)
1616 {
1617 	zone_t p_zone = NULL;
1618 	char *z_name = NULL;
1619 	zone_t shared_z = NULL;
1620 
1621 	for (uint16_t i = 0; i < kt_zones; i++) {
1622 		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1623 		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1624 		    (size_t) z_size);
1625 		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1626 		if (i != 0) {
1627 			p_zone->z_kt_next = z;
1628 		}
1629 		p_zone = z;
1630 		kt_zones_for_size[i] = z;
1631 	}
1632 	/*
1633 	 * Create shared zone for sizeclass if it doesn't already exist
1634 	 */
1635 	if (kt_shared_fixed) {
1636 		shared_z = kalloc_zone_for_size(KHEAP_EARLY->kh_zstart, z_size);
1637 		if (zone_elem_inner_size(shared_z) != z_size) {
1638 			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1639 			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
1640 			    (size_t) z_size);
1641 			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
1642 			    ^(zone_t zone){
1643 				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_EARLY;
1644 			});
1645 		}
1646 	}
1647 	kt_zones_for_size[kt_zones] = shared_z;
1648 }
1649 
1650 __startup_func
1651 static uint16_t
1652 kalloc_type_zones_for_type(
1653 	uint16_t            zones_total_type,
1654 	uint16_t            unique_types,
1655 	uint16_t            total_types,
1656 	bool                last_sig)
1657 {
1658 	uint16_t zones_for_type = 0, n_mod = 0;
1659 
1660 	if (zones_total_type == 0) {
1661 		return 0;
1662 	}
1663 
1664 	zones_for_type = (zones_total_type * unique_types) / total_types;
1665 	n_mod = (zones_total_type * unique_types) % total_types;
1666 	zone_carry += n_mod;
1667 
1668 	/*
1669 	 * Drain carry opportunistically
1670 	 */
1671 	if (((unique_types > 3) && (zone_carry > 0)) ||
1672 	    (zone_carry >= (int) total_types) ||
1673 	    (last_sig && (zone_carry > 0))) {
1674 		zone_carry -= total_types;
1675 		zones_for_type++;
1676 	}
1677 
1678 	if (last_sig) {
1679 		assert(zone_carry == 0);
1680 	}
1681 
1682 	return zones_for_type;
1683 }
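
/*
 * Worked example: with zones_total_type == 4 and total_types == 10, a
 * signature covering unique_types == 5 gets (4 * 5) / 10 == 2 zones with no
 * remainder, while one covering 3 gets (4 * 3) / 10 == 1 zone and adds 2 to
 * zone_carry. The carry is drained one zone at a time: opportunistically
 * for signatures with more than 3 types, whenever it reaches total_types,
 * and on the last signature if any carry remains, so the whole budget is
 * always spent.
 */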
1684 
1685 __startup_func
1686 static uint16_t
1687 kalloc_type_build_skip_list(
1688 	kalloc_type_view_t     *start,
1689 	kalloc_type_view_t     *end,
1690 	uint16_t               *kt_skip_list)
1691 {
1692 	kalloc_type_view_t *cur = start;
1693 	kalloc_type_view_t prev = *start;
1694 	uint16_t i = 0, idx = 0;
1695 
1696 	kt_skip_list[idx] = i;
1697 	idx++;
1698 
1699 	while (cur < end) {
1700 		kalloc_type_view_t kt_cur = *cur;
1701 
1702 		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1703 			kt_skip_list[idx] = i;
1704 
1705 			prev = kt_cur;
1706 			idx++;
1707 		}
1708 		i++;
1709 		cur++;
1710 	}
1711 
1712 	/*
1713 	 * Final update
1714 	 */
1715 	kt_skip_list[idx] = i;
1716 	return idx;
1717 }
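
/*
 * For illustration: given views sorted by name, say [A, A, B, C, C, C],
 * the loop records the start of each run of identical names, yielding
 * kt_skip_list = [0, 2, 3, 6] and a return value of 3 (the number of
 * unique names); entries idx and idx + 1 bracket the views that share the
 * idx-th unique name.
 */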
1718 
1719 __startup_func
1720 static void
1721 kalloc_type_init_sig_eq(
1722 	zone_t             *zones,
1723 	uint16_t            n_zones,
1724 	zone_t              sig_zone)
1725 {
1726 	for (uint16_t i = 0; i < n_zones; i++) {
1727 		zone_t z = zones[i];
1728 
1729 		assert(!zone_get_sig_eq(z));
1730 		zone_set_sig_eq(z, zone_index(sig_zone));
1731 	}
1732 }
1733 
1734 #ifndef __BUILDING_XNU_LIB_UNITTEST__
1735 #define KT_ZONES_FOR_SIZE_SIZE 32
1736 #else /* __BUILDING_XNU_LIB_UNITTEST__ */
1737 /* the different init sequence in unit tests requires a bigger buffer during kalloc zone initialization */
1738 #define KT_ZONES_FOR_SIZE_SIZE 35
1739 #endif /* __BUILDING_XNU_LIB_UNITTEST__ */
1740 
1741 __startup_func
1742 static uint16_t
1743 kalloc_type_distribute_zone_for_type(
1744 	kalloc_type_view_t *start,
1745 	kalloc_type_view_t *end,
1746 	bool                last_sig,
1747 	uint16_t            zones_total_type,
1748 	uint16_t            total_types,
1749 	uint16_t           *kt_skip_list,
1750 	zone_t              kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE],
1751 	uint16_t            type_zones_start,
1752 	zone_t              sig_zone,
1753 	zone_t              early_zone)
1754 {
1755 	uint16_t count = 0, n_zones = 0;
1756 	uint16_t *shuffle_buf = NULL;
1757 	zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1758 
1759 	/*
1760 	 * Assert there is space in buffer
1761 	 */
1762 	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1763 	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1764 	    last_sig);
1765 	shuffle_buf = &kt_skip_list[count + 1];
1766 
1767 	/*
1768 	 * Initalize signature equivalence zone for type zones
1769 	 * Initialize signature equivalence zone for type zones
1770 	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1771 
1772 	if (n_zones == 0) {
1773 		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1774 		    early_zone);
1775 		return n_zones;
1776 	}
1777 
1778 	/*
1779 	 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1780 	 */
1781 	if (count == 1) {
1782 		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1783 		    early_zone);
1784 		return n_zones;
1785 	}
1786 
1787 	/*
1788 	 * Add the signature based zone to n_zones
1789 	 */
1790 	n_zones++;
1791 
1792 	for (uint16_t i = 0; i < count; i++) {
1793 		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1794 		uint16_t type_start = kt_skip_list[i];
1795 		kalloc_type_view_t *kt_type_start = &start[type_start];
1796 		uint16_t type_end = kt_skip_list[i + 1];
1797 		kalloc_type_view_t *kt_type_end = &start[type_end];
1798 		zone_t zone;
1799 
1800 		if (zidx == 0) {
1801 			kmem_shuffle(shuffle_buf, n_zones);
1802 		}
1803 
1804 		shuffled_zidx = shuffle_buf[zidx];
1805 		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1806 		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1807 		    early_zone);
1808 	}
1809 
1810 	return n_zones - 1;
1811 }
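
/*
 * For illustration: with count == 5 unique type names and n_zones == 2
 * dedicated type zones, the sig_zone joins as a third candidate and
 * kmem_shuffle() re-permutes the indices {0, 1, 2} every n_zones
 * assignments. A shuffled index of 0 keeps a type in sig_zone; any other
 * index selects type_zones[shuffled_zidx - 1]. The return value counts
 * only the dedicated type zones consumed, hence n_zones - 1.
 */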
1812 
1813 __startup_func
1814 static void
1815 kalloc_type_create_zones_fixed(
1816 	uint16_t           *kt_skip_list_start,
1817 	uint16_t           *kt_shuffle_buf)
1818 {
1819 	uint16_t *kt_skip_list = kt_skip_list_start;
1820 	uint16_t p_j = 0;
1821 	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1822 	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1823 #if DEBUG || DEVELOPMENT
1824 	__assert_only uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1825 	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
1826 #endif
1827 	/*
1828 	 * Apply policy to determine how many zones to create for each size
1829 	 * class.
1830 	 */
1831 	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1832 	    kt_zones_sig, kt_zones_type, kt_fixed_zones);
1833 
1834 	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1835 		uint16_t n_unique_sig = kt_freq_list[i];
1836 		vm_size_t z_size = kt_zone_cfg[i];
1837 		uint16_t n_zones_sig = kt_zones_sig[i];
1838 		uint16_t n_zones_type = kt_zones_type[i];
1839 		uint16_t total_types = kt_freq_list_total[i];
1840 		uint16_t type_zones_used = 0;
1841 
1842 		if (n_unique_sig == 0) {
1843 			continue;
1844 		}
1845 
1846 		zone_carry = 0;
1847 		assert(n_zones_sig + n_zones_type + 1 <= KT_ZONES_FOR_SIZE_SIZE);
1848 		zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE] = {};
1849 		kalloc_type_create_zone_for_size(kt_zones_for_size,
1850 		    n_zones_sig + n_zones_type, z_size);
1851 
1852 		kalloc_type_zarray[i] = kt_zones_for_size[0];
1853 		/*
1854 		 * Ensure that there is enough space to shuffle n_unique_sig
1855 		 * indices
1856 		 */
1857 		assert(n_unique_sig < kt_shuffle_count);
1858 
1859 		/*
1860 		 * Get a shuffled set of signature indices
1861 		 */
1862 		*kt_shuffle_buf = 0;
1863 		if (n_unique_sig > 1) {
1864 			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1865 		}
1866 
1867 		for (uint16_t j = 0; j < n_zones_sig; j++) {
1868 			zone_t *z_ptr = &kt_zones_for_size[j];
1869 
1870 			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1871 		}
1872 
1873 		for (uint16_t j = 0; j < n_unique_sig; j++) {
1874 			/*
1875 			 * For every size that has unique types
1876 			 */
1877 			uint16_t shuffle_idx = kt_shuffle_buf[j];
1878 			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1879 			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1880 			zone_t zone = kt_zones_for_size[j % n_zones_sig];
1881 			zone_t early_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1882 			bool last_sig;
1883 
1884 			last_sig = (j == n_unique_sig - 1);
1885 			type_zones_used += kalloc_type_distribute_zone_for_type(
1886 				&kt_buffer[cur].ktv_fixed,
1887 				&kt_buffer[end].ktv_fixed, last_sig,
1888 				n_zones_type, total_types + n_unique_sig,
1889 				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1890 				n_zones_sig + type_zones_used, zone, early_zone);
1891 		}
1892 		assert(type_zones_used <= n_zones_type);
1893 		p_j += n_unique_sig;
1894 	}
1895 }
1896 
1897 __startup_func
1898 static void
1899 kalloc_type_view_init_fixed(void)
1900 {
1901 	kalloc_type_hash_seed = (uint32_t) early_random();
1902 	kalloc_type_build_dlut();
1903 	/*
1904 	 * Parse __kalloc_type sections and build array of pointers to
1905 	 * all kalloc type views in kt_buffer.
1906 	 */
1907 	kt_count = kalloc_type_view_parse(KTV_FIXED);
1908 	assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1909 
1910 #if MACH_ASSERT
1911 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1912 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1913 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1914 #endif
1915 
1916 	/*
1917 	 * Sort based on size class and signature
1918 	 */
1919 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1920 	    kalloc_type_cmp_fixed);
1921 
1922 	/*
1923 	 * Build a skip list that holds starts of unique signatures and a
1924 	 * frequency list of number of unique and total signatures per kalloc
1925 	 * size class
1926 	 */
1927 	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1928 	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1929 		kt_skip_list_start, kt_count);
1930 
1931 	/*
1932 	 * Create zones based on signatures
1933 	 */
1934 	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1935 }
1936 
1937 __startup_func
1938 static void
1939 kalloc_type_heap_init(void)
1940 {
1941 	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1942 	char kh_name[MAX_ZONE_NAME];
1943 	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1944 
1945 	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1946 		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1947 		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1948 		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1949 	}
1950 	/*
1951 	 * All variable kalloc type allocations are collapsed into a single
1952 	 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1953 	 */
1954 	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1955 	zone_view_count += 1;
1956 }
1957 
1958 __startup_func
1959 static void
1960 kalloc_type_assign_heap(
1961 	uint32_t            start,
1962 	uint32_t            end,
1963 	uint32_t            heap_id)
1964 {
1965 	bool use_split = kmem_get_random16(1);
1966 
1967 	if (use_split) {
1968 		heap_id = kt_var_heaps;
1969 	}
1970 	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1971 	    &kt_buffer[end].ktv_var, heap_id);
1972 }
1973 
1974 __startup_func
1975 static void
1976 kalloc_type_split_heap(
1977 	uint32_t            start,
1978 	uint32_t            end,
1979 	uint32_t            heap_id)
1980 {
1981 	uint32_t count = start;
1982 	const char *p_name = NULL;
1983 
1984 	while (count < end) {
1985 		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1986 		const char *c_name = cur->kt_name;
1987 
1988 		if (!p_name) {
1989 			assert(count == start);
1990 			p_name = c_name;
1991 		}
1992 		if (strcmp(c_name, p_name) != 0) {
1993 			kalloc_type_assign_heap(start, count, heap_id);
1994 			start = count;
1995 			p_name = c_name;
1996 		}
1997 		count++;
1998 	}
1999 	kalloc_type_assign_heap(start, end, heap_id);
2000 }
2001 
2002 __startup_func
2003 static void
2004 kalloc_type_view_init_var(void)
2005 {
2006 	uint32_t buf_start = 0, unique_sig = 0;
2007 	uint32_t *kt_skip_list_start;
2008 	uint16_t *shuffle_buf;
2009 	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
2010 	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
2011 	/*
2012 	 * Pick a random heap to split
2013 	 */
2014 	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
2015 
2016 	/*
2017 	 * Zones are created prior to parsing the views, as the zone budget is
2018 	 * fixed per size class, and special types identified while parsing are
2019 	 * redirected as they are discovered.
2020 	 */
2021 	kalloc_type_heap_init();
2022 
2023 	/*
2024 	 * Parse __kalloc_var sections and build array of pointers to views that
2025 	 * aren't redirected in kt_buffer.
2026 	 */
2027 	kt_count = kalloc_type_view_parse(KTV_VAR);
2028 	assert(kt_count < UINT32_MAX);
2029 
2030 #if MACH_ASSERT
2031 	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
2032 	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
2033 	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
2034 #endif
2035 
2036 	/*
2037 	 * Sort based on size class and signature
2038 	 */
2039 	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
2040 	    kalloc_type_cmp_var);
2041 
2042 	buf_start = kalloc_type_handle_parray_var();
2043 
2044 	/*
2045 	 * Build a skip list that holds starts of unique signatures
2046 	 */
2047 	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
2048 	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
2049 	    buf_start);
2050 	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
2051 	/*
2052 	 * If we have only one heap, then other elements share the heap with
2053 	 * pointer arrays
2054 	 */
2055 	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
2056 		panic("kt_var_heaps is too small");
2057 	}
2058 
2059 	kmem_shuffle(shuffle_buf, flex_heap_count);
2060 	/*
2061 	 * The index of the heap we decide to split is placed twice in the
2062 	 * shuffle buffer, so that heap receives twice the number of signatures,
2063 	 * which are then split evenly
2064 	 */
2065 	shuffle_buf[flex_heap_count] = split_heap;
2066 	split_heap += (fixed_heaps + 1);
2067 
2068 	for (uint32_t i = 1; i <= unique_sig; i++) {
2069 		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
2070 		    fixed_heaps + 1;
2071 		uint32_t start = kt_skip_list_start[i - 1];
2072 		uint32_t end = kt_skip_list_start[i];
2073 
2074 		assert(heap_id <= kt_var_heaps);
2075 		if (heap_id == split_heap) {
2076 			kalloc_type_split_heap(start, end, heap_id);
2077 			continue;
2078 		}
2079 		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
2080 		    &kt_buffer[end].ktv_var, heap_id);
2081 	}
2082 }
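
/*
 * For illustration: with flex_heap_count == 4 and kmem_shuffle() yielding
 * [2, 0, 3, 1], picking split_heap == 2 appends it once more to give
 * [2, 0, 3, 1, 2]. Signatures are then dealt round-robin over the 5 slots,
 * so the split heap receives roughly twice the share of any other flexible
 * heap, and kalloc_type_split_heap() further divides its signatures, name
 * by name and at random, between that heap and the last one.
 */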
2083 
2084 __startup_func
2085 static void
2086 kalloc_init(void)
2087 {
2088 	/*
2089 	 * Allocate scratch space to parse kalloc_type_views and create
2090 	 * other structures necessary to process them.
2091 	 */
2092 	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
2093 
2094 	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
2095 	kalloc_zsize_compute();
2096 
2097 	/* Initialize kalloc data buffers heap */
2098 	kalloc_heap_init(KHEAP_DATA_BUFFERS);
2099 
2100 	/* Initialize kalloc shared data buffers heap */
2101 	kalloc_heap_init(KHEAP_DATA_SHARED);
2102 
2103 	/* Initialize kalloc early heap */
2104 	kalloc_heap_init(KHEAP_EARLY);
2105 
2106 	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
2107 	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT | KMA_SPRAYQTN, VM_KERN_MEMORY_KALLOC);
2108 
2109 	/*
2110 	 * Handle fixed size views
2111 	 */
2112 	kalloc_type_view_init_fixed();
2113 
2114 	/*
2115 	 * Reset
2116 	 */
2117 	bzero(kt_buffer, kt_scratch_size);
2118 	kt_count = max_count;
2119 
2120 	/*
2121 	 * Handle variable size views
2122 	 */
2123 	kalloc_type_view_init_var();
2124 
2125 	/*
2126 	 * Free resources used
2127 	 */
2128 	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2129 }
2130 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2131 
2132 #pragma mark accessors
2133 
2134 #define KFREE_ABSURD_SIZE \
2135 	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2136 
2137 static void
2138 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2139 {
2140 	thread_t thr = current_thread();
2141 	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2142 }
2143 
2144 static void
2145 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2146 {
2147 	thread_t thr = current_thread();
2148 	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2149 }
2150 
2151 static kmem_guard_t
2152 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2153 {
2154 	kmem_guard_t guard = {
2155 		.kmg_atomic      = true,
2156 		.kmg_tag         = tag,
2157 		.kmg_type_hash   = type_hash,
2158 		.kmg_context     = os_hash_kernel_pointer(owner),
2159 	};
2160 
2161 	/*
2162 	 * TODO: this use is really not sufficiently smart.
2163 	 */
2164 
2165 	return guard;
2166 }
2167 
2168 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
2169 
2170 #if __arm64e__
2171 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2172 
2173 /*
2174  * Zone encoding is:
2175  *
2176  *   <PAC SIG><1><1><PTR value><5 bits of size class>
2177  *
2178  * VM encoding is:
2179  *
2180  *   <PAC SIG><1><0><PTR value><14 bits of page count>
2181  *
2182  * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2183  * so that PAC authentication extends the proper sign bit.
2184  */
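
/*
 * For illustration: the low 5 bits of a zone-encoded pointer select a size
 * class of the form (32 + (ptr & 0x10)) << (ptr & 0xf), so 0x03 decodes to
 * 32 << 3 == 256 and 0x13 decodes to 48 << 3 == 384, matching the
 * 32/48-style kalloc size-class progression.
 */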
2185 
2186 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2187 #else /* __arm64e__ */
2188 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2189 
2190 /*
2191  * Zone encoding is:
2192  *
2193  *   <TBI><1><PTR value><5 bits of size class>
2194  *
2195  * VM encoding is:
2196  *
2197  *   <TBI><0><PTR value><14 bits of page count>
2198  */
2199 
2200 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2201 #endif /* __arm64e__*/
2202 
2203 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2204 
2205 __attribute__((always_inline))
2206 struct kalloc_result
2207 __kalloc_array_decode(vm_address_t ptr)
2208 {
2209 	struct kalloc_result kr;
2210 	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2211 
2212 	if (ptr & zone_mask) {
2213 		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2214 		ptr &= ~0x1full;
2215 	} else if (__probable(ptr)) {
2216 		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2217 		ptr &= ~PAGE_MASK;
2218 		ptr |= zone_mask;
2219 	} else {
2220 		kr.size = 0;
2221 	}
2222 
2223 	kr.addr = (void *)ptr;
2224 	return kr;
2225 }
2226 
2227 static inline void *
2228 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2229 {
2230 	return (void *)((vm_address_t)ptr | z->z_array_size_class);
2231 }
2232 
2233 static inline vm_address_t
2234 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2235 {
2236 	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2237 
2238 	return addr | atop(size);
2239 }
2240 
2241 #else /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2242 
2243 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2244 
2245 /*
2246  * Encoding is:
2247  * bits  0..46: pointer value
2248  * bits 47..47: 0: zones, 1: VM
2249  * bits 48..63: zones: elem size, VM: number of pages
2250  */
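
/*
 * For illustration: a zone element of size 384 at address A is encoded as
 * (A & ((1ull << 47) - 1)) | (384ull << 48), leaving bit 47 clear; decoding
 * shifts the size back out of the top bits and sign-extends the remaining
 * pointer bits. A VM allocation instead keeps bit 47 set and stores
 * atop(size), which decode scales back up by PAGE_SHIFT.
 */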
2251 
2252 #define KALLOC_ARRAY_TYPE_BIT   47
2253 static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2254 static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2255 
2256 __attribute__((always_inline))
2257 struct kalloc_result
2258 __kalloc_array_decode(vm_address_t ptr)
2259 {
2260 	struct kalloc_result kr;
2261 	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2262 
2263 	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2264 	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2265 		kr.size <<= PAGE_SHIFT;
2266 	}
2267 	/* sign extend, so that it also works with NULL */
2268 	kr.addr = (void *)((long)(ptr << shift) >> shift);
2269 
2270 	return kr;
2271 }
2272 
2273 static inline void *
2274 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2275 {
2276 	vm_address_t addr = (vm_address_t)ptr;
2277 
2278 	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2279 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2280 
2281 	return (void *)addr;
2282 }
2283 
2284 static inline vm_address_t
2285 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2286 {
2287 	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2288 	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2289 
2290 	return addr;
2291 }
2292 
2293 #endif /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2294 
2295 vm_size_t
2296 kalloc_next_good_size(vm_size_t size, uint32_t period)
2297 {
2298 	uint32_t scale = kalloc_log2down((uint32_t)size);
2299 	vm_size_t step, size_class;
2300 
2301 	if (size < KHEAP_STEP_START) {
2302 		return KHEAP_STEP_START;
2303 	}
2304 	if (size < 2 * KHEAP_STEP_START) {
2305 		return 2 * KHEAP_STEP_START;
2306 	}
2307 
2308 	if (size < KHEAP_MAX_SIZE) {
2309 		step = 1ul << (scale - 1);
2310 	} else {
2311 		step = round_page(1ul << (scale - kalloc_log2down(period)));
2312 	}
2313 
2314 	size_class = (size + step) & -step;
2315 #if KASAN_CLASSIC
2316 	if (size > K_SIZE_CLASS(size_class)) {
2317 		return kalloc_next_good_size(size_class, period);
2318 	}
2319 	size_class = K_SIZE_CLASS(size_class);
2320 #endif
2321 	return size_class;
2322 }
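
/*
 * For illustration: below KHEAP_MAX_SIZE the step is half the largest power
 * of two in the current size, so repeated calls walk 128 -> 192 -> 256 ->
 * 384 -> 512, i.e. every size class is a power of two or 1.5 times one.
 * At or above KHEAP_MAX_SIZE the step becomes a page-rounded fraction of
 * the size controlled by `period`.
 */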
2323 
2324 
2325 #pragma mark kalloc
2326 
2327 static inline kalloc_heap_t
2328 kalloc_type_get_heap(kalloc_type_flags_t kt_flags)
2329 {
2330 	/*
2331 	 * Redirect data-only views
2332 	 */
2333 	if (kalloc_type_is_data(kt_flags)) {
2334 		/*
2335 		 * There are kexts that allocate arrays of data types (uint8_t etc.)
2336 		 * and use krealloc_data / kfree_data to free it; therefore,
2337 		 * until adoption lands, we need to use the shared heap for now.
2338 		 */
2339 		return GET_KEXT_KHEAP_DATA();
2340 	}
2341 
2342 	if (kt_flags & KT_PROCESSED) {
2343 		return KHEAP_KT_VAR;
2344 	}
2345 
2346 	return KHEAP_DEFAULT;
2347 }
2348 
2349 
2350 __attribute__((noinline))
2351 static struct kalloc_result
2352 kalloc_large(
2353 	kalloc_heap_t         kheap,
2354 	vm_size_t             req_size,
2355 	zalloc_flags_t        flags,
2356 	uint16_t              kt_hash,
2357 	void                 *owner __unused)
2358 {
2359 	kma_flags_t kma_flags = KMA_KASAN_GUARD;
2360 	vm_tag_t tag;
2361 	vm_offset_t addr, size;
2362 
2363 	if (flags & Z_NOFAIL) {
2364 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2365 		    (size_t)req_size);
2366 	}
2367 
2368 	/*
2369 	 * kmem_alloc could block, so return failure if Z_NOWAIT is set.
2370 	 *
2371 	 * Also, quickly reject sizes larger than our address space,
2372 	 * as kt_size or IOMallocArraySize() expect this.
2373 	 */
2374 	if ((flags & Z_NOWAIT) ||
2375 	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2376 		return (struct kalloc_result){ };
2377 	}
2378 
2379 	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2380 		return (struct kalloc_result){ };
2381 	}
2382 
2383 	/*
2384 	 * (73465472) on Intel we didn't use to pass this flag,
2385 	 * which in turn allowed kalloc_large() memory to be shared
2386 	 * with user directly.
2387 	 *
2388 	 * We're bound by this unfortunate ABI.
2389 	 */
2390 	if ((flags & Z_MAY_COPYINMAP) == 0) {
2391 #ifndef __x86_64__
2392 		kma_flags |= KMA_KOBJECT;
2393 #endif
2394 	} else {
2395 		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
2396 	}
2397 	if (flags & Z_NOPAGEWAIT) {
2398 		kma_flags |= KMA_NOPAGEWAIT;
2399 	}
2400 	if (flags & Z_ZERO) {
2401 		kma_flags |= KMA_ZERO;
2402 	}
2403 	if (kheap == KHEAP_DATA_BUFFERS) {
2404 		kma_flags |= KMA_DATA;
2405 	} else if (kheap == KHEAP_DATA_SHARED) {
2406 		kma_flags |= KMA_DATA_SHARED;
2407 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2408 		kma_flags |= KMA_SPRAYQTN;
2409 	}
2410 	if (flags & Z_NOSOFTLIMIT) {
2411 		kma_flags |= KMA_NOSOFTLIMIT;
2412 	}
2413 
2414 
2415 	tag = zalloc_flags_get_tag(flags);
2416 	if (flags & Z_VM_TAG_BT_BIT) {
2417 		tag = vm_tag_bt() ?: tag;
2418 	}
2419 	if (tag == VM_KERN_MEMORY_NONE) {
2420 		tag = kheap->kh_tag;
2421 	}
2422 
2423 	size = round_page(req_size);
2424 	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2425 		req_size = size;
2426 	}
2427 
2428 	addr = kmem_alloc_guard(kernel_map, req_size, 0,
2429 	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2430 
2431 	if (addr != 0) {
2432 		counter_inc(&kalloc_large_count);
2433 		counter_add(&kalloc_large_total, size);
2434 		KALLOC_ZINFO_SALLOC(size);
2435 		if (flags & Z_KALLOC_ARRAY) {
2436 			addr = __kalloc_array_encode_vm(addr, req_size);
2437 		}
2438 	} else {
2439 		addr = 0;
2440 	}
2441 
2442 	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2443 	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2444 }
2445 
2446 #if KASAN
2447 
2448 static inline void
2449 kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
2450 {
2451 #if KASAN_CLASSIC
2452 	/*
2453 	 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
2454 	 * tagging of the memory region is performed here.
2455 	 */
2456 	kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
2457 	    __builtin_frame_address(0));
2458 #endif /* KASAN_CLASSIC */
2459 
2460 #if KASAN_TBI
2461 	kasan_tbi_retag_unused_space(addr, size, used ?: 1);
2462 #endif /* KASAN_TBI */
2463 }
2464 #endif /* KASAN */
2465 
2466 static inline struct kalloc_result
2467 kalloc_zone(
2468 	zone_t                  z,
2469 	zone_stats_t            zstats,
2470 	zalloc_flags_t          flags,
2471 	vm_size_t               req_size)
2472 {
2473 	struct kalloc_result kr;
2474 	vm_size_t esize;
2475 
2476 	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2477 	esize = kr.size;
2478 
2479 	if (__probable(kr.addr)) {
2480 		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2481 			req_size = esize;
2482 		} else {
2483 			kr.size = req_size;
2484 		}
2485 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2486 		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2487 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2488 
2489 #if KASAN
2490 		kalloc_mark_unused_space(kr.addr, esize, kr.size);
2491 #endif /* KASAN */
2492 
2493 		if (flags & Z_KALLOC_ARRAY) {
2494 			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2495 		}
2496 	}
2497 
2498 	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2499 	return kr;
2500 }
2501 
2502 static zone_id_t
2503 kalloc_use_early_heap(
2504 	kalloc_heap_t           kheap,
2505 	zone_stats_t            zstats,
2506 	zone_id_t               zstart,
2507 	zalloc_flags_t         *flags)
2508 {
2509 	if (!zone_is_data_kheap(kheap->kh_heap_id)) {
2510 		zone_stats_t zstats_cpu = zpercpu_get(zstats);
2511 
2512 		if (os_atomic_load(&zstats_cpu->zs_alloc_not_early, relaxed) == 0) {
2513 			*flags |= Z_SET_NOTEARLY;
2514 			return KHEAP_EARLY->kh_zstart;
2515 		}
2516 	}
2517 
2518 	return zstart;
2519 }
2520 
2521 #undef kalloc_ext
2522 
2523 __mockable struct kalloc_result
2524 kalloc_ext(
2525 	void                   *kheap_or_kt_view,
2526 	vm_size_t               size,
2527 	zalloc_flags_t          flags,
2528 	void                   *owner)
2529 {
2530 	kalloc_type_var_view_t kt_view;
2531 	kalloc_heap_t kheap;
2532 	zone_stats_t zstats = NULL;
2533 	zone_t z;
2534 	uint16_t kt_hash;
2535 	zone_id_t zstart;
2536 
2537 	if (kt_is_var_view(kheap_or_kt_view)) {
2538 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2539 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2540 		/*
2541 		 * Use stats from view if present, else use stats from kheap.
2542 		 * KHEAP_KT_VAR accumulates stats for all allocations going to
2543 		 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2544 		 * use stats from the respective zones.
2545 		 */
2546 		zstats  = kt_view->kt_stats;
2547 		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2548 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
2549 	} else {
2550 		kt_view = NULL;
2551 		kheap   = kheap_or_kt_view;
2552 		kt_hash = kheap->kh_type_hash;
2553 		zstart  = kheap->kh_zstart;
2554 	}
2555 
2556 	if (!zstats) {
2557 		zstats = kheap->kh_stats;
2558 	}
2559 
2560 	zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
2561 	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2562 	if (z) {
2563 		return kalloc_zone(z, zstats, flags, size);
2564 	} else {
2565 		return kalloc_large(kheap, size, flags, kt_hash, owner);
2566 	}
2567 }
2568 
2569 #if XNU_PLATFORM_MacOSX
2570 void *
2571 kalloc_external(vm_size_t size);
2572 void *
2573 kalloc_external(vm_size_t size)
2574 {
2575 	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2576 	return kheap_alloc(KHEAP_DEFAULT, size, flags);
2577 }
2578 #endif /* XNU_PLATFORM_MacOSX */
2579 
2580 void *
2581 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2582 void *
2583 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2584 {
2585 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2586 	return kheap_alloc(GET_KEXT_KHEAP_DATA(), size, flags);
2587 }
2588 
2589 void *
2590 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags);
2591 void *
2592 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags)
2593 {
2594 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
2595 	return kheap_alloc(KHEAP_DATA_SHARED, size, flags);
2596 }
2597 
2598 __abortlike
2599 static void
2600 kalloc_data_require_panic(void *addr, vm_size_t size)
2601 {
2602 	zone_id_t zid = zone_id_for_element(addr, size);
2603 
2604 	if (zid != ZONE_ID_INVALID) {
2605 		zone_t z = &zone_array[zid];
2606 		zone_security_flags_t zsflags = zone_security_array[zid];
2607 
2608 		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
2609 			panic("kalloc_data_require failed: address %p in [%s%s]",
2610 			    addr, zone_heap_name(z), zone_name(z));
2611 		}
2612 
2613 		panic("kalloc_data_require failed: address %p in [%s%s], "
2614 		    "size too large %zd > %zd", addr,
2615 		    zone_heap_name(z), zone_name(z),
2616 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2617 	} else {
2618 		panic("kalloc_data_require failed: address %p not in zone native map",
2619 		    addr);
2620 	}
2621 }
2622 
2623 __abortlike
2624 static void
2625 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2626 {
2627 	zone_id_t zid = zone_id_for_element(addr, size);
2628 
2629 	if (zid != ZONE_ID_INVALID) {
2630 		zone_t z = &zone_array[zid];
2631 		zone_security_flags_t zsflags = zone_security_array[zid];
2632 
2633 		switch (zsflags.z_kheap_id) {
2634 		case KHEAP_ID_NONE:
2635 		case KHEAP_ID_DATA_BUFFERS:
2636 		case KHEAP_ID_DATA_SHARED:
2637 		case KHEAP_ID_KT_VAR:
2638 			panic("kalloc_non_data_require failed: address %p in [%s%s]",
2639 			    addr, zone_heap_name(z), zone_name(z));
2640 		default:
2641 			break;
2642 		}
2643 
2644 		panic("kalloc_non_data_require failed: address %p in [%s%s], "
2645 		    "size too large %zd > %zd", addr,
2646 		    zone_heap_name(z), zone_name(z),
2647 		    (size_t)size, (size_t)zone_elem_inner_size(z));
2648 	} else {
2649 		panic("kalloc_non_data_require failed: address %p not in zone native map",
2650 		    addr);
2651 	}
2652 }
2653 
2654 void
2655 kalloc_data_require(void *addr, vm_size_t size)
2656 {
2657 	zone_id_t zid = zone_id_for_element(addr, size);
2658 
2659 	if (zid != ZONE_ID_INVALID) {
2660 		zone_t z = &zone_array[zid];
2661 		zone_security_flags_t zsflags = zone_security_array[zid];
2662 		if (zone_is_data_kheap(zsflags.z_kheap_id) &&
2663 		    size <= zone_elem_inner_size(z)) {
2664 			return;
2665 		}
2666 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2667 	    (vm_address_t)addr, size)) {
2668 		return;
2669 	} else if (kmem_needs_data_share_range() &&
2670 	    kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2671 	    (vm_address_t)addr, size)) {
2672 		return;
2673 	}
2674 
2675 	kalloc_data_require_panic(addr, size);
2676 }
2677 
2678 void
2679 kalloc_non_data_require(void *addr, vm_size_t size)
2680 {
2681 	zone_id_t zid = zone_id_for_element(addr, size);
2682 
2683 	if (zid != ZONE_ID_INVALID) {
2684 		zone_t z = &zone_array[zid];
2685 		zone_security_flags_t zsflags = zone_security_array[zid];
2686 		switch (zsflags.z_kheap_id) {
2687 		case KHEAP_ID_NONE:
2688 			if (!zsflags.z_kalloc_type) {
2689 				break;
2690 			}
2691 			OS_FALLTHROUGH;
2692 		case KHEAP_ID_KT_VAR:
2693 			if (size < zone_elem_inner_size(z)) {
2694 				return;
2695 			}
2696 			break;
2697 		default:
2698 			break;
2699 		}
2700 	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2701 	    (vm_address_t)addr, size)) {
2702 		return;
2703 	} else if (kmem_needs_data_share_range() &&
2704 	    !kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2705 	    (vm_address_t)addr, size)) {
2706 		return;
2707 	}
2708 
2709 	kalloc_non_data_require_panic(addr, size);
2710 }
2711 
2712 bool
2713 kalloc_is_data_buffers(void *addr, vm_size_t size)
2714 {
2715 	zone_id_t zid = zone_id_for_element(addr, size);
2716 
2717 	/*
2718 	 * If we do not use a dedicated data share range,
2719 	 * there is no way to fully distinguish between
2720 	 * the shared and buffers heaps.
2721 	 *
2722 	 * When kmem_needs_data_share_range() == true, the
2723 	 * KMEM_RANGE_ID_DATA range is strictly for DATA_BUFFERS,
2724 	 * and KMEM_RANGE_ID_DATA_SHARED is strictly for DATA_SHARED.
2725 	 */
2726 	assert(kmem_needs_data_share_range());
2727 
2728 	if (zid != ZONE_ID_INVALID) {
2729 		zone_t z = &zone_array[zid];
2730 		zone_security_flags_t zsflags = zone_security_array[zid];
2731 		if (zone_is_data_buffers_kheap(zsflags.z_kheap_id) &&
2732 		    size <= zone_elem_inner_size(z)) {
2733 			return true;
2734 		}
2735 	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2736 	    (vm_address_t)addr, size)) {
2737 		return true;
2738 	}
2739 
2740 	return false;
2741 }
2742 
2743 __mockable void *
2744 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2745 {
2746 	/*
2747 	 * Callsites from a kext that isn't in the BootKC on macOS, and any
2748 	 * callsites on armv7, aren't processed during startup, so default
2749 	 * to using kheap_alloc.
2750 	 *
2751 	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the
2752 	 * zone is left NULL as we need to use the VM for the allocation.
2753 	 *
2754 	 */
2755 	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2756 		kalloc_heap_t kheap;
2757 		vm_size_t size;
2758 
2759 		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2760 		size  = kalloc_type_get_size(kt_view->kt_size);
2761 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
2762 		return kalloc_ext(kheap, size, flags, NULL).addr;
2763 	}
2764 
2765 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2766 	return kalloc_type_impl(kt_view, flags);
2767 }
2768 
2769 void *
2770 kalloc_type_var_impl_external(
2771 	kalloc_type_var_view_t  kt_view,
2772 	vm_size_t               size,
2773 	zalloc_flags_t          flags,
2774 	void                   *owner);
2775 void *
2776 kalloc_type_var_impl_external(
2777 	kalloc_type_var_view_t  kt_view,
2778 	vm_size_t               size,
2779 	zalloc_flags_t          flags,
2780 	void                   *owner)
2781 {
2782 	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2783 	return kalloc_type_var_impl(kt_view, size, flags, owner);
2784 }
2785 
2786 #pragma mark kfree
2787 
2788 __abortlike
2789 static void
2790 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2791 {
2792 	zone_security_flags_t zsflags = zone_security_config(z);
2793 	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2794 
2795 	if (zsflags.z_kalloc_type) {
2796 		panic_include_kalloc_types = true;
2797 		kalloc_type_src_zone = z;
2798 		panic("kfree: addr %p found in kalloc type zone '%s' "
2799 		    "but being freed to %s heap", data, z->z_name, kheap_name);
2800 	}
2801 
2802 	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2803 		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2804 		    data, size, zone_heap_name(z), z->z_name);
2805 	} else {
2806 		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2807 		    data, size, zone_heap_name(z), kheap_name);
2808 	}
2809 }
2810 
2811 __abortlike
2812 static void
2813 kfree_size_confusion_panic(zone_t z, void *data,
2814     size_t oob_offs, size_t size, size_t zsize)
2815 {
2816 	if (z) {
2817 		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2818 		    "with elem_size %zd",
2819 		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2820 	} else {
2821 		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2822 		    data, size, oob_offs);
2823 	}
2824 }
2825 
2826 __abortlike
2827 static void
2828 kfree_size_invalid_panic(void *data, size_t size)
2829 {
2830 	panic("kfree: addr %p trying to free with nonsensical size %zd",
2831 	    data, size);
2832 }
2833 
2834 __abortlike
2835 static void
2836 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2837     size_t max_size)
2838 {
2839 	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2840 	    data, size, min_size, max_size);
2841 }
2842 
2843 static void
2844 kfree_size_require(
2845 	kalloc_heap_t kheap,
2846 	void *addr,
2847 	vm_size_t min_size,
2848 	vm_size_t max_size)
2849 {
2850 	assert3u(min_size, <=, max_size);
2851 	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2852 	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2853 	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2854 	if (elem_size > max_zone_size || elem_size < min_size) {
2855 		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2856 	}
2857 }
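
/*
 * For illustration: with min_sz == 64 and max_sz == 200, max_zone is the
 * size class covering 200 bytes (256 in the usual kalloc progression, an
 * assumption), so any element whose zone size lies in [64, 256] passes;
 * the bounds are checked against the zone's element size rather than the
 * caller's exact request.
 */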
2858 
2859 static void
2860 kfree_large(
2861 	vm_offset_t             addr,
2862 	vm_size_t               size,
2863 	kmf_flags_t             flags,
2864 	void                   *owner)
2865 {
2866 	size = kmem_free_guard(kernel_map, addr, size,
2867 	    flags | KMF_TAG | KMF_KASAN_GUARD,
2868 	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2869 
2870 	counter_dec(&kalloc_large_count);
2871 	counter_add(&kalloc_large_total, -(uint64_t)size);
2872 	KALLOC_ZINFO_SFREE(size);
2873 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2874 }
2875 
2876 static void
2877 kfree_zone(
2878 	void                   *kheap_or_kt_view __unsafe_indexable,
2879 	void                   *data,
2880 	vm_size_t               size,
2881 	zone_t                  z,
2882 	vm_size_t               zsize)
2883 {
2884 	zone_security_flags_t zsflags = zone_security_config(z);
2885 	kalloc_type_var_view_t kt_view;
2886 	kalloc_heap_t kheap;
2887 	zone_stats_t zstats = NULL;
2888 
2889 	if (kt_is_var_view(kheap_or_kt_view)) {
2890 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
2891 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
2892 		/*
2893 		 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2894 		 * we will end up having incorrect stats. Cross frees may happen on
2895 		 * macOS due to allocation from an unprocessed view and free from
2896 		 * a processed view or vice versa.
2897 		 */
2898 		zstats  = kt_view->kt_stats;
2899 	} else {
2900 		kt_view = NULL;
2901 		kheap   = kheap_or_kt_view;
2902 	}
2903 
2904 	if (!zstats) {
2905 		zstats = kheap->kh_stats;
2906 	}
2907 
2908 	zsflags = zone_security_config(z);
2909 	if (kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED) {
2910 		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2911 			kfree_heap_confusion_panic(kheap, data, size, z);
2912 		}
2913 	} else {
2914 		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2915 		    (zsflags.z_kheap_id != KHEAP_ID_EARLY)) {
2916 			kfree_heap_confusion_panic(kheap, data, size, z);
2917 		}
2918 	}
2919 
2920 	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2921 
2922 	/* needs to be __nosan because the user size might be partial */
2923 	__nosan_bzero(data, zsize);
2924 	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2925 }
2926 
2927 __mockable void
2928 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2929 {
2930 	vm_size_t bucket_size;
2931 	zone_t z;
2932 
2933 	if (data == NULL) {
2934 		return;
2935 	}
2936 
2937 	if (size > KFREE_ABSURD_SIZE) {
2938 		kfree_size_invalid_panic(data, size);
2939 	}
2940 
2941 	if (size <= KHEAP_MAX_SIZE) {
2942 		vm_size_t oob_offs;
2943 
2944 		bucket_size = zone_element_size(data, &z, true, &oob_offs);
2945 		if (size + oob_offs > bucket_size || bucket_size == 0) {
2946 			kfree_size_confusion_panic(z, data,
2947 			    oob_offs, size, bucket_size);
2948 		}
2949 
2950 		data = (char *)data - oob_offs;
2951 		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2952 	} else {
2953 		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2954 	}
2955 }
2956 
2957 void
2958 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2959 {
2960 	vm_offset_t oob_offs;
2961 	vm_size_t size, usize = 0;
2962 	zone_t z;
2963 
2964 	if (data == NULL) {
2965 		return;
2966 	}
2967 
2968 	size = zone_element_size(data, &z, true, &oob_offs);
2969 	if (size) {
2970 #if KASAN_CLASSIC
2971 		usize = kasan_user_size((vm_offset_t)data);
2972 #endif
2973 		data = (char *)data - oob_offs;
2974 		kfree_zone(kheap, data, usize, z, size);
2975 	} else {
2976 		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2977 	}
2978 }
2979 
2980 #if XNU_PLATFORM_MacOSX
2981 void
2982 kfree_external(void *addr, vm_size_t size);
2983 void
2984 kfree_external(void *addr, vm_size_t size)
2985 {
2986 	kalloc_heap_t kheap = KHEAP_DEFAULT;
2987 
2988 	kfree_ext(kheap, addr, size);
2989 }
2990 #endif /* XNU_PLATFORM_MacOSX */
2991 
2992 void
2993 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2994     vm_size_t min_sz, vm_size_t max_sz)
2995 {
2996 	if (__improbable(addr == NULL)) {
2997 		return;
2998 	}
2999 	kfree_size_require(kheap, addr, min_sz, max_sz);
3000 	kfree_addr_ext(kheap, addr);
3001 }
3002 
3003 __mockable void *
3004 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
3005 {
3006 	zone_stats_t zs = kt_view->kt_zv.zv_stats;
3007 	zone_t       z  = kt_view->kt_zv.zv_zone;
3008 	zone_stats_t zs_cpu = zpercpu_get(zs);
3009 
3010 	if ((flags & Z_SET_NOTEARLY) ||
3011 	    os_atomic_load(&zs_cpu->zs_alloc_not_early, relaxed)) {
3012 		return zalloc_ext(z, zs, flags).addr;
3013 	}
3014 
3015 	assert(!zone_is_data_kheap(zone_security_config(z).z_kheap_id));
3016 	return zalloc_ext(kt_view->kt_zearly, zs, flags | Z_SET_NOTEARLY).addr;
3017 }
3018 
3019 void
3020 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
3021 {
3022 	/*
3023 	 * If the callsite is from a kext that isn't in the BootKC, it wasn't
3024 	 * processed during startup, so default to using kheap_free.
3025 	 *
3026 	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the
3027 	 * zone is left NULL as we need to use the VM for the allocation/free.
3028 	 */
3029 	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
3030 		kalloc_heap_t kheap;
3031 		vm_size_t size;
3032 
3033 		size  = kalloc_type_get_size(kt_view->kt_size);
3034 		kheap = kalloc_type_get_heap(kt_view->kt_flags);
3035 		return kheap_free(kheap, ptr, size);
3036 	}
3037 	return kfree_type_impl(kt_view, ptr);
3038 }
3039 
3040 void
3041 kfree_type_var_impl_external(
3042 	kalloc_type_var_view_t  kt_view,
3043 	void                   *ptr,
3044 	vm_size_t               size);
3045 void
3046 kfree_type_var_impl_external(
3047 	kalloc_type_var_view_t  kt_view,
3048 	void                   *ptr,
3049 	vm_size_t               size)
3050 {
3051 	return kfree_type_var_impl(kt_view, ptr, size);
3052 }
3053 
3054 void
3055 kfree_data_external(void *ptr, vm_size_t size);
3056 void
3057 kfree_data_external(void *ptr, vm_size_t size)
3058 {
3059 	return kheap_free(GET_KEXT_KHEAP_DATA(), ptr, size);
3060 }
3061 
3062 void
3063 kfree_data_addr_external(void *ptr);
3064 void
3065 kfree_data_addr_external(void *ptr)
3066 {
3067 	return kheap_free_addr(GET_KEXT_KHEAP_DATA(), ptr);
3068 }
3069 
3070 void
3071 kfree_shared_data_external(void *ptr, vm_size_t size);
3072 void
3073 kfree_shared_data_external(void *ptr, vm_size_t size)
3074 {
3075 	return kheap_free(KHEAP_DATA_SHARED, ptr, size);
3076 }
3077 
3078 void
3079 kfree_shared_data_addr_external(void *ptr);
3080 void
3081 kfree_shared_data_addr_external(void *ptr)
3082 {
3083 	return kheap_free_addr(KHEAP_DATA_SHARED, ptr);
3084 }
3085 
3086 #pragma mark krealloc
3087 
3088 __abortlike
3089 static void
3090 krealloc_size_invalid_panic(void *data, size_t size)
3091 {
3092 	panic("krealloc: addr %p trying to free with nonsensical size %zd",
3093 	    data, size);
3094 }
3095 
3096 
3097 __attribute__((noinline))
3098 static struct kalloc_result
3099 krealloc_large(
3100 	kalloc_heap_t         kheap,
3101 	vm_offset_t           addr,
3102 	vm_size_t             old_size,
3103 	vm_size_t             new_size,
3104 	zalloc_flags_t        flags,
3105 	uint16_t              kt_hash,
3106 	void                 *owner __unused)
3107 {
3108 	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_KASAN_GUARD;
3109 	vm_size_t new_req_size = new_size;
3110 	vm_size_t old_req_size = old_size;
3111 	uint64_t delta;
3112 	kmem_return_t kmr;
3113 	vm_tag_t tag;
3114 
3115 	if (flags & Z_NOFAIL) {
3116 		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
3117 		    (size_t)new_req_size);
3118 	}
3119 
3120 	/*
3121 	 * kmem_alloc could block, so return failure if Z_NOWAIT is set.
3122 	 *
3123 	 * Also, quickly reject sizes larger than our address space,
3124 	 * as kt_size or IOMallocArraySize() expect this.
3125 	 */
3126 	if ((flags & Z_NOWAIT) ||
3127 	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
3128 		return (struct kalloc_result){ };
3129 	}
3130 
3131 	/*
3132 	 * (73465472) on Intel we didn't use to pass this flag,
3133 	 * which in turn allowed kalloc_large() memory to be shared
3134 	 * with user directly.
3135 	 *
3136 	 * We're bound by this unfortunate ABI.
3137 	 */
3138 	if ((flags & Z_MAY_COPYINMAP) == 0) {
3139 #ifndef __x86_64__
3140 		kmr_flags |= KMR_KOBJECT;
3141 #endif
3142 	} else {
3143 		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
3144 	}
3145 	if (flags & Z_NOPAGEWAIT) {
3146 		kmr_flags |= KMR_NOPAGEWAIT;
3147 	}
3148 	if (flags & Z_ZERO) {
3149 		kmr_flags |= KMR_ZERO;
3150 	}
3151 	if (kheap == KHEAP_DATA_BUFFERS) {
3152 		kmr_flags |= KMR_DATA;
3153 	} else if (kheap == KHEAP_DATA_SHARED) {
3154 		kmr_flags |= KMR_DATA_SHARED;
3155 	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
3156 		kmr_flags |= KMR_SPRAYQTN;
3157 	}
3158 	if (flags & Z_REALLOCF) {
3159 		kmr_flags |= KMR_REALLOCF;
3160 	}
3161 
3162 #if ZSECURITY_CONFIG(ZONE_TAGGING)
3163 	krealloc_enforce_large_tagging_policy(&kmr_flags, kheap);
3164 #endif /* ZSECURITY_CONFIG(ZONE_TAGGING) */
3165 
3166 	tag = zalloc_flags_get_tag(flags);
3167 	if (flags & Z_VM_TAG_BT_BIT) {
3168 		tag = vm_tag_bt() ?: tag;
3169 	}
3170 	if (tag == VM_KERN_MEMORY_NONE) {
3171 		tag = kheap->kh_tag;
3172 	}
3173 
3174 	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
3175 	    kmr_flags, kalloc_guard(tag, kt_hash, owner));
3176 
3177 	new_size = round_page(new_req_size);
3178 	old_size = round_page(old_req_size);
3179 
3180 	if (kmr.kmr_address != 0) {
3181 		delta = (uint64_t)(new_size - old_size);
3182 	} else if (flags & Z_REALLOCF) {
3183 		counter_dec(&kalloc_large_count);
3184 		delta = (uint64_t)(-old_size);
3185 	} else {
3186 		delta = 0;
3187 	}
3188 
3189 	counter_add(&kalloc_large_total, delta);
3190 	KALLOC_ZINFO_SALLOC(delta);
3191 
3192 	if (addr != 0 || (flags & Z_REALLOCF)) {
3193 		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
3194 		    void*, addr);
3195 	}
3196 	if (__improbable(kmr.kmr_address == 0)) {
3197 		return (struct kalloc_result){ };
3198 	}
3199 
3200 	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
3201 	    void*, kmr.kmr_address);
3202 
3203 	if (flags & Z_KALLOC_ARRAY) {
3204 		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
3205 		    new_req_size);
3206 	}
3207 	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
3208 }
3209 
3210 #undef krealloc_ext
3211 
3212 struct kalloc_result
3213 krealloc_ext(
3214 	void                 *kheap_or_kt_view __unsafe_indexable,
3215 	void                 *addr,
3216 	vm_size_t             old_size,
3217 	vm_size_t             new_size,
3218 	zalloc_flags_t        flags,
3219 	void                 *owner)
3220 {
3221 	vm_size_t old_bucket_size, new_bucket_size, min_size;
3222 	kalloc_type_var_view_t kt_view;
3223 	kalloc_heap_t kheap;
3224 	zone_stats_t zstats = NULL;
3225 	struct kalloc_result kr;
3226 	vm_offset_t oob_offs = 0;
3227 	zone_t old_z, new_z;
3228 	uint16_t kt_hash = 0;
3229 	zone_id_t zstart;
3230 
3231 	if (old_size > KFREE_ABSURD_SIZE) {
3232 		krealloc_size_invalid_panic(addr, old_size);
3233 	}
3234 
3235 	if (addr == NULL && new_size == 0) {
3236 		return (struct kalloc_result){ };
3237 	}
3238 
3239 	if (kt_is_var_view(kheap_or_kt_view)) {
3240 		kt_view = kt_demangle_var_view(kheap_or_kt_view);
3241 		kheap   = kalloc_type_get_heap(kt_view->kt_flags);
3242 		/*
3243 		 * Similar to kalloc_ext: Use stats from view if present,
3244 		 * else use stats from kheap.
3245 		 *
3246 		 * krealloc_type isn't exposed to kexts, so we don't need to
3247 		 * handle cross frees and can rely on stats from view or kheap.
3248 		 */
3249 		zstats  = kt_view->kt_stats;
3250 		kt_hash = KT_GET_HASH(kt_view->kt_flags);
3251 		zstart  = kt_view->kt_heap_start ?: kheap->kh_zstart;
3252 	} else {
3253 		kt_view = NULL;
3254 		kheap   = kheap_or_kt_view;
3255 		kt_hash = kheap->kh_type_hash;
3256 		zstart  = kheap->kh_zstart;
3257 	}
3258 
3259 	if (!zstats) {
3260 		zstats = kheap->kh_stats;
3261 	}
3262 	/*
3263 	 * Find out the size of the bucket in which the new sized allocation
3264 	 * would land. If it matches the bucket of the original allocation,
3265 	 * simply return the same address.
3266 	 */
3267 	if (new_size == 0) {
3268 		new_z = ZONE_NULL;
3269 		new_bucket_size = new_size = 0;
3270 	} else {
3271 		zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
3272 		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
3273 		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
3274 	}
3275 #if !KASAN_CLASSIC
3276 	if (flags & Z_FULLSIZE) {
3277 		new_size = new_bucket_size;
3278 	}
3279 #endif /* !KASAN_CLASSIC */
3280 
3281 	if (addr == NULL) {
3282 		old_z = ZONE_NULL;
3283 		old_size = old_bucket_size = 0;
3284 	} else if (kheap_size_from_zone(addr, old_size, flags)) {
3285 		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
3286 		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
3287 			kfree_size_confusion_panic(old_z, addr,
3288 			    oob_offs, old_size, old_bucket_size);
3289 		}
3290 		__builtin_assume(old_z != ZONE_NULL);
3291 	} else {
3292 		old_z = ZONE_NULL;
3293 		old_bucket_size = round_page(old_size);
3294 	}
3295 	min_size = MIN(old_size, new_size);
3296 
3297 	if (old_bucket_size == new_bucket_size && old_z) {
3298 		kr.addr = (char *)addr - oob_offs;
3299 		kr.size = new_size;
3300 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
3301 		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
3302 		    new_size, new_bucket_size);
3303 		if (kr.addr != addr) {
3304 			memmove(kr.addr, addr, min_size);
3305 			bzero((char *)kr.addr + min_size,
3306 			    kr.size - min_size);
3307 		}
3308 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3309 #if KASAN
3310 		/*
3311 		 * On KASAN kernels, treat a reallocation effectively as a new
3312 		 * allocation and add a sanity check around the existing one
3313 		 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't amount
3314 		 * to much extra work; on KASAN_TBI, assign a new tag both to the
3315 		 * buffer and to the potential free space.
3316 		 */
3317 #if KASAN_CLASSIC
3318 		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
3319 		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
3320 		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
3321 #endif /* KASAN_CLASSIC */
3322 #if KASAN_TBI
3323 		/*
3324 		 * Validate the current buffer, then generate a new tag,
3325 		 * even if the address is stable, it's a "new" allocation.
3326 		 */
3327 		__asan_loadN((vm_offset_t)addr, old_size);
3328 		kr.addr = vm_memtag_generate_and_store_tag(kr.addr, kr.size);
3329 		kasan_tbi_retag_unused_space(kr.addr, new_bucket_size, kr.size);
3330 #endif /* KASAN_TBI */
3331 #endif /* KASAN */
3332 		goto out_success;
3333 	}
3334 
3335 #if !KASAN
3336 	/*
3337 	 * Fallthrough to krealloc_large() for KASAN,
3338 	 * because we can't use kasan_check_alloc()
3339 	 * on kalloc_large() memory.
3340 	 *
3341 	 * kmem_realloc_guard() will perform all the validations,
3342 	 * and re-tagging.
3343 	 */
3344 	if (old_bucket_size == new_bucket_size) {
3345 		kr.addr = (char *)addr - oob_offs;
3346 		kr.size = new_size;
3347 		goto out_success;
3348 	}
3349 #endif
3350 
3351 	if (addr && !old_z && new_size && !new_z) {
3352 		return krealloc_large(kheap, (vm_offset_t)addr,
3353 		           old_size, new_size, flags, kt_hash, owner);
3354 	}
3355 
3356 	if (!new_size) {
3357 		kr.addr = NULL;
3358 		kr.size = 0;
3359 	} else if (new_z) {
3360 		kr = kalloc_zone(new_z, zstats,
3361 		    flags & ~Z_KALLOC_ARRAY, new_size);
3362 	} else if (old_z || addr == NULL) {
3363 		kr = kalloc_large(kheap, new_size,
3364 		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
3365 	}
3366 
3367 	if (addr && kr.addr) {
3368 		__nosan_memcpy(kr.addr, addr, min_size);
3369 	}
3370 
3371 	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
3372 		if (old_z) {
3373 			kfree_zone(kheap_or_kt_view,
3374 			    (char *)addr - oob_offs, old_size,
3375 			    old_z, old_bucket_size);
3376 		} else {
3377 			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
3378 		}
3379 	}
3380 
3381 	if (__improbable(kr.addr == NULL)) {
3382 		return kr;
3383 	}
3384 
3385 out_success:
3386 	if ((flags & Z_KALLOC_ARRAY) == 0) {
3387 		return kr;
3388 	}
3389 
3390 	if (new_z) {
3391 		kr.addr = __kalloc_array_encode_zone(new_z,
3392 		    kr.addr, kr.size);
3393 	} else {
3394 		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
3395 		    kr.size);
3396 	}
3397 	return kr;
3398 }
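
/*
 * For illustration: growing a 100-byte allocation to 120 bytes lands in the
 * same bucket (128 bytes in the usual progression, an assumption), so the
 * same-bucket fast path above returns the original address and only
 * refreshes accounting and KASAN state; growing it to 200 bytes changes
 * buckets and takes the allocate-copy-free path instead.
 */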
3399 
void *
krealloc_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags);
void *
krealloc_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

void *
krealloc_shared_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags);
void *
krealloc_shared_data_external(
	void               *ptr,
	vm_size_t           old_size,
	vm_size_t           new_size,
	zalloc_flags_t      flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

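/*
 * Initialize a heap view from its parent: the view inherits the
 * parent's zone start, heap id and VM tag, but keeps its own
 * per-CPU statistics.
 */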
__startup_func
static void
kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
{
	kheap->kh_zstart      = parent_heap->kh_zstart;
	kheap->kh_heap_id     = parent_heap->kh_heap_id;
	kheap->kh_tag         = parent_heap->kh_tag;
	kheap->kh_stats       = zalloc_percpu_permanent_type(struct zone_stats);
	zone_view_count += 1;
}

__startup_func
static void
kheap_init_data(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_BUFFERS, kheap);
	kheap->kh_views               = KHEAP_DATA_BUFFERS->kh_views;
	KHEAP_DATA_BUFFERS->kh_views  = kheap;
}

__startup_func
static void
kheap_init_data_shared(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_SHARED, kheap);
	kheap->kh_views               = KHEAP_DATA_SHARED->kh_views;
	KHEAP_DATA_SHARED->kh_views   = kheap;
}

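/*
 * KT_VAR heap views are bound at startup to a randomly chosen
 * flexible kalloc_type heap and get a random type hash, so the
 * mapping of views to heaps varies from boot to boot.
 */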
__startup_func
static void
kheap_init_var(kalloc_heap_t kheap)
{
	uint16_t idx;
	struct kheap_info *parent_heap;

	kheap_init(KHEAP_KT_VAR, kheap);
	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
	    KT_VAR__FIRST_FLEXIBLE_HEAP;
	parent_heap = &kalloc_type_heap_array[idx];
	kheap->kh_zstart = parent_heap->kh_zstart;
	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
		(uint32_t) early_random(), 0);
	kheap->kh_views       = parent_heap->kh_views;
	parent_heap->kh_views = kheap;
}

__startup_func
void
kheap_startup_init(kalloc_heap_t kheap)
{
	switch (kheap->kh_heap_id) {
	case KHEAP_ID_DATA_BUFFERS:
		kheap_init_data(kheap);
		break;
	case KHEAP_ID_DATA_SHARED:
		kheap_init_data_shared(kheap);
		break;
	case KHEAP_ID_KT_VAR:
		kheap_init_var(kheap);
		break;
	default:
		panic("kheap_startup_init: invalid KHEAP_ID: %d",
		    kheap->kh_heap_id);
	}
}

#pragma mark IOKit/libkern helpers

#if XNU_PLATFORM_MacOSX

void *
kern_os_malloc_external(size_t size);
void *
kern_os_malloc_external(size_t size)
{
	if (size == 0) {
		return NULL;
	}

	return kheap_alloc(KERN_OS_MALLOC, size,
	           Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
}

void
kern_os_free_external(void *addr);
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}

void *
kern_os_realloc_external(void *addr, size_t nsize);
void *
kern_os_realloc_external(void *addr, size_t nsize)
{
	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
	vm_size_t osize, oob_offs = 0;

	if (addr == NULL) {
		return kern_os_malloc_external(nsize);
	}

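	/*
	 * Recover the old allocation size: zone_element_size() returns 0
	 * when the address isn't zone-backed, in which case the size is
	 * looked up from the VM; on KASAN_CLASSIC, use the user size
	 * recorded at allocation time instead.
	 */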
	osize = zone_element_size(addr, NULL, false, &oob_offs);
	if (osize == 0) {
		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
#if KASAN_CLASSIC
	} else {
		osize = kasan_user_size((vm_offset_t)addr);
#endif
	}
	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
}

#endif /* XNU_PLATFORM_MacOSX */

void
kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(size)
	zfree(zone, addr);
#else
	if (zone_owns(zone, addr)) {
		zfree(zone, addr);
	} else {
		/*
		 * Third party kexts might not know about operator new,
		 * in which case the object was allocated from the
		 * default heap instead of this zone.
		 */
		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
		    zone->z_name);
		kheap_free(KHEAP_DEFAULT, addr, size);
	}
#endif
}

bool
IOMallocType_from_vm(kalloc_type_view_t ktv)
{
	return kalloc_type_from_vm(ktv->kt_flags);
}

void
kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(esize)
#else
	/*
	 * For third party kexts that have been compiled with an SDK
	 * predating macOS 11, allocating an OSObject that is defined in
	 * xnu or first party kexts by directly calling new will use the
	 * default heap, as it goes through OSObject_operator_new_external.
	 * If such an object is freed by xnu, it panics, as xnu uses typed
	 * free, which requires the object to have been allocated in a
	 * kalloc.type zone. To work around this issue, detect whether the
	 * allocation being freed came from the default heap and allow
	 * freeing to it.
	 */
	zone_id_t zid = zone_id_for_element(addr, esize);
	if (__probable(zid < MAX_ZONES)) {
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
			return kheap_free(KHEAP_DEFAULT, addr, esize);
		}
	}
#endif
	kfree_type_impl_external(ktv, addr);
}

#pragma mark tests
#if DEBUG || DEVELOPMENT

#include <sys/random.h>

/*
 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
 *
 * Note: the presence of zones named kalloc.type* is used to
 * determine whether the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!zone_found) {
		return 0;
	}

	return 1;
}

/*
 * Ensure that the policy uses the zone budget completely
 */
static int
kalloc_type_test_policy(int64_t in)
{
	uint16_t zone_budget = (uint16_t) in;
	uint16_t max_bucket_freq = 25;
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
	uint16_t wasted_zone_budget = 0, total_types = 0;
	uint16_t n_zones = 0, n_zones_cal = 0;
	int ret = 0;

	/*
	 * Need a minimum of 2 zones per size class
	 */
	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
		return ret;
	}
	read_random((void *)&random[0], sizeof(random));
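	/*
	 * Build a random workload for each size class: the smaller of the
	 * two draws seeds freq_list, the larger freq_total_list, so the
	 * total count is never below the per-signature count.
	 */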
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;

		freq_list[i] = r1 > r2 ? r2 : r1;
		freq_total_list[i] = r1 > r2 ? r1 : r2;
	}
	wasted_zone_budget = kalloc_type_apply_policy(
		freq_list, freq_total_list,
		zones_per_sig, zones_per_type, zone_budget);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		total_types += freq_total_list[i];
	}

	n_zones = kmem_get_random16(total_types);
	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
		    freq_total_list[i], total_types,
		    i == MAX_K_ZONE(kt_zone_cfg) - 1);

		n_zones_cal += n_zones_for_type;

		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
	}
	printf("-----------------------\n%u\t%u\n", total_types,
	    n_zones_cal);

	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
		ret = 1;
	}
	return ret;
}

/*
 * Ensure that the sizes of kalloc_type adopters fit in the zone
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;

	while (kt_cur != NULL) {
		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
			return 0;
		}
		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
	}

	return 1;
}

struct test_kt_data {
	int a;
};

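/*
 * Ensure that a type whose signature is all data (such as
 * test_kt_data above) gets redirected to the data-only zones.
 */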
static int
kalloc_type_test_data_redirect(void)
{
	struct kalloc_type_view ktv_data = {
		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
	};
	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
		printf("%s: data redirect failed\n", __func__);
		return 0;
	}
	return 1;
}

static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all-data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);

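/*
 * Size class that `size` maps to in `kheap`: the inner element size
 * of the backing zone, or whole pages when served by the VM.
 */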
static vm_size_t
test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
{
	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);

	return z ? zone_elem_inner_size(z) : round_page(size);
}

static int
run_kalloc_test_kheap(kalloc_heap_t kheap)
{
	uint64_t *data_ptr;
	void *strippedp_old, *strippedp_new;
	size_t alloc_size = 0, old_alloc_size = 0;
	struct kalloc_result kr = {};

	printf("%s: %s test running\n", __func__, kheap->kh_name);

	/*
	 * Test size 0: alloc, free, realloc
	 */
	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	if (!data_ptr) {
		printf("%s: kalloc 0 returned null\n", __func__);
		return 1;
	}
	kheap_free(kheap, data_ptr, alloc_size);

	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	alloc_size = sizeof(uint64_t) + 1;
	data_ptr = krealloc_ext(kheap, data_ptr, old_alloc_size,
	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> old size 0 failed\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	/*
	 * Test krealloc: same size class, different size classes, 2pgs,
	 * VM (with owner)
	 */
	old_alloc_size = alloc_size;
	alloc_size++;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old != strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) !=
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> same size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	old_alloc_size = alloc_size;
	alloc_size *= 2;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old == strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) ==
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> different size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	kheap_free(kheap, kr.addr, alloc_size);

	alloc_size = 3544;
	data_ptr = kalloc_ext(kheap, alloc_size,
	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
		    __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, alloc_size,
	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> 2pgs returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, PAGE_SIZE * 2,
	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM1 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 2,
	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM2 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 4,
	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);

	printf("%s: test passed\n", __func__);
	return 0;
}

static int
run_kalloc_test(int64_t in __unused, int64_t *out)
{
	*out = 1;

	if (run_kalloc_test_kheap(KHEAP_DATA_BUFFERS) != 0 ||
	    run_kalloc_test_kheap(KHEAP_DATA_SHARED) != 0) {
		*out = 0;
	}

	return 0;
}
SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);

#endif /* DEBUG || DEVELOPMENT */