/*
 * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 * File: kern/kalloc.c
 * Author: Avadis Tevanian, Jr.
 * Date: 1985
 *
 * General kernel memory allocator. This allocator is designed
 * to be used by the kernel to manage dynamic memory fast.
 */

#include "mach/vm_types.h"
#include <mach/boolean.h>
#include <mach/sdt.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/counter.h>
#include <kern/zalloc_internal.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/backtrace.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_object_xnu.h>
#include <vm/vm_map.h>
#include <vm/vm_memtag.h>
#include <sys/kdebug.h>

#include <os/hash.h>
#include <san/kasan.h>
#include <libkern/section_keywords.h>
#include <libkern/prelink.h>


SCALABLE_COUNTER_DEFINE(kalloc_large_count);
SCALABLE_COUNTER_DEFINE(kalloc_large_total);

#pragma mark initialization

/*
 * All allocations of size less than KHEAP_MAX_SIZE are rounded up to the
 * nearest sized zone. This allocator is built on top of the zone allocator.
 * A zone is created for each potential size that we are willing to get in
 * small blocks.
 *
 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
 */
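/*
 * Illustrative sketch (not authoritative; the exact class list lives in
 * kt_zone_cfg/kheap_zsize below): a request is rounded up to the nearest
 * size class, so e.g.
 *
 *    kalloc(100)   -> served from a 128-byte zone
 *    kalloc(40000) -> larger than KHEAP_MAX_SIZE on typical configs,
 *                     falls through to kalloc_large() and the VM
 */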

/*
 * The kt_zone_cfg table defines the configuration of zones on various
 * platforms for kalloc_type fixed size allocations.
 */

#if KASAN_CLASSIC
#define K_SIZE_CLASS(size) \
    (((size) & PAGE_MASK) == 0 ? (size) : \
    ((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
#else
#define K_SIZE_CLASS(size) (size)
#endif
static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);

static const uint16_t kt_zone_cfg[] = {
    K_SIZE_CLASS(16),
    K_SIZE_CLASS(32),
    K_SIZE_CLASS(48),
    K_SIZE_CLASS(64),
    K_SIZE_CLASS(80),
    K_SIZE_CLASS(96),
    K_SIZE_CLASS(128),
    K_SIZE_CLASS(160),
    K_SIZE_CLASS(192),
    K_SIZE_CLASS(224),
    K_SIZE_CLASS(256),
    K_SIZE_CLASS(288),
    K_SIZE_CLASS(368),
    K_SIZE_CLASS(400),
    K_SIZE_CLASS(512),
    K_SIZE_CLASS(576),
    K_SIZE_CLASS(768),
    K_SIZE_CLASS(1024),
    K_SIZE_CLASS(1152),
    K_SIZE_CLASS(1280),
    K_SIZE_CLASS(1664),
    K_SIZE_CLASS(2048),
    K_SIZE_CLASS(4096),
    K_SIZE_CLASS(6144),
    K_SIZE_CLASS(8192),
    K_SIZE_CLASS(12288),
    K_SIZE_CLASS(16384),
#if __arm64__
    K_SIZE_CLASS(24576),
    K_SIZE_CLASS(32768),
#endif /* __arm64__ */
};

#define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))

/*
 * kalloc_type callsites are assigned a zone during early boot. They
 * use the dlut[] (direct lookup table), indexed by size normalized
 * to the minimum alignment to find the right zone index quickly.
 */
#define INDEX_ZDLUT(size) (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define KALLOC_DLUT_SIZE (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
#define MAX_SIZE_ZDLUT ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
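/*
 * Worked example (assuming KALLOC_MINALIGN == 16, which is typical for
 * LP64 configs): INDEX_ZDLUT(100) == (100 + 15) / 16 == 7, so a 100-byte
 * lookup reads kalloc_type_dlut[7] to find its size-class index.
 */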
static __startup_data uint8_t kalloc_type_dlut[KALLOC_DLUT_SIZE];
static __startup_data uint32_t kheap_zsize[KHEAP_NUM_ZONES];

#if VM_TAG_SIZECLASSES
static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
#endif

const char * const kalloc_heap_names[] = {
    [KHEAP_ID_NONE] = "",
    [KHEAP_ID_EARLY] = "early.",
    [KHEAP_ID_DATA_BUFFERS] = "data.",
    [KHEAP_ID_DATA_SHARED] = "data_shared.",
    [KHEAP_ID_KT_VAR] = "",
};

/*
 * Early heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_EARLY[1] = {
    {
        .kh_name = "early.kalloc",
        .kh_heap_id = KHEAP_ID_EARLY,
        .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE,
    }
};

/*
 * Bag of bytes heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
    {
        .kh_name = "data.kalloc",
        .kh_heap_id = KHEAP_ID_DATA_BUFFERS,
        .kh_tag = VM_KERN_MEMORY_KALLOC_DATA,
    }
};

/*
 * Configuration of variable kalloc type heaps
 */
SECURITY_READ_ONLY_LATE(struct kheap_info)
kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
    {
        .kh_name = "kalloc.type.var",
        .kh_heap_id = KHEAP_ID_KT_VAR,
        .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE
    }
};

/*
 * Shared heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_SHARED[1] = {
    {
        .kh_name = "data_shared.kalloc",
        .kh_heap_id = KHEAP_ID_DATA_SHARED,
        .kh_tag = VM_KERN_MEMORY_KALLOC_SHARED,
    }
};

KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);

__startup_func
static void
kalloc_zsize_compute(void)
{
    uint32_t step = KHEAP_STEP_START;
    uint32_t size = KHEAP_START_SIZE;

    /*
     * Manually initialize extra initial zones
     */
    kheap_zsize[0] = size / 2;
    kheap_zsize[1] = size;
    static_assert(KHEAP_EXTRA_ZONES == 2);

    /*
     * Compute sizes for remaining zones
     */
    for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
        uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;

        kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
        kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);

        step *= 2;
        size += step;
    }
}
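/*
 * Sketch of the resulting progression, assuming KHEAP_START_SIZE == 32 and
 * KHEAP_STEP_START == 16 (values defined elsewhere, not in this file):
 *
 *    extra zones:  16, 32
 *    step 0:       48, 64     (then step = 32, size = 64)
 *    step 1:       96, 128    (then step = 64, size = 128)
 *    step 2:       192, 256
 *    step 3:       384, 512   ... doubling until KHEAP_MAX_SIZE
 */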

static zone_t
kalloc_zone_for_size_with_flags(
    zone_id_t zid,
    vm_size_t size,
    zalloc_flags_t flags)
{
    vm_size_t max_size = KHEAP_MAX_SIZE;
    bool forcopyin = flags & Z_MAY_COPYINMAP;
    zone_t zone;

    if (flags & Z_KALLOC_ARRAY) {
        size = roundup(size, KALLOC_ARRAY_GRANULE);
    }

    if (forcopyin) {
#if __x86_64__
        /*
         * On Intel, the OSData() ABI used to allocate
         * from the kernel map starting at PAGE_SIZE.
         *
         * If only vm_map_copyin() or a wrapper is used,
         * then everything will work fine because vm_map_copy_t
         * will perform an actual copy if the data is smaller
         * than msg_ool_size_small (== KHEAP_MAX_SIZE).
         *
         * However, if anyone is trying to call mach_vm_remap(),
         * then bad things (TM) happen.
         *
         * Avoid this by preserving the ABI and moving
         * to kalloc_large() earlier.
         *
         * Any recent code really ought to use IOMemoryDescriptor
         * for this purpose however.
         */
        max_size = PAGE_SIZE - 1;
#endif
    }

    if (size <= max_size) {
        uint32_t idx;

        if (size <= KHEAP_START_SIZE) {
            zid += (size > 16);
        } else {
            /*
             * . log2down(size - 1) is log2up(size) - 1
             * . (size - 1) >> (log2down(size - 1) - 1)
             *   is either 0x2 or 0x3
             */
            idx = kalloc_log2down((uint32_t)(size - 1));
            zid += KHEAP_EXTRA_ZONES +
                2 * (idx - KHEAP_START_IDX) +
                ((uint32_t)(size - 1) >> (idx - 1)) - 2;
        }

        zone = zone_by_id(zid);
#if KASAN_CLASSIC
        /*
         * Under kasan classic, certain size classes are a redzone
         * away from the mathematical formula above, and we need
         * to "go to the next zone".
         *
         * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
         * this will never go to an "invalid" zone that doesn't
         * belong to the kheap.
         */
        if (size > zone_elem_inner_size(zone)) {
            zone++;
        }
#endif
        return zone;
    }

    return ZONE_NULL;
}
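/*
 * Worked example of the index math above (assuming KHEAP_START_SIZE == 32,
 * hence KHEAP_START_IDX == 5, and the size progression 48, 64, 96, 128, ...
 * sketched earlier): for size == 100, idx = log2down(99) = 6, so
 *
 *    zid += 2 + 2 * (6 - 5) + (99 >> 5) - 2
 *         = 2 + 2 + 3 - 2 = 5
 *
 * which lands on the 128-byte zone, the smallest class >= 100.
 */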

zone_t
kalloc_zone_for_size(zone_id_t zid, size_t size)
{
    return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
}

static inline bool
kheap_size_from_zone(
    void *addr,
    vm_size_t size,
    zalloc_flags_t flags)
{
    vm_size_t max_size = KHEAP_MAX_SIZE;
    bool forcopyin = flags & Z_MAY_COPYINMAP;

#if __x86_64__
    /*
     * If Z_FULLSIZE is used, then due to the behavior of
     * kalloc_zone_for_size_with_flags(), the element could have a
     * PAGE_SIZE reported size, yet still be from a zone for
     * Z_MAY_COPYINMAP.
     */
    if (forcopyin) {
        if (size == PAGE_SIZE &&
            zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
            return true;
        }

        max_size = PAGE_SIZE - 1;
    }
#else
#pragma unused(addr, forcopyin)
#endif

    return size <= max_size;
}

/*
 * Data zones should never use the early zone, so set the no-early-alloc
 * bit right after creation.
 */
__startup_func
static void
kalloc_set_no_early_for_data(
    zone_kheap_id_t kheap_id,
    zone_stats_t zstats)
{
    if (zone_is_data_kheap(kheap_id)) {
        zpercpu_foreach(zs, zstats) {
            os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
        }
    }
}

__startup_func
static void
kalloc_zone_init(
    const char *kheap_name,
    zone_kheap_id_t kheap_id,
    zone_id_t *kheap_zstart,
    zone_create_flags_t zc_flags)
{
    if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
        zc_flags |= ZC_DATA;
    }

    if (kheap_id == KHEAP_ID_DATA_SHARED) {
        zc_flags |= ZC_SHARED_DATA;
    }

    for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
        uint32_t size = kheap_zsize[i];
        char buf[MAX_ZONE_NAME], *z_name;
        int len;

        len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
        z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
        strlcpy(z_name, buf, len + 1);

        (void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
#if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
            uint32_t scale = kalloc_log2down(size / 32);

            if (size == 32 << scale) {
                z->z_array_size_class = scale;
            } else {
                z->z_array_size_class = scale | 0x10;
            }
#endif
            zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
            if (i == 0) {
                *kheap_zstart = zone_index(z);
            }
            kalloc_set_no_early_for_data(kheap_id, z->z_stats);
        });
    }
}

__startup_func
static void
kalloc_heap_init(struct kalloc_heap *kheap)
{
    kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
        ZC_NONE);
    /*
     * Count all the "raw" views for zones in the heap.
     */
    zone_view_count += KHEAP_NUM_ZONES;
}

#define KEXT_ALIGN_SHIFT  6
#define KEXT_ALIGN_BYTES  (1 << KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK   (KEXT_ALIGN_BYTES - 1)
#define kt_scratch_size   (256ul << 10)
#define KALLOC_TYPE_SECTION(type) \
    (type == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")

/*
 * Enum to specify the kalloc_type variant being used.
 */
__options_decl(kalloc_type_variant_t, uint16_t, {
    KTV_FIXED = 0x0001,
    KTV_VAR   = 0x0002,
});

/*
 * Macros that generate the appropriate kalloc_type variant (i.e. fixed or
 * variable) of the desired variable/function.
 */
#define kalloc_type_var(type, var) \
    ((type) == KTV_FIXED ? \
    (vm_offset_t) kalloc_type_##var##_fixed : \
    (vm_offset_t) kalloc_type_##var##_var)
#define kalloc_type_func(type, func, ...) \
    ((type) == KTV_FIXED ? \
    kalloc_type_##func##_fixed(__VA_ARGS__) : \
    kalloc_type_##func##_var(__VA_ARGS__))
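/*
 * For instance, kalloc_type_func(type, view_sz) expands to a call to
 * either kalloc_type_view_sz_fixed() or kalloc_type_view_sz_var(), and
 * kalloc_type_var(type, sec_start) picks kalloc_type_sec_start_fixed or
 * kalloc_type_sec_start_var (all defined below).
 */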

TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
    ZSECURITY_CONFIG_KT_VAR_BUDGET);
TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
    ZSECURITY_CONFIG_KT_BUDGET);
TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);


/**
 * @const kexts_enroll_data_shared
 *
 * @brief
 * We have two heaps for data allocations:
 * - KHEAP_DATA_BUFFERS, which is for allocations that are never shared.
 * - KHEAP_DATA_SHARED, which is for allocations that need to be shared.
 *
 * This is a control that indicates which heap we expose to kexts via the
 * exported allocations functions.
 */
STATIC_IF_KEY_DEFINE_TRUE(kexts_enroll_data_shared);

/**
 * @const restricted_data_mode
 *
 * @brief
 * This is a control that sets the mode of mapping policies
 * enforcement on data allocations:
 * - none: the state before the change (no telemetry, no enforcement).
 * - telemetry: do not enforce, do emit telemetry
 * - enforce: type the KHEAP_DATA_BUFFERS pages as restricted mappings.
 *
 * Combined with kexts_enroll_data_shared, we can create the modes we need
 * for none/telemetry/enforcement on core kernel/kexts.
 *
 * restricted_data_mode_t is an enum used to specify the mode being used.
 */

__options_decl(restricted_data_mode_t, uint8_t, {
    RESTRICTED_DATA_MODE_NONE      = 0x0000,
    RESTRICTED_DATA_MODE_TELEMETRY = 0x0001,
    RESTRICTED_DATA_MODE_ENFORCE   = 0x0002
});

TUNABLE(restricted_data_mode_t,
    restricted_data_mode,
    "restricted_data_mode",
#if __x86_64__
    RESTRICTED_DATA_MODE_NONE
#else
    RESTRICTED_DATA_MODE_TELEMETRY
#endif /* __x86_64__ */
    );

inline bool
kalloc_is_restricted_data_mode_telemetry(void)
{
    return restricted_data_mode == RESTRICTED_DATA_MODE_TELEMETRY;
}

inline bool
kalloc_is_restricted_data_mode_enforced(void)
{
    return restricted_data_mode == RESTRICTED_DATA_MODE_ENFORCE;
}

inline bool
kmem_needs_data_share_range(void)
{
    /*
     * The dedicated range is required only for
     * telemetry reporting, when we need to distinguish
     * between the two kinds of data via kmem ranges.
     *
     * Even though this is strictly like checking telemetry
     * mode, it's better to have a well-defined abstraction layer
     * for that, adopted in all the call-sites, to be flexible
     * w.r.t. future changes / unrolling.
     */
    return kalloc_is_restricted_data_mode_telemetry();
}

/*
 * Section start/end for fixed kalloc_type views
 */
extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

/*
 * Section start/end for variable kalloc_type views
 */
extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

__startup_data
static kalloc_type_views_t *kt_buffer = NULL;
__startup_data
static uint64_t kt_count;
__startup_data
uint32_t kalloc_type_hash_seed;

__startup_data
static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
__startup_data
static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];

struct nzones_with_idx {
    uint16_t nzones;
    uint16_t idx;
};
int16_t zone_carry = 0;

_Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
    "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");

/*
 * For use by lldb to iterate over kalloc types
 */
SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];

#define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
    KMEM_DIRECTION_MASK),
    "Insufficient bits to represent range and dir for VM allocations");
static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
    "validate idx mask");
/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)
{
    return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
}

static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
{
    return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
}

static void
kalloc_type_build_dlut(void)
{
    vm_size_t size = 0;
    for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
        uint8_t zindex = 0;
        while (kt_zone_cfg[zindex] < size) {
            zindex++;
        }
        kalloc_type_dlut[i] = zindex;
    }
}

static uint32_t
kalloc_type_idx_for_size(uint32_t size)
{
    assert(size <= KHEAP_MAX_SIZE);
    uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
    return kalloc_type_set_idx(size, idx);
}
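/*
 * Sketch of the lookup path: for a 100-byte type (and assuming
 * KALLOC_MINALIGN == 16), kalloc_type_build_dlut() stores at slot
 * INDEX_ZDLUT(100) == 7 the index of the smallest kt_zone_cfg entry
 * >= 112 (the 128-byte class), and kalloc_type_idx_for_size(100) then
 * packs that zone index into the top bits of the returned size.
 */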

static void
kalloc_type_assign_zone_fixed(
    kalloc_type_view_t *cur,
    kalloc_type_view_t *end,
    zone_t z,
    zone_t sig_zone,
    zone_t early_zone)
{
    /*
     * Assign the zone created for every kalloc_type_view
     * of the same unique signature
     */
    bool need_raw_view = false;

    while (cur < end) {
        kalloc_type_view_t kt = *cur;
        struct zone_view *zv = &kt->kt_zv;
        zv->zv_zone = z;
        kalloc_type_flags_t kt_flags = kt->kt_flags;
        zone_security_flags_t zsflags = zone_security_config(z);

        assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
        if (!early_zone) {
            assert(zone_is_data_kheap(zsflags.z_kheap_id));
        }

        if (kt_flags & KT_SLID) {
            kt->kt_signature -= vm_kernel_slide;
            kt->kt_zv.zv_name -= vm_kernel_slide;
        }

        if ((kt_flags & KT_PRIV_ACCT) ||
            ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
            zv->zv_stats = zalloc_percpu_permanent_type(
                struct zone_stats);
            need_raw_view = true;
            zone_view_count += 1;
        } else {
            zv->zv_stats = z->z_stats;
        }

        if ((kt_flags & KT_NOEARLY) || !early_zone) {
            if ((kt_flags & KT_NOEARLY) && !(kt_flags & KT_PRIV_ACCT)) {
                panic("KT_NOEARLY used w/o private accounting for view %s",
                    zv->zv_name);
            }

            zpercpu_foreach(zs, zv->zv_stats) {
                os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
            }
        }

        if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
            kt->kt_zearly = early_zone;
            kt->kt_zsig = sig_zone;
            /*
             * If we haven't yet set the signature equivalence, set it;
             * otherwise validate that the zone has the same signature
             * equivalence as the sig_zone provided.
             */
            if (!zone_get_sig_eq(z)) {
                zone_set_sig_eq(z, zone_index(sig_zone));
            } else {
                assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
            }
        }
        zv->zv_next = (zone_view_t) z->z_views;
        zv->zv_zone->z_views = (zone_view_t) kt;
        cur++;
    }
    if (need_raw_view) {
        zone_view_count += 1;
    }
}

__startup_func
static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
    kalloc_type_var_view_t *end, uint32_t heap_idx)
{
    struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
    while (cur < end) {
        kalloc_type_var_view_t kt = *cur;
        kt->kt_heap_start = cfg->kh_zstart;
        kalloc_type_flags_t kt_flags = kt->kt_flags;

        if (kt_flags & KT_SLID) {
            if (kt->kt_sig_hdr) {
                kt->kt_sig_hdr -= vm_kernel_slide;
            }
            kt->kt_sig_type -= vm_kernel_slide;
            kt->kt_name -= vm_kernel_slide;
        }

        if ((kt_flags & KT_PRIV_ACCT) ||
            ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
            kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
            zone_view_count += 1;
        }

        kt->kt_next = (zone_view_t) cfg->kt_views;
        cfg->kt_views = kt;
        cur++;
    }
}

__startup_func
static inline void
kalloc_type_slide_fixed(vm_offset_t addr)
{
    kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
    ktv->kt_signature += vm_kernel_slide;
    ktv->kt_zv.zv_name += vm_kernel_slide;
    ktv->kt_flags |= KT_SLID;
}

__startup_func
static inline void
kalloc_type_slide_var(vm_offset_t addr)
{
    kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
    if (ktv->kt_sig_hdr) {
        ktv->kt_sig_hdr += vm_kernel_slide;
    }
    ktv->kt_sig_type += vm_kernel_slide;
    ktv->kt_name += vm_kernel_slide;
    ktv->kt_flags |= KT_SLID;
}

__startup_func
static void
kalloc_type_validate_flags(
    kalloc_type_flags_t kt_flags,
    const char *kt_name,
    uuid_string_t kext_uuid)
{
    if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
        panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
            "required xnu headers", kt_name, kext_uuid);
    }
}

static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
{
    kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
    kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
    return ktv->kt_flags;
}

static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
{
    kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
    kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
    return ktv->kt_flags;
}

/*
 * Check if signature of type is made up of only data and padding,
 * which is meant to never be shared.
 */
static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)
{
    assert(kt_flags & KT_CHANGED);
    return kt_flags & KT_DATA_ONLY;
}

/*
 * Check if signature of type is made up of only pointers
 */
static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
{
    assert(kt_flags & KT_CHANGED2);
    return kt_flags & KT_PTR_ARRAY;
}

static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
{
    assert(kt_flags & KT_CHANGED);
    return kt_flags & KT_VM;
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_fixed(void)
{
    return sizeof(struct kalloc_type_view);
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_var(void)
{
    return sizeof(struct kalloc_type_var_view);
}

__startup_func
static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
    vm_offset_t end)
{
    return (end - start) / kalloc_type_func(type, view_sz);
}

__startup_func
static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
    buffer->ktv_fixed = (kalloc_type_view_t) ktv;
}

__startup_func
static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
    buffer->ktv_var = (kalloc_type_var_view_t) ktv;
}

__startup_func
static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)
{
    kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
    zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
        cur_data_view->kt_size);
    kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
        NULL);
}

__startup_func
static void
kalloc_type_handle_data_view_var(vm_offset_t addr)
{
    kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
    kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_fixed(vm_offset_t addr)
{
    kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
    zone_t z = kalloc_zone_for_size(KHEAP_DATA_SHARED->kh_zstart,
        cur_data_view->kt_size);
    kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
        NULL);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_var(vm_offset_t addr)
{
    kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
    kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_SHARED_HEAP);
}

__startup_func
static uint32_t
kalloc_type_handle_parray_var(void)
{
    uint32_t i = 0;
    kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
    const char *p_name = kt->kt_name;

    /*
     * The sorted list of variable kalloc_type_view has pointer arrays at the
     * beginning. Walk through them and assign a random pointer heap to each
     * type detected by typename.
     */
    while (kalloc_type_is_ptr_array(kt->kt_flags)) {
        uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
        const char *c_name = kt->kt_name;
        uint32_t p_i = i;

        while (strcmp(c_name, p_name) == 0) {
            i++;
            kt = kt_buffer[i].ktv_var;
            c_name = kt->kt_name;
        }
        p_name = c_name;
        kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
            &kt_buffer[i].ktv_var, heap_id);
    }

    /*
     * Returns the index of the first view that isn't a pointer array
     */
    return i;
}

__startup_func
static uint32_t
kalloc_hash_adjust(uint32_t hash, uint32_t shift)
{
    /*
     * Limit range_id to ptr ranges
     */
    uint32_t range_id = kmem_adjust_range_id(hash);
    uint32_t direction = hash & 0x8000;
    return (range_id | KMEM_HASH_SET | direction) << shift;
}

__startup_func
static void
kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
    kalloc_type_flags_t *kt_flags)
{
    uint32_t hash = 0;

    assert(sig_ty != NULL);
    hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
        kalloc_type_hash_seed);
    if (sig_hdr) {
        hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
    }
    os_hash_jenkins_finish(hash);
    hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);

    *kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
}
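/*
 * In effect, the Jenkins hash of the type's signature (seeded per boot)
 * is folded into bits 16+ of kt_flags: kalloc_hash_adjust() keeps a
 * pointer-range id and a direction bit, and sets KMEM_HASH_SET so that
 * kmem_*_guard can later tell a hash is present. See KT_GET_HASH above
 * for the matching extraction.
 */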

__startup_func
static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)
{
    /*
     * Use backtraces on fixed as we don't have signatures for types that go
     * to the VM due to rdar://85182551.
     */
    (void) addr;
}

__startup_func
static void
kalloc_type_set_type_hash_var(vm_offset_t addr)
{
    kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
    kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
        &ktv->kt_flags);
}

__startup_func
static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)
{
    kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
    ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_mark_processed_var(vm_offset_t addr)
{
    kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
    ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_update_view_fixed(vm_offset_t addr)
{
    kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
    ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
}

__startup_func
static void
kalloc_type_update_view_var(vm_offset_t addr)
{
    (void) addr;
}

__startup_func
static void
kalloc_type_view_copy(
    const kalloc_type_variant_t type,
    vm_offset_t start,
    vm_offset_t end,
    uint64_t *cur_count,
    bool slide,
    uuid_string_t kext_uuid)
{
    uint64_t count = kalloc_type_view_count(type, start, end);
    if (count + *cur_count >= kt_count) {
        panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
    }
    vm_offset_t cur = start;
    while (cur < end) {
        if (slide) {
            kalloc_type_func(type, slide, cur);
        }
        kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
            kext_uuid);
        kalloc_type_func(type, mark_processed, cur);
        /*
         * Skip views that go to the VM
         */
        if (kalloc_type_from_vm(kt_flags)) {
            cur += kalloc_type_func(type, view_sz);
            continue;
        }

        /*
         * Check if the signature indicates that the entire allocation is
         * data.
         *
         * Note that KT_VAR_DATA_HEAP is a fake "data" heap; variable
         * kalloc_type handles the actual redirection in the entry points
         * kalloc/kfree_type_var_impl.
         */
        if (kalloc_type_is_data(kt_flags)) {
            kalloc_type_func(type, handle_data_view, cur);
            cur += kalloc_type_func(type, view_sz);
            continue;
        }

        /*
         * Set type hash that is used by kmem_*_guard
         */
        kalloc_type_func(type, set_type_hash, cur);
        kalloc_type_func(type, update_view, cur);
        kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
        cur += kalloc_type_func(type, view_sz);
        *cur_count = *cur_count + 1;
    }
}

__startup_func
static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)
{
    kc_format_t kc_format;
    uint64_t cur_count = 0;

    if (!PE_get_primary_kc_format(&kc_format)) {
        panic("kalloc_type_view_parse: wasn't able to determine kc format");
    }

    if (kc_format == KCFormatStatic) {
        /*
         * If kc is static or KCGEN, __kalloc_type sections from kexts and
         * xnu are coalesced.
         */
        kalloc_type_view_copy(type,
            kalloc_type_var(type, sec_start),
            kalloc_type_var(type, sec_end),
            &cur_count, false, NULL);
    } else if (kc_format == KCFormatFileset) {
        /*
         * If kc uses filesets, traverse __kalloc_type section for each
         * macho in the BootKC.
         */
        kernel_mach_header_t *kc_mh = NULL;
        kernel_mach_header_t *kext_mh = NULL;

        kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
        struct load_command *lc =
            (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
        for (uint32_t i = 0; i < kc_mh->ncmds;
            i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
            if (lc->cmd != LC_FILESET_ENTRY) {
                continue;
            }
            struct fileset_entry_command *fse =
                (struct fileset_entry_command *)(vm_offset_t)lc;
            kext_mh = (kernel_mach_header_t *)fse->vmaddr;
            kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
                kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
            if (sect != NULL) {
                unsigned long uuidlen = 0;
                void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
                uuid_string_t kext_uuid_str;
                if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
                    uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
                }
                kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
                    &cur_count, false, kext_uuid_str);
            }
        }
    } else if (kc_format == KCFormatKCGEN) {
        /*
         * Parse __kalloc_type section from xnu
         */
        kalloc_type_view_copy(type,
            kalloc_type_var(type, sec_start),
            kalloc_type_var(type, sec_end), &cur_count, false, NULL);

#ifndef __BUILDING_XNU_LIB_UNITTEST__ /* no kexts in unit-test */
        /*
         * Parse __kalloc_type section for kexts
         *
         * Note: We don't process the kalloc_type_views for kexts on armv7
         * as this platform has insufficient memory for type based
         * segregation. kalloc_type_impl_external will direct callsites
         * based on their size.
         */
        kernel_mach_header_t *xnu_mh = &_mh_execute_header;
        vm_offset_t cur = 0;
        vm_offset_t end = 0;

        /*
         * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
         * and traverse it.
         */
        kernel_section_t *prelink_sect = getsectbynamefromheader(
            xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
        assert(prelink_sect);
        cur = prelink_sect->addr;
        end = prelink_sect->addr + prelink_sect->size;

        while (cur < end) {
            uint64_t kext_text_sz = 0;
            kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;

            if (kext_mh->magic == 0) {
                /*
                 * Assert that we have processed all kexts and all that is left
                 * is padding
                 */
                assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
                break;
            } else if (kext_mh->magic != MH_MAGIC_64 &&
                kext_mh->magic != MH_CIGAM_64) {
                panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
                    cur);
            }

            /*
             * Kext macho found, iterate through its segments
             */
            struct load_command *lc =
                (struct load_command *)(cur + sizeof(kernel_mach_header_t));
            bool isSplitKext = false;

            for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
                i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
                if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
                    isSplitKext = true;
                    continue;
                } else if (lc->cmd != LC_SEGMENT_64) {
                    continue;
                }

                kernel_segment_command_t *seg_cmd =
                    (struct segment_command_64 *)(vm_offset_t)lc;
                /*
                 * Parse kalloc_type section
                 */
                if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
                    kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
                        KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
                    if (kt_sect) {
                        kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
                            kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
                            true, NULL);
                    }
                }
                /*
                 * If the kext has a __TEXT segment, that is the only thing that
                 * will be in the special __PRELINK_TEXT KC segment, so the next
                 * macho is right after.
                 */
                if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
                    kext_text_sz = seg_cmd->filesize;
                }
            }
            /*
             * If the kext did not have a __TEXT segment (special xnu kexts with
             * only a __LINKEDIT segment) then the next macho will be after all the
             * header commands.
             */
            if (!kext_text_sz) {
                kext_text_sz = kext_mh->sizeofcmds;
            } else if (!isSplitKext) {
                panic("kalloc_type_view_parse: No support for non-split seg KCs");
                break;
            }

            cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
        }
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */
    } else {
        /*
         * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
         * parsing kalloc_type_view structs during startup.
         */
        panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
            " for kc_format = %d\n", kc_format);
    }
    return cur_count;
}

__startup_func
static int
kalloc_type_cmp_fixed(const void *a, const void *b)
{
    const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
    const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;

    const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
    const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
    /*
     * If the kalloc_type_views are in the same kalloc bucket, sort by
     * signature else sort by size
     */
    if (idxA == idxB) {
        int result = strcmp(ktA->kt_signature, ktB->kt_signature);
        /*
         * If the kalloc_type_views have the same signature sort by site
         * name
         */
        if (result == 0) {
            return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
        }
        return result;
    }
    const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
    const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
    return (int)(sizeA - sizeB);
}

__startup_func
static int
kalloc_type_cmp_var(const void *a, const void *b)
{
    const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
    const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
    const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
    const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
    bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
    bool ktB_ptrArray = kalloc_type_is_ptr_array(ktB->kt_flags);
    int result = 0;

    /*
     * Switched around (B - A) because we want the pointer arrays to be at the
     * top
     */
    result = ktB_ptrArray - ktA_ptrArray;
    if (result == 0) {
        result = strcmp(ktA_hdr, ktB_hdr);
        if (result == 0) {
            result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
            if (result == 0) {
                result = strcmp(ktA->kt_name, ktB->kt_name);
            }
        }
    }
    return result;
}

__startup_func
static uint16_t *
kalloc_type_create_iterators_fixed(
    uint16_t *kt_skip_list_start,
    uint64_t count)
{
    uint16_t *kt_skip_list = kt_skip_list_start;
    uint16_t p_idx = UINT16_MAX; /* previous size idx */
    uint16_t c_idx = 0; /* current size idx */
    uint16_t unique_sig = 0;
    uint16_t total_sig = 0;
    const char *p_sig = NULL;
    const char *p_name = "";
    const char *c_sig = NULL;
    const char *c_name = NULL;

    /*
     * Walk over each kalloc_type_view
     */
    for (uint16_t i = 0; i < count; i++) {
        kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;

        c_idx = kalloc_type_get_idx(kt->kt_size);
        c_sig = kt->kt_signature;
        c_name = kt->kt_zv.zv_name;
        /*
         * When current kalloc_type_view is in a different kalloc size
         * bucket than the previous, it means we have processed all in
         * the previous size bucket, so store the accumulated values
         * and advance the indices.
         */
        if (p_idx == UINT16_MAX || c_idx != p_idx) {
            /*
             * Updates for frequency lists
             */
            if (p_idx != UINT16_MAX) {
                kt_freq_list[p_idx] = unique_sig;
                kt_freq_list_total[p_idx] = total_sig - unique_sig;
            }
            unique_sig = 1;
            total_sig = 1;

            p_idx = c_idx;
            p_sig = c_sig;
            p_name = c_name;

            /*
             * Updates to signature skip list
             */
            *kt_skip_list = i;
            kt_skip_list++;

            continue;
        }

        /*
         * When the current kalloc_type_view is in the same kalloc size
         * bucket as the previous one, analyze the signature to see if it
         * is unique.
         *
         * Signatures are collapsible if one is a substring of the next.
         */
        if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
            /*
             * Unique signature detected. Update counts and advance index
             */
            unique_sig++;
            total_sig++;

            *kt_skip_list = i;
            kt_skip_list++;
            p_sig = c_sig;
            p_name = c_name;
            continue;
        }
        /*
         * Needed here because we do substring matching for signatures:
         * we want to track the longest signature seen rather than the
         * substring.
         */
        p_sig = c_sig;

        /*
         * Check if current kalloc_type_view corresponds to a new type
         */
        if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
            total_sig++;
            p_name = c_name;
        }
    }
    /*
     * Final update
     */
    assert(c_idx == p_idx);
    assert(kt_freq_list[c_idx] == 0);
    kt_freq_list[c_idx] = unique_sig;
    kt_freq_list_total[c_idx] = total_sig - unique_sig;
    *kt_skip_list = (uint16_t) count;

    return ++kt_skip_list;
}
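/*
 * Sketch of the resulting bookkeeping for one size bucket (hypothetical
 * views, already sorted by kalloc_type_cmp_fixed):
 *
 *    buffer: [sigX/typeA, sigX/typeB, sigY/typeC]   (same size class)
 *    kt_skip_list: start of each unique-signature run -> [0, 2]
 *    kt_freq_list[idx]       = 2  (unique signatures: sigX, sigY)
 *    kt_freq_list_total[idx] = 1  (additional types sharing a signature)
 */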

__startup_func
static uint32_t
kalloc_type_create_iterators_var(
    uint32_t *kt_skip_list_start,
    uint32_t buf_start)
{
    uint32_t *kt_skip_list = kt_skip_list_start;
    uint32_t n = 0;

    kt_skip_list[n] = buf_start;
    assert(kt_count > buf_start + 1);
    for (uint32_t i = buf_start + 1; i < kt_count; i++) {
        kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
        kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
        const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
        const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
        assert(ktA->kt_sig_type != NULL);
        assert(ktB->kt_sig_type != NULL);
        if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
            strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
            n++;
            kt_skip_list[n] = i;
        }
    }
    /*
     * Final update
     */
    n++;
    kt_skip_list[n] = (uint32_t) kt_count;
    return n;
}

__startup_func
static uint16_t
kalloc_type_distribute_budget(
    uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t zone_budget,
    uint16_t min_zones_per_size)
{
    uint16_t total_sig = 0;
    uint16_t min_sig = 0;
    uint16_t assigned_zones = 0;
    uint16_t remaining_zones = zone_budget;
    uint16_t modulo = 0;

    for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
        uint16_t sig_freq = freq_list[i];
        uint16_t min_zones = min_zones_per_size;

        if (sig_freq < min_zones_per_size) {
            min_zones = sig_freq;
        }
        total_sig += sig_freq;
        kt_zones[i] = min_zones;
        min_sig += min_zones;
    }
    if (remaining_zones > total_sig) {
        remaining_zones = total_sig;
    }
    assert(remaining_zones >= min_sig);
    remaining_zones -= min_sig;
    total_sig -= min_sig;
    assigned_zones += min_sig;

    for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
        uint16_t freq = freq_list[i];

        if (freq < min_zones_per_size) {
            continue;
        }
        uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
        uint16_t n_zones = (uint16_t) (numer / total_sig);

        /*
         * Accumulate remainder and increment n_zones when it goes above
         * denominator
         */
        modulo += numer % total_sig;
        if (modulo >= total_sig) {
            n_zones++;
            modulo -= total_sig;
        }

        /*
         * Cap the total number of zones to the unique signatures
         */
        if ((n_zones + min_zones_per_size) > freq) {
            uint16_t extra_zones = n_zones + min_zones_per_size - freq;
            modulo += (extra_zones * total_sig);
            n_zones -= extra_zones;
        }
        kt_zones[i] += n_zones;
        assigned_zones += n_zones;
    }

    if (kt_options & KT_OPTIONS_DEBUG) {
        printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
            assigned_zones, remaining_zones + min_sig - assigned_zones);
    }
    return remaining_zones + min_sig - assigned_zones;
}
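/*
 * The second loop is a largest-remainder apportionment. With hypothetical
 * numbers: given total_sig == 10 remaining signatures and
 * remaining_zones == 4, a size class holding 3 of those signatures gets
 * numer == 12, i.e. 1 zone plus a carried remainder of 2/10; once the
 * accumulated remainders in `modulo` reach 10, the next class gets one
 * extra zone.
 */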

__startup_func
static int
kalloc_type_cmp_type_zones(const void *a, const void *b)
{
    const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
    const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;

    return (int)(B.nzones - A.nzones);
}

__startup_func
static void
kalloc_type_redistribute_budget(
    uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)])
{
    uint16_t count = 0, cur_count = 0;
    struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
    uint16_t top_zone_total = 0;

    for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
        uint16_t zones = kt_zones[i];

        /*
         * If a sizeclass got no zones but has types to divide, make a
         * note of it
         */
        if (zones == 0 && (freq_total_list[i] != 0)) {
            count++;
        }

        sorted_zones[i].nzones = kt_zones[i];
        sorted_zones[i].idx = i;
    }

    qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
        sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);

    for (uint16_t i = 0; i < 3; i++) {
        top_zone_total += sorted_zones[i].nzones;
    }

    /*
     * Borrow zones from the top 3 sizeclasses and redistribute to those
     * that didn't get a zone but have types to divide
     */
    cur_count = count;
    for (uint16_t i = 0; i < 3; i++) {
        uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
        uint16_t zone_available = kt_zones[sorted_zones[i].idx];

        if (zone_borrow > (zone_available / 2)) {
            zone_borrow = zone_available / 2;
        }
        kt_zones[sorted_zones[i].idx] -= zone_borrow;
        cur_count -= zone_borrow;
    }

    for (uint16_t i = 0; i < 3; i++) {
        if (cur_count == 0) {
            break;
        }
        kt_zones[sorted_zones[i].idx]--;
        cur_count--;
    }

    for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
        if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
            (count > cur_count)) {
            kt_zones[i]++;
            count--;
        }
    }
}

static uint16_t
kalloc_type_apply_policy(
    uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
    uint16_t zone_budget)
{
    uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
    uint16_t zbudget_type = zone_budget - zbudget_sig;
    uint16_t wasted_zones = 0;

#if DEBUG || DEVELOPMENT
    if (startup_phase < STARTUP_SUB_LOCKDOWN) {
        __assert_only uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
        assert(zone_budget + current_zones <= MAX_ZONES);
    }
#endif

    wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
        zbudget_sig, 2);
    wasted_zones += kalloc_type_distribute_budget(freq_total_list,
        kt_zones_type, zbudget_type, 0);
    kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);

    /*
     * Print stats when KT_OPTIONS_DEBUG boot-arg present
     */
    if (kt_options & KT_OPTIONS_DEBUG) {
        printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
            "zones_type\n");
        for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
            printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
                freq_total_list[i] + freq_list[i], freq_list[i],
                kt_zones_sig[i] + kt_zones_type[i],
                kt_zones_sig[i], kt_zones_type[i]);
        }
    }

    return wasted_zones;
}


__startup_func
static void
kalloc_type_create_zone_for_size(
    zone_t *kt_zones_for_size,
    uint16_t kt_zones,
    vm_size_t z_size)
{
    zone_t p_zone = NULL;
    char *z_name = NULL;
    zone_t shared_z = NULL;

    for (uint16_t i = 0; i < kt_zones; i++) {
        z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
        snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
            (size_t) z_size);
        zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
        if (i != 0) {
            p_zone->z_kt_next = z;
        }
        p_zone = z;
        kt_zones_for_size[i] = z;
    }
    /*
     * Create shared zone for sizeclass if it doesn't already exist
     */
    if (kt_shared_fixed) {
        shared_z = kalloc_zone_for_size(KHEAP_EARLY->kh_zstart, z_size);
        if (zone_elem_inner_size(shared_z) != z_size) {
            z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
            snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
                (size_t) z_size);
            shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
                ^(zone_t zone){
                zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_EARLY;
            });
        }
    }
    kt_zones_for_size[kt_zones] = shared_z;
}

__startup_func
static uint16_t
kalloc_type_zones_for_type(
    uint16_t zones_total_type,
    uint16_t unique_types,
    uint16_t total_types,
    bool last_sig)
{
    uint16_t zones_for_type = 0, n_mod = 0;

    if (zones_total_type == 0) {
        return 0;
    }

    zones_for_type = (zones_total_type * unique_types) / total_types;
    n_mod = (zones_total_type * unique_types) % total_types;
    zone_carry += n_mod;

    /*
     * Drain carry opportunistically
     */
    if (((unique_types > 3) && (zone_carry > 0)) ||
        (zone_carry >= (int) total_types) ||
        (last_sig && (zone_carry > 0))) {
        zone_carry -= total_types;
        zones_for_type++;
    }

    if (last_sig) {
        assert(zone_carry == 0);
    }

    return zones_for_type;
}
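/*
 * Hypothetical walk-through of the carry: with zones_total_type == 4 and
 * total_types == 10, a signature bucket holding 5 types computes
 * 20 / 10 == 2 zones with no remainder, while one holding 3 types gets
 * 12 / 10 == 1 zone and leaves 2 in zone_carry; a later bucket drains the
 * accumulated remainders as one extra zone, and the last bucket must
 * leave the carry at exactly 0.
 */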

__startup_func
static uint16_t
kalloc_type_build_skip_list(
    kalloc_type_view_t *start,
    kalloc_type_view_t *end,
    uint16_t *kt_skip_list)
{
    kalloc_type_view_t *cur = start;
    kalloc_type_view_t prev = *start;
    uint16_t i = 0, idx = 0;

    kt_skip_list[idx] = i;
    idx++;

    while (cur < end) {
        kalloc_type_view_t kt_cur = *cur;

        if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
            kt_skip_list[idx] = i;

            prev = kt_cur;
            idx++;
        }
        i++;
        cur++;
    }

    /*
     * Final update
     */
    kt_skip_list[idx] = i;
    return idx;
}

__startup_func
static void
kalloc_type_init_sig_eq(
    zone_t *zones,
    uint16_t n_zones,
    zone_t sig_zone)
{
    for (uint16_t i = 0; i < n_zones; i++) {
        zone_t z = zones[i];

        assert(!zone_get_sig_eq(z));
        zone_set_sig_eq(z, zone_index(sig_zone));
    }
}

#ifndef __BUILDING_XNU_LIB_UNITTEST__
#define KT_ZONES_FOR_SIZE_SIZE 32
#else /* __BUILDING_XNU_LIB_UNITTEST__ */
/* different init sequence in unit-test requires a bigger buffer in the kalloc zones initialization */
#define KT_ZONES_FOR_SIZE_SIZE 35
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */

__startup_func
static uint16_t
kalloc_type_distribute_zone_for_type(
    kalloc_type_view_t *start,
    kalloc_type_view_t *end,
    bool last_sig,
    uint16_t zones_total_type,
    uint16_t total_types,
    uint16_t *kt_skip_list,
    zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE],
    uint16_t type_zones_start,
    zone_t sig_zone,
    zone_t early_zone)
{
    uint16_t count = 0, n_zones = 0;
    uint16_t *shuffle_buf = NULL;
    zone_t *type_zones = &kt_zones_for_size[type_zones_start];

    /*
     * Assert there is space in buffer
     */
    count = kalloc_type_build_skip_list(start, end, kt_skip_list);
    n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
        last_sig);
    shuffle_buf = &kt_skip_list[count + 1];

    /*
     * Initialize signature equivalence zone for type zones
     */
1770 kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1771
1772 if (n_zones == 0) {
1773 kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1774 early_zone);
1775 return n_zones;
1776 }
1777
1778 /*
1779 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1780 */
1781 if (count == 1) {
1782 kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1783 early_zone);
1784 return n_zones;
1785 }
1786
1787 /*
1788 * Add the signature based zone to n_zones
1789 */
1790 n_zones++;
1791
1792 for (uint16_t i = 0; i < count; i++) {
1793 uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1794 uint16_t type_start = kt_skip_list[i];
1795 kalloc_type_view_t *kt_type_start = &start[type_start];
1796 uint16_t type_end = kt_skip_list[i + 1];
1797 kalloc_type_view_t *kt_type_end = &start[type_end];
1798 zone_t zone;
1799
1800 if (zidx == 0) {
1801 kmem_shuffle(shuffle_buf, n_zones);
1802 }
1803
1804 shuffled_zidx = shuffle_buf[zidx];
1805 zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1806 kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1807 early_zone);
1808 }
1809
1810 return n_zones - 1;
1811 }
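
/*
 * Illustrative sketch (not compiled): with count == 4 unique types and
 * n_zones == 2 dedicated type zones, the loop above cycles through 3
 * candidate zones (the 2 type zones plus sig_zone). A fresh
 * kmem_shuffle() happens every time zidx wraps to 0, so e.g. a shuffle
 * of { 2, 0, 1 } sends type 0 to type_zones[1], type 1 to sig_zone,
 * type 2 to type_zones[0], and type 3 starts a new shuffled round.
 */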
1812
1813 __startup_func
1814 static void
1815 kalloc_type_create_zones_fixed(
1816 uint16_t *kt_skip_list_start,
1817 uint16_t *kt_shuffle_buf)
1818 {
1819 uint16_t *kt_skip_list = kt_skip_list_start;
1820 uint16_t p_j = 0;
1821 uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1822 uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1823 #if DEBUG || DEVELOPMENT
1824 __assert_only uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1825 (vm_address_t) kt_buffer) / sizeof(uint16_t);
1826 #endif
1827 /*
1828 * Apply policy to determine how many zones to create for each size
1829 * class.
1830 */
1831 kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1832 kt_zones_sig, kt_zones_type, kt_fixed_zones);
1833
1834 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1835 uint16_t n_unique_sig = kt_freq_list[i];
1836 vm_size_t z_size = kt_zone_cfg[i];
1837 uint16_t n_zones_sig = kt_zones_sig[i];
1838 uint16_t n_zones_type = kt_zones_type[i];
1839 uint16_t total_types = kt_freq_list_total[i];
1840 uint16_t type_zones_used = 0;
1841
1842 if (n_unique_sig == 0) {
1843 continue;
1844 }
1845
1846 zone_carry = 0;
1847 assert(n_zones_sig + n_zones_type + 1 <= KT_ZONES_FOR_SIZE_SIZE);
1848 zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE] = {};
1849 kalloc_type_create_zone_for_size(kt_zones_for_size,
1850 n_zones_sig + n_zones_type, z_size);
1851
1852 kalloc_type_zarray[i] = kt_zones_for_size[0];
1853 /*
1854 * Ensure that there is enough space to shuffle n_unique_sig
1855 * indices
1856 */
1857 assert(n_unique_sig < kt_shuffle_count);
1858
1859 /*
1860 * Get a shuffled set of signature indices
1861 */
1862 *kt_shuffle_buf = 0;
1863 if (n_unique_sig > 1) {
1864 kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1865 }
1866
1867 for (uint16_t j = 0; j < n_zones_sig; j++) {
1868 zone_t *z_ptr = &kt_zones_for_size[j];
1869
1870 kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1871 }
1872
1873 for (uint16_t j = 0; j < n_unique_sig; j++) {
1874 /*
1875 * For every size that has unique types
1876 */
1877 uint16_t shuffle_idx = kt_shuffle_buf[j];
1878 uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1879 uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1880 zone_t zone = kt_zones_for_size[j % n_zones_sig];
1881 zone_t early_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1882 bool last_sig;
1883
1884 last_sig = (j == (n_unique_sig - 1)) ? true : false;
1885 type_zones_used += kalloc_type_distribute_zone_for_type(
1886 &kt_buffer[cur].ktv_fixed,
1887 &kt_buffer[end].ktv_fixed, last_sig,
1888 n_zones_type, total_types + n_unique_sig,
1889 &kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1890 n_zones_sig + type_zones_used, zone, early_zone);
1891 }
1892 assert(type_zones_used <= n_zones_type);
1893 p_j += n_unique_sig;
1894 }
1895 }
1896
1897 __startup_func
1898 static void
1899 kalloc_type_view_init_fixed(void)
1900 {
1901 kalloc_type_hash_seed = (uint32_t) early_random();
1902 kalloc_type_build_dlut();
1903 /*
1904 * Parse __kalloc_type sections and build array of pointers to
1905 * all kalloc type views in kt_buffer.
1906 */
1907 kt_count = kalloc_type_view_parse(KTV_FIXED);
1908 assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1909
1910 #if MACH_ASSERT
1911 vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1912 vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1913 assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1914 #endif
1915
1916 /*
1917 * Sort based on size class and signature
1918 */
1919 qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1920 kalloc_type_cmp_fixed);
1921
1922 /*
1923 * Build a skip list that holds the starts of unique signatures, and a
1924 * frequency list of the number of unique and total signatures per
1925 * kalloc size class.
1926 */
1927 uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1928 uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1929 kt_skip_list_start, kt_count);
1930
1931 /*
1932 * Create zones based on signatures
1933 */
1934 kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1935 }
1936
1937 __startup_func
1938 static void
1939 kalloc_type_heap_init(void)
1940 {
1941 assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1942 char kh_name[MAX_ZONE_NAME];
1943 uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1944
1945 for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1946 snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1947 kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1948 &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1949 }
1950 /*
1951 * All variable kalloc type allocations are collapsed into a single
1952 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1953 */
1954 KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1955 zone_view_count += 1;
1956 }
1957
1958 __startup_func
1959 static void
1960 kalloc_type_assign_heap(
1961 uint32_t start,
1962 uint32_t end,
1963 uint32_t heap_id)
1964 {
1965 bool use_split = kmem_get_random16(1);
1966
1967 if (use_split) {
1968 heap_id = kt_var_heaps;
1969 }
1970 kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1971 &kt_buffer[end].ktv_var, heap_id);
1972 }
1973
1974 __startup_func
1975 static void
1976 kalloc_type_split_heap(
1977 uint32_t start,
1978 uint32_t end,
1979 uint32_t heap_id)
1980 {
1981 uint32_t count = start;
1982 const char *p_name = NULL;
1983
1984 while (count < end) {
1985 kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1986 const char *c_name = cur->kt_name;
1987
1988 if (!p_name) {
1989 assert(count == start);
1990 p_name = c_name;
1991 }
1992 if (strcmp(c_name, p_name) != 0) {
1993 kalloc_type_assign_heap(start, count, heap_id);
1994 start = count;
1995 p_name = c_name;
1996 }
1997 count++;
1998 }
1999 kalloc_type_assign_heap(start, end, heap_id);
2000 }
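
/*
 * Illustrative sketch (not compiled): for views sorted by name, e.g.
 * { "vm.x", "vm.x", "iokit.y" } with start == 0 and end == 3, the loop
 * assigns the runs [0, 2) and [2, 3) separately;
 * kalloc_type_assign_heap() flips a coin per run between heap_id and
 * the last heap, so types that share a name always land together.
 */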
2001
2002 __startup_func
2003 static void
2004 kalloc_type_view_init_var(void)
2005 {
2006 uint32_t buf_start = 0, unique_sig = 0;
2007 uint32_t *kt_skip_list_start;
2008 uint16_t *shuffle_buf;
2009 uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
2010 uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
2011 /*
2012 * Pick a random heap to split
2013 */
2014 uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
2015
2016 /*
2017 * Zones are created prior to parsing the views because the zone budget
2018 * is fixed per size class, and special types identified while parsing
2019 * are redirected as they are discovered.
2020 */
2021 kalloc_type_heap_init();
2022
2023 /*
2024 * Parse __kalloc_var sections and build, in kt_buffer, an array of
2025 * pointers to the views that aren't redirected.
2026 */
2027 kt_count = kalloc_type_view_parse(KTV_VAR);
2028 assert(kt_count < UINT32_MAX);
2029
2030 #if MACH_ASSERT
2031 vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
2032 vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
2033 assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
2034 #endif
2035
2036 /*
2037 * Sort based on size class and signature
2038 */
2039 qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
2040 kalloc_type_cmp_var);
2041
2042 buf_start = kalloc_type_handle_parray_var();
2043
2044 /*
2045 * Build a skip list that holds starts of unique signatures
2046 */
2047 kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
2048 unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
2049 buf_start);
2050 shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
2051 /*
2052 * If we have only one flexible heap, the remaining elements share that
2053 * heap with the pointer arrays.
2054 */
2055 if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
2056 panic("kt_var_heaps is too small");
2057 }
2058
2059 kmem_shuffle(shuffle_buf, flex_heap_count);
2060 /*
2061 * The index of the heap we decided to split is placed twice in the
2062 * shuffle buffer, so that it gets twice the number of signatures,
2063 * which we then split evenly.
2064 */
2065 shuffle_buf[flex_heap_count] = split_heap;
2066 split_heap += (fixed_heaps + 1);
2067
2068 for (uint32_t i = 1; i <= unique_sig; i++) {
2069 uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
2070 fixed_heaps + 1;
2071 uint32_t start = kt_skip_list_start[i - 1];
2072 uint32_t end = kt_skip_list_start[i];
2073
2074 assert(heap_id <= kt_var_heaps);
2075 if (heap_id == split_heap) {
2076 kalloc_type_split_heap(start, end, heap_id);
2077 continue;
2078 }
2079 kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
2080 &kt_buffer[end].ktv_var, heap_id);
2081 }
2082 }
2083
2084 __startup_func
2085 static void
2086 kalloc_init(void)
2087 {
2088 /*
2089 * Allocate scratch space to parse kalloc_type_views and create
2090 * other structures necessary to process them.
2091 */
2092 uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
2093
2094 static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
2095 kalloc_zsize_compute();
2096
2097 /* Initialize kalloc data buffers heap */
2098 kalloc_heap_init(KHEAP_DATA_BUFFERS);
2099
2100 /* Initialize kalloc shared data buffers heap */
2101 kalloc_heap_init(KHEAP_DATA_SHARED);
2102
2103 /* Initialize kalloc early heap */
2104 kalloc_heap_init(KHEAP_EARLY);
2105
2106 kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
2107 KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT | KMA_SPRAYQTN, VM_KERN_MEMORY_KALLOC);
2108
2109 /*
2110 * Handle fixed size views
2111 */
2112 kalloc_type_view_init_fixed();
2113
2114 /*
2115 * Reset
2116 */
2117 bzero(kt_buffer, kt_scratch_size);
2118 kt_count = max_count;
2119
2120 /*
2121 * Handle variable size views
2122 */
2123 kalloc_type_view_init_var();
2124
2125 /*
2126 * Free resources used
2127 */
2128 kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
2129 }
2130 STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2131
2132 #pragma mark accessors
2133
2134 #define KFREE_ABSURD_SIZE \
2135 ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2136
2137 static void
2138 KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2139 {
2140 thread_t thr = current_thread();
2141 ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2142 }
2143
2144 static void
2145 KALLOC_ZINFO_SFREE(vm_size_t bytes)
2146 {
2147 thread_t thr = current_thread();
2148 ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
2149 }
2150
2151 static kmem_guard_t
2152 kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2153 {
2154 kmem_guard_t guard = {
2155 .kmg_atomic = true,
2156 .kmg_tag = tag,
2157 .kmg_type_hash = type_hash,
2158 .kmg_context = os_hash_kernel_pointer(owner),
2159 };
2160
2161 /*
2162 * TODO: this use is really not sufficiently smart.
2163 */
2164
2165 return guard;
2166 }
2167
2168 #if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
2169
2170 #if __arm64e__
2171 #define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2172
2173 /*
2174 * Zone encoding is:
2175 *
2176 * <PAC SIG><1><1><PTR value><5 bits of size class>
2177 *
2178 * VM encoding is:
2179 *
2180 * <PAC SIG><1><0><PTR value><14 bits of page count>
2181 *
2182 * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2183 * so that PAC authentication extends the proper sign bit.
2184 */
2185
2186 static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2187 #else /* __arm64e__ */
2188 #define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2189
2190 /*
2191 * Zone encoding is:
2192 *
2193 * <TBI><1><PTR value><5 bits of size class>
2194 *
2195 * VM encoding is:
2196 *
2197 * <TBI><0><PTR value><14 bits of page count>
2198 */
2199
2200 static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2201 #endif /* __arm64e__*/
2202
2203 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2204
2205 __attribute__((always_inline))
2206 struct kalloc_result
2207 __kalloc_array_decode(vm_address_t ptr)
2208 {
2209 struct kalloc_result kr;
2210 vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2211
2212 if (ptr & zone_mask) {
2213 kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2214 ptr &= ~0x1full;
2215 } else if (__probable(ptr)) {
2216 kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2217 ptr &= ~PAGE_MASK;
2218 ptr |= zone_mask;
2219 } else {
2220 kr.size = 0;
2221 }
2222
2223 kr.addr = (void *)ptr;
2224 return kr;
2225 }
2226
2227 static inline void *
2228 __kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2229 {
2230 return (void *)((vm_address_t)ptr | z->z_array_size_class);
2231 }
2232
2233 static inline vm_address_t
2234 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2235 {
2236 addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2237
2238 return addr | atop(size);
2239 }
2240
2241 #else /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2242
2243 SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2244
2245 /*
2246 * Encoding is:
2247 * bits 0..46: pointer value
2248 * bits 47..47: 0: zones, 1: VM
2249 * bits 48..63: zones: elem size, VM: number of pages
2250 */
2251
2252 #define KALLOC_ARRAY_TYPE_BIT 47
2253 static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2254 static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
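
/*
 * Worked example for the zone case (the kernel pointer below is
 * hypothetical): encoding addr 0xFFFFFE0012345680 with elem size 96
 * (0x60) clears bits 47..63 and stores the size in bits 48..63,
 * giving 0x00607E0012345680. Decoding takes size = ptr >> 48 = 0x60,
 * sees bit 47 clear (zone, so no PAGE_SHIFT scaling), and
 * sign-extends bit 46 of the remaining value to recover
 * 0xFFFFFE0012345680. In the VM case bit 47 stays set and bits 48..63
 * hold a page count instead, hence the extra << PAGE_SHIFT in the
 * decoder.
 */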
2255
2256 __attribute__((always_inline))
2257 struct kalloc_result
2258 __kalloc_array_decode(vm_address_t ptr)
2259 {
2260 struct kalloc_result kr;
2261 uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2262
2263 kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2264 if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2265 kr.size <<= PAGE_SHIFT;
2266 }
2267 /* sign extend, so that it also works with NULL */
2268 kr.addr = (void *)((long)(ptr << shift) >> shift);
2269
2270 return kr;
2271 }
2272
2273 static inline void *
2274 __kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2275 {
2276 vm_address_t addr = (vm_address_t)ptr;
2277
2278 addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2279 addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2280
2281 return (void *)addr;
2282 }
2283
2284 static inline vm_address_t
2285 __kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2286 {
2287 addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2288 addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2289
2290 return addr;
2291 }
2292
2293 #endif /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */
2294
2295 vm_size_t
2296 kalloc_next_good_size(vm_size_t size, uint32_t period)
2297 {
2298 uint32_t scale = kalloc_log2down((uint32_t)size);
2299 vm_size_t step, size_class;
2300
2301 if (size < KHEAP_STEP_START) {
2302 return KHEAP_STEP_START;
2303 }
2304 if (size < 2 * KHEAP_STEP_START) {
2305 return 2 * KHEAP_STEP_START;
2306 }
2307
2308 if (size < KHEAP_MAX_SIZE) {
2309 step = 1ul << (scale - 1);
2310 } else {
2311 step = round_page(1ul << (scale - kalloc_log2down(period)));
2312 }
2313
2314 size_class = (size + step) & -step;
2315 #if KASAN_CLASSIC
2316 if (size > K_SIZE_CLASS(size_class)) {
2317 return kalloc_next_good_size(size_class, period);
2318 }
2319 size_class = K_SIZE_CLASS(size_class);
2320 #endif
2321 return size_class;
2322 }
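
/*
 * Worked example (assuming KHEAP_STEP_START is small, e.g. 16, and both
 * sizes are well below KHEAP_MAX_SIZE): for size == 100, scale is
 * kalloc_log2down(100) == 6, so step == 1ul << 5 == 32 and
 * (100 + 32) & -32 rounds to 128. For size == 48, step is 16 and the
 * result is 64. Each step is half the current power-of-two range, so
 * successive "good" sizes grow geometrically.
 */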
2323
2324
2325 #pragma mark kalloc
2326
2327 static inline kalloc_heap_t
2328 kalloc_type_get_heap(kalloc_type_flags_t kt_flags)
2329 {
2330 /*
2331 * Redirect data-only views
2332 */
2333 if (kalloc_type_is_data(kt_flags)) {
2334 /*
2335 * There are kexts that allocate arrays of data types (uint8_t etc.)
2336 * and use krealloc_data / kfree_data to free them; therefore,
2337 * until adoption lands, we need to use the shared heap for now.
2338 */
2339 return GET_KEXT_KHEAP_DATA();
2340 }
2341
2342 if (kt_flags & KT_PROCESSED) {
2343 return KHEAP_KT_VAR;
2344 }
2345
2346 return KHEAP_DEFAULT;
2347 }
2348
2349
2350 __attribute__((noinline))
2351 static struct kalloc_result
2352 kalloc_large(
2353 kalloc_heap_t kheap,
2354 vm_size_t req_size,
2355 zalloc_flags_t flags,
2356 uint16_t kt_hash,
2357 void *owner __unused)
2358 {
2359 kma_flags_t kma_flags = KMA_KASAN_GUARD;
2360 vm_tag_t tag;
2361 vm_offset_t addr, size;
2362
2363 if (flags & Z_NOFAIL) {
2364 panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2365 (size_t)req_size);
2366 }
2367
2368 /*
2369 * kmem_alloc could block, so we return early if Z_NOWAIT was passed.
2370 *
2371 * Also, quickly reject sizes larger than our address space,
2372 * as kt_size or IOMallocArraySize() expect this.
2373 */
2374 if ((flags & Z_NOWAIT) ||
2375 (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2376 return (struct kalloc_result){ };
2377 }
2378
2379 if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2380 return (struct kalloc_result){ };
2381 }
2382
2383 /*
2384 * (73465472) on Intel we didn't use to pass this flag,
2385 * which in turn allowed kalloc_large() memory to be shared
2386 * with userspace directly.
2387 *
2388 * We're bound by this unfortunate ABI.
2389 */
2390 if ((flags & Z_MAY_COPYINMAP) == 0) {
2391 #ifndef __x86_64__
2392 kma_flags |= KMA_KOBJECT;
2393 #endif
2394 } else {
2395 assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
2396 }
2397 if (flags & Z_NOPAGEWAIT) {
2398 kma_flags |= KMA_NOPAGEWAIT;
2399 }
2400 if (flags & Z_ZERO) {
2401 kma_flags |= KMA_ZERO;
2402 }
2403 if (kheap == KHEAP_DATA_BUFFERS) {
2404 kma_flags |= KMA_DATA;
2405 } else if (kheap == KHEAP_DATA_SHARED) {
2406 kma_flags |= KMA_DATA_SHARED;
2407 } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2408 kma_flags |= KMA_SPRAYQTN;
2409 }
2410 if (flags & Z_NOSOFTLIMIT) {
2411 kma_flags |= KMA_NOSOFTLIMIT;
2412 }
2413
2414
2415 tag = zalloc_flags_get_tag(flags);
2416 if (flags & Z_VM_TAG_BT_BIT) {
2417 tag = vm_tag_bt() ?: tag;
2418 }
2419 if (tag == VM_KERN_MEMORY_NONE) {
2420 tag = kheap->kh_tag;
2421 }
2422
2423 size = round_page(req_size);
2424 if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2425 req_size = size;
2426 }
2427
2428 addr = kmem_alloc_guard(kernel_map, req_size, 0,
2429 kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;
2430
2431 if (addr != 0) {
2432 counter_inc(&kalloc_large_count);
2433 counter_add(&kalloc_large_total, size);
2434 KALLOC_ZINFO_SALLOC(size);
2435 if (flags & Z_KALLOC_ARRAY) {
2436 addr = __kalloc_array_encode_vm(addr, req_size);
2437 }
2438 } else {
2439 addr = 0;
2440 }
2441
2442 DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2443 return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2444 }
2445
2446 #if KASAN
2447
2448 static inline void
2449 kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
2450 {
2451 #if KASAN_CLASSIC
2452 /*
2453 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
2454 * tagging of the memory region is performed here.
2455 */
2456 kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
2457 __builtin_frame_address(0));
2458 #endif /* KASAN_CLASSIC */
2459
2460 #if KASAN_TBI
2461 kasan_tbi_retag_unused_space(addr, size, used ?: 1);
2462 #endif /* KASAN_TBI */
2463 }
2464 #endif /* KASAN */
2465
2466 static inline struct kalloc_result
2467 kalloc_zone(
2468 zone_t z,
2469 zone_stats_t zstats,
2470 zalloc_flags_t flags,
2471 vm_size_t req_size)
2472 {
2473 struct kalloc_result kr;
2474 vm_size_t esize;
2475
2476 kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
2477 esize = kr.size;
2478
2479 if (__probable(kr.addr)) {
2480 if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2481 req_size = esize;
2482 } else {
2483 kr.size = req_size;
2484 }
2485 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2486 kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
2487 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2488
2489 #if KASAN
2490 kalloc_mark_unused_space(kr.addr, esize, kr.size);
2491 #endif /* KASAN */
2492
2493 if (flags & Z_KALLOC_ARRAY) {
2494 kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
2495 }
2496 }
2497
2498 DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2499 return kr;
2500 }
2501
2502 static zone_id_t
2503 kalloc_use_early_heap(
2504 kalloc_heap_t kheap,
2505 zone_stats_t zstats,
2506 zone_id_t zstart,
2507 zalloc_flags_t *flags)
2508 {
2509 if (!zone_is_data_kheap(kheap->kh_heap_id)) {
2510 zone_stats_t zstats_cpu = zpercpu_get(zstats);
2511
2512 if (os_atomic_load(&zstats_cpu->zs_alloc_not_early, relaxed) == 0) {
2513 *flags |= Z_SET_NOTEARLY;
2514 return KHEAP_EARLY->kh_zstart;
2515 }
2516 }
2517
2518 return zstart;
2519 }
2520
2521 #undef kalloc_ext
2522
2523 __mockable struct kalloc_result
2524 kalloc_ext(
2525 void *kheap_or_kt_view,
2526 vm_size_t size,
2527 zalloc_flags_t flags,
2528 void *owner)
2529 {
2530 kalloc_type_var_view_t kt_view;
2531 kalloc_heap_t kheap;
2532 zone_stats_t zstats = NULL;
2533 zone_t z;
2534 uint16_t kt_hash;
2535 zone_id_t zstart;
2536
2537 if (kt_is_var_view(kheap_or_kt_view)) {
2538 kt_view = kt_demangle_var_view(kheap_or_kt_view);
2539 kheap = kalloc_type_get_heap(kt_view->kt_flags);
2540 /*
2541 * Use stats from view if present, else use stats from kheap.
2542 * KHEAP_KT_VAR accumulates stats for all allocations going to
2543 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2544 * use stats from the respective zones.
2545 */
2546 zstats = kt_view->kt_stats;
2547 kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2548 zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
2549 } else {
2550 kt_view = NULL;
2551 kheap = kheap_or_kt_view;
2552 kt_hash = kheap->kh_type_hash;
2553 zstart = kheap->kh_zstart;
2554 }
2555
2556 if (!zstats) {
2557 zstats = kheap->kh_stats;
2558 }
2559
2560 zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
2561 z = kalloc_zone_for_size_with_flags(zstart, size, flags);
2562 if (z) {
2563 return kalloc_zone(z, zstats, flags, size);
2564 } else {
2565 return kalloc_large(kheap, size, flags, kt_hash, owner);
2566 }
2567 }
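
/*
 * Minimal usage sketch for kalloc_ext()/kfree_ext() (illustrative only,
 * kept out of the build with #if 0; the heap and flags below are just
 * one plausible combination, not the only supported one).
 */
#if 0
static void
kalloc_ext_usage_example(void)
{
	struct kalloc_result kr;

	/* blocking, zeroed allocation from the data-buffers heap */
	kr = kalloc_ext(KHEAP_DATA_BUFFERS, 64, Z_WAITOK | Z_ZERO, NULL);
	if (kr.addr == NULL) {
		return;
	}

	/*
	 * Without Z_FULLSIZE, kr.size echoes the requested size;
	 * pass the same size back when freeing.
	 */
	kfree_ext(KHEAP_DATA_BUFFERS, kr.addr, kr.size);
}
#endif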
2568
2569 #if XNU_PLATFORM_MacOSX
2570 void *
2571 kalloc_external(vm_size_t size);
2572 void *
2573 kalloc_external(vm_size_t size)
2574 {
2575 zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2576 return kheap_alloc(KHEAP_DEFAULT, size, flags);
2577 }
2578 #endif /* XNU_PLATFORM_MacOSX */
2579
2580 void *
2581 kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2582 void *
2583 kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2584 {
2585 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2586 return kheap_alloc(GET_KEXT_KHEAP_DATA(), size, flags);
2587 }
2588
2589 void *
2590 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags);
2591 void *
2592 kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags)
2593 {
2594 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
2595 return kheap_alloc(KHEAP_DATA_SHARED, size, flags);
2596 }
2597
2598 __abortlike
2599 static void
2600 kalloc_data_require_panic(void *addr, vm_size_t size)
2601 {
2602 zone_id_t zid = zone_id_for_element(addr, size);
2603
2604 if (zid != ZONE_ID_INVALID) {
2605 zone_t z = &zone_array[zid];
2606 zone_security_flags_t zsflags = zone_security_array[zid];
2607
2608 if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
2609 panic("kalloc_data_require failed: address %p in [%s%s]",
2610 addr, zone_heap_name(z), zone_name(z));
2611 }
2612
2613 panic("kalloc_data_require failed: address %p in [%s%s], "
2614 "size too large %zd > %zd", addr,
2615 zone_heap_name(z), zone_name(z),
2616 (size_t)size, (size_t)zone_elem_inner_size(z));
2617 } else {
2618 panic("kalloc_data_require failed: address %p not in zone native map",
2619 addr);
2620 }
2621 }
2622
2623 __abortlike
2624 static void
2625 kalloc_non_data_require_panic(void *addr, vm_size_t size)
2626 {
2627 zone_id_t zid = zone_id_for_element(addr, size);
2628
2629 if (zid != ZONE_ID_INVALID) {
2630 zone_t z = &zone_array[zid];
2631 zone_security_flags_t zsflags = zone_security_array[zid];
2632
2633 switch (zsflags.z_kheap_id) {
2634 case KHEAP_ID_NONE:
2635 case KHEAP_ID_DATA_BUFFERS:
2636 case KHEAP_ID_DATA_SHARED:
2637 case KHEAP_ID_KT_VAR:
2638 panic("kalloc_non_data_require failed: address %p in [%s%s]",
2639 addr, zone_heap_name(z), zone_name(z));
2640 default:
2641 break;
2642 }
2643
2644 panic("kalloc_non_data_require failed: address %p in [%s%s], "
2645 "size too large %zd > %zd", addr,
2646 zone_heap_name(z), zone_name(z),
2647 (size_t)size, (size_t)zone_elem_inner_size(z));
2648 } else {
2649 panic("kalloc_non_data_require failed: address %p not in zone native map",
2650 addr);
2651 }
2652 }
2653
2654 void
2655 kalloc_data_require(void *addr, vm_size_t size)
2656 {
2657 zone_id_t zid = zone_id_for_element(addr, size);
2658
2659 if (zid != ZONE_ID_INVALID) {
2660 zone_t z = &zone_array[zid];
2661 zone_security_flags_t zsflags = zone_security_array[zid];
2662 if (zone_is_data_kheap(zsflags.z_kheap_id) &&
2663 size <= zone_elem_inner_size(z)) {
2664 return;
2665 }
2666 } else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2667 (vm_address_t)addr, size)) {
2668 return;
2669 } else if (kmem_needs_data_share_range() &&
2670 kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2671 (vm_address_t)addr, size)) {
2672 return;
2673 }
2674
2675 kalloc_data_require_panic(addr, size);
2676 }
2677
2678 void
2679 kalloc_non_data_require(void *addr, vm_size_t size)
2680 {
2681 zone_id_t zid = zone_id_for_element(addr, size);
2682
2683 if (zid != ZONE_ID_INVALID) {
2684 zone_t z = &zone_array[zid];
2685 zone_security_flags_t zsflags = zone_security_array[zid];
2686 switch (zsflags.z_kheap_id) {
2687 case KHEAP_ID_NONE:
2688 if (!zsflags.z_kalloc_type) {
2689 break;
2690 }
2691 OS_FALLTHROUGH;
2692 case KHEAP_ID_KT_VAR:
2693 if (size < zone_elem_inner_size(z)) {
2694 return;
2695 }
2696 break;
2697 default:
2698 break;
2699 }
2700 } else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2701 (vm_address_t)addr, size)) {
2702 return;
2703 } else if (kmem_needs_data_share_range() &&
2704 !kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
2705 (vm_address_t)addr, size)) {
2706 return;
2707 }
2708
2709 kalloc_non_data_require_panic(addr, size);
2710 }
2711
2712 bool
2713 kalloc_is_data_buffers(void *addr, vm_size_t size)
2714 {
2715 zone_id_t zid = zone_id_for_element(addr, size);
2716
2717 /*
2718 * If we do not use a dedicated data share range,
2719 * there is no way to fully distinguish between
2720 * the shared and buffers heaps.
2721 *
2722 * When kmem_needs_data_share_range() == true, the
2723 * KMEM_RANGE_ID_DATA range is strictly for DATA_BUFFERS,
2724 * and KMEM_RANGE_ID_DATA_SHARED is strictly for DATA_SHARED.
2725 */
2726 assert(kmem_needs_data_share_range());
2727
2728 if (zid != ZONE_ID_INVALID) {
2729 zone_t z = &zone_array[zid];
2730 zone_security_flags_t zsflags = zone_security_array[zid];
2731 if (zone_is_data_buffers_kheap(zsflags.z_kheap_id) &&
2732 size <= zone_elem_inner_size(z)) {
2733 return true;
2734 }
2735 } else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
2736 (vm_address_t)addr, size)) {
2737 return true;
2738 }
2739
2740 return false;
2741 }
2742
2743 __mockable void *
2744 kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2745 {
2746 /*
2747 * Callsites from a kext that isn't in the BootKC on macOS, or
2748 * any callsite on armv7, are not processed during startup, so
2749 * default to using kheap_alloc.
2750 *
2751 * Additionally, when the size is greater than KHEAP_MAX_SIZE the
2752 * zone is left NULL, as we need to use the VM for the allocation.
2753 *
2754 */
2755 if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2756 kalloc_heap_t kheap;
2757 vm_size_t size;
2758
2759 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2760 size = kalloc_type_get_size(kt_view->kt_size);
2761 kheap = kalloc_type_get_heap(kt_view->kt_flags);
2762 return kalloc_ext(kheap, size, flags, NULL).addr;
2763 }
2764
2765 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2766 return kalloc_type_impl(kt_view, flags);
2767 }
2768
2769 void *
2770 kalloc_type_var_impl_external(
2771 kalloc_type_var_view_t kt_view,
2772 vm_size_t size,
2773 zalloc_flags_t flags,
2774 void *owner);
2775 void *
2776 kalloc_type_var_impl_external(
2777 kalloc_type_var_view_t kt_view,
2778 vm_size_t size,
2779 zalloc_flags_t flags,
2780 void *owner)
2781 {
2782 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2783 return kalloc_type_var_impl(kt_view, size, flags, owner);
2784 }
2785
2786 #pragma mark kfree
2787
2788 __abortlike
2789 static void
2790 kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2791 {
2792 zone_security_flags_t zsflags = zone_security_config(z);
2793 const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2794
2795 if (zsflags.z_kalloc_type) {
2796 panic_include_kalloc_types = true;
2797 kalloc_type_src_zone = z;
2798 panic("kfree: addr %p found in kalloc type zone '%s' "
2799 "but being freed to %s heap", data, z->z_name, kheap_name);
2800 }
2801
2802 if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2803 panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2804 data, size, zone_heap_name(z), z->z_name);
2805 } else {
2806 panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2807 data, size, zone_heap_name(z), kheap_name);
2808 }
2809 }
2810
2811 __abortlike
2812 static void
2813 kfree_size_confusion_panic(zone_t z, void *data,
2814 size_t oob_offs, size_t size, size_t zsize)
2815 {
2816 if (z) {
2817 panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2818 "with elem_size %zd",
2819 data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2820 } else {
2821 panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2822 data, size, oob_offs);
2823 }
2824 }
2825
2826 __abortlike
2827 static void
2828 kfree_size_invalid_panic(void *data, size_t size)
2829 {
2830 panic("kfree: addr %p trying to free with nonsensical size %zd",
2831 data, size);
2832 }
2833
2834 __abortlike
2835 static void
2836 kfree_size_require_panic(void *data, size_t size, size_t min_size,
2837 size_t max_size)
2838 {
2839 panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2840 data, size, min_size, max_size);
2841 }
2842
2843 static void
2844 kfree_size_require(
2845 kalloc_heap_t kheap,
2846 void *addr,
2847 vm_size_t min_size,
2848 vm_size_t max_size)
2849 {
2850 assert3u(min_size, <=, max_size);
2851 zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
2852 vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
2853 vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2854 if (elem_size > max_zone_size || elem_size < min_size) {
2855 kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
2856 }
2857 }
2858
2859 static void
2860 kfree_large(
2861 vm_offset_t addr,
2862 vm_size_t size,
2863 kmf_flags_t flags,
2864 void *owner)
2865 {
2866 size = kmem_free_guard(kernel_map, addr, size,
2867 flags | KMF_TAG | KMF_KASAN_GUARD,
2868 kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));
2869
2870 counter_dec(&kalloc_large_count);
2871 counter_add(&kalloc_large_total, -(uint64_t)size);
2872 KALLOC_ZINFO_SFREE(size);
2873 DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2874 }
2875
2876 static void
2877 kfree_zone(
2878 void *kheap_or_kt_view __unsafe_indexable,
2879 void *data,
2880 vm_size_t size,
2881 zone_t z,
2882 vm_size_t zsize)
2883 {
2884 zone_security_flags_t zsflags = zone_security_config(z);
2885 kalloc_type_var_view_t kt_view;
2886 kalloc_heap_t kheap;
2887 zone_stats_t zstats = NULL;
2888
2889 if (kt_is_var_view(kheap_or_kt_view)) {
2890 kt_view = kt_demangle_var_view(kheap_or_kt_view);
2891 kheap = kalloc_type_get_heap(kt_view->kt_flags);
2892 /*
2893 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2894 * we will end up having incorrect stats. Cross frees may happen on
2895 * macOS due to allocation from an unprocessed view and free from
2896 * a processed view or vice versa.
2897 */
2898 zstats = kt_view->kt_stats;
2899 } else {
2900 kt_view = NULL;
2901 kheap = kheap_or_kt_view;
2902 }
2903
2904 if (!zstats) {
2905 zstats = kheap->kh_stats;
2906 }
2907
2909 if (kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED) {
2910 if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2911 kfree_heap_confusion_panic(kheap, data, size, z);
2912 }
2913 } else {
2914 if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2915 (zsflags.z_kheap_id != KHEAP_ID_EARLY)) {
2916 kfree_heap_confusion_panic(kheap, data, size, z);
2917 }
2918 }
2919
2920 DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2921
2922 /* needs to be __nosan because the user size might be partial */
2923 __nosan_bzero(data, zsize);
2924 zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
2925 }
2926
2927 __mockable void
2928 kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2929 {
2930 vm_size_t bucket_size;
2931 zone_t z;
2932
2933 if (data == NULL) {
2934 return;
2935 }
2936
2937 if (size > KFREE_ABSURD_SIZE) {
2938 kfree_size_invalid_panic(data, size);
2939 }
2940
2941 if (size <= KHEAP_MAX_SIZE) {
2942 vm_size_t oob_offs;
2943
2944 bucket_size = zone_element_size(data, &z, true, &oob_offs);
2945 if (size + oob_offs > bucket_size || bucket_size == 0) {
2946 kfree_size_confusion_panic(z, data,
2947 oob_offs, size, bucket_size);
2948 }
2949
2950 data = (char *)data - oob_offs;
2951 kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
2952 } else {
2953 kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
2954 }
2955 }
2956
2957 void
2958 kfree_addr_ext(kalloc_heap_t kheap, void *data)
2959 {
2960 vm_offset_t oob_offs;
2961 vm_size_t size, usize = 0;
2962 zone_t z;
2963
2964 if (data == NULL) {
2965 return;
2966 }
2967
2968 size = zone_element_size(data, &z, true, &oob_offs);
2969 if (size) {
2970 #if KASAN_CLASSIC
2971 usize = kasan_user_size((vm_offset_t)data);
2972 #endif
2973 data = (char *)data - oob_offs;
2974 kfree_zone(kheap, data, usize, z, size);
2975 } else {
2976 kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
2977 }
2978 }
2979
2980 #if XNU_PLATFORM_MacOSX
2981 void
2982 kfree_external(void *addr, vm_size_t size);
2983 void
2984 kfree_external(void *addr, vm_size_t size)
2985 {
2986 kalloc_heap_t kheap = KHEAP_DEFAULT;
2987
2988 kfree_ext(kheap, addr, size);
2989 }
2990 #endif /* XNU_PLATFORM_MacOSX */
2991
2992 void
2993 (kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2994 vm_size_t min_sz, vm_size_t max_sz)
2995 {
2996 if (__improbable(addr == NULL)) {
2997 return;
2998 }
2999 kfree_size_require(kheap, addr, min_sz, max_sz);
3000 kfree_addr_ext(kheap, addr);
3001 }
3002
3003 __mockable void *
3004 kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
3005 {
3006 zone_stats_t zs = kt_view->kt_zv.zv_stats;
3007 zone_t z = kt_view->kt_zv.zv_zone;
3008 zone_stats_t zs_cpu = zpercpu_get(zs);
3009
3010 if ((flags & Z_SET_NOTEARLY) ||
3011 os_atomic_load(&zs_cpu->zs_alloc_not_early, relaxed)) {
3012 return zalloc_ext(z, zs, flags).addr;
3013 }
3014
3015 assert(!zone_is_data_kheap(zone_security_config(z).z_kheap_id));
3016 return zalloc_ext(kt_view->kt_zearly, zs, flags | Z_SET_NOTEARLY).addr;
3017 }
3018
3019 void
3020 kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
3021 {
3022 /*
3023 * If the callsite is from a kext that isn't in the BootKC, it wasn't
3024 * processed during startup, so default to using kheap_free.
3025 *
3026 * Additionally, when the size is greater than KHEAP_MAX_SIZE the zone
3027 * is left NULL, as we need to use the VM for the allocation/free.
3028 */
3029 if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
3030 kalloc_heap_t kheap;
3031 vm_size_t size;
3032
3033 size = kalloc_type_get_size(kt_view->kt_size);
3034 kheap = kalloc_type_get_heap(kt_view->kt_flags);
3035 return kheap_free(kheap, ptr, size);
3036 }
3037 return kfree_type_impl(kt_view, ptr);
3038 }
3039
3040 void
3041 kfree_type_var_impl_external(
3042 kalloc_type_var_view_t kt_view,
3043 void *ptr,
3044 vm_size_t size);
3045 void
3046 kfree_type_var_impl_external(
3047 kalloc_type_var_view_t kt_view,
3048 void *ptr,
3049 vm_size_t size)
3050 {
3051 return kfree_type_var_impl(kt_view, ptr, size);
3052 }
3053
3054 void
3055 kfree_data_external(void *ptr, vm_size_t size);
3056 void
3057 kfree_data_external(void *ptr, vm_size_t size)
3058 {
3059 return kheap_free(GET_KEXT_KHEAP_DATA(), ptr, size);
3060 }
3061
3062 void
3063 kfree_data_addr_external(void *ptr);
3064 void
3065 kfree_data_addr_external(void *ptr)
3066 {
3067 return kheap_free_addr(GET_KEXT_KHEAP_DATA(), ptr);
3068 }
3069
3070 void
3071 kfree_shared_data_external(void *ptr, vm_size_t size);
3072 void
3073 kfree_shared_data_external(void *ptr, vm_size_t size)
3074 {
3075 return kheap_free(KHEAP_DATA_SHARED, ptr, size);
3076 }
3077
3078 void
3079 kfree_shared_data_addr_external(void *ptr);
3080 void
3081 kfree_shared_data_addr_external(void *ptr)
3082 {
3083 return kheap_free_addr(KHEAP_DATA_SHARED, ptr);
3084 }
3085
3086 #pragma mark krealloc
3087
3088 __abortlike
3089 static void
3090 krealloc_size_invalid_panic(void *data, size_t size)
3091 {
3092 panic("krealloc: addr %p trying to free with nonsensical size %zd",
3093 data, size);
3094 }
3095
3096
3097 __attribute__((noinline))
3098 static struct kalloc_result
3099 krealloc_large(
3100 kalloc_heap_t kheap,
3101 vm_offset_t addr,
3102 vm_size_t old_size,
3103 vm_size_t new_size,
3104 zalloc_flags_t flags,
3105 uint16_t kt_hash,
3106 void *owner __unused)
3107 {
3108 kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_KASAN_GUARD;
3109 vm_size_t new_req_size = new_size;
3110 vm_size_t old_req_size = old_size;
3111 uint64_t delta;
3112 kmem_return_t kmr;
3113 vm_tag_t tag;
3114
3115 if (flags & Z_NOFAIL) {
3116 panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
3117 (size_t)new_req_size);
3118 }
3119
3120 /*
3121 * kmem_alloc could block, so we return early if Z_NOWAIT was passed.
3122 *
3123 * Also, quickly reject sizes larger than our address space,
3124 * as kt_size or IOMallocArraySize() expect this.
3125 */
3126 if ((flags & Z_NOWAIT) ||
3127 (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
3128 return (struct kalloc_result){ };
3129 }
3130
3131 /*
3132 * (73465472) on Intel we didn't use to pass this flag,
3133 * which in turn allowed kalloc_large() memory to be shared
3134 * with userspace directly.
3135 *
3136 * We're bound by this unfortunate ABI.
3137 */
3138 if ((flags & Z_MAY_COPYINMAP) == 0) {
3139 #ifndef __x86_64__
3140 kmr_flags |= KMR_KOBJECT;
3141 #endif
3142 } else {
3143 assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
3144 }
3145 if (flags & Z_NOPAGEWAIT) {
3146 kmr_flags |= KMR_NOPAGEWAIT;
3147 }
3148 if (flags & Z_ZERO) {
3149 kmr_flags |= KMR_ZERO;
3150 }
3151 if (kheap == KHEAP_DATA_BUFFERS) {
3152 kmr_flags |= KMR_DATA;
3153 } else if (kheap == KHEAP_DATA_SHARED) {
3154 kmr_flags |= KMR_DATA_SHARED;
3155 } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
3156 kmr_flags |= KMR_SPRAYQTN;
3157 }
3158 if (flags & Z_REALLOCF) {
3159 kmr_flags |= KMR_REALLOCF;
3160 }
3161
3162 #if ZSECURITY_CONFIG(ZONE_TAGGING)
3163 krealloc_enforce_large_tagging_policy(&kmr_flags, kheap);
3164 #endif /* ZSECURITY_CONFIG(ZONE_TAGGING) */
3165
3166 tag = zalloc_flags_get_tag(flags);
3167 if (flags & Z_VM_TAG_BT_BIT) {
3168 tag = vm_tag_bt() ?: tag;
3169 }
3170 if (tag == VM_KERN_MEMORY_NONE) {
3171 tag = kheap->kh_tag;
3172 }
3173
3174 kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
3175 kmr_flags, kalloc_guard(tag, kt_hash, owner));
3176
3177 new_size = round_page(new_req_size);
3178 old_size = round_page(old_req_size);
3179
3180 if (kmr.kmr_address != 0) {
3181 delta = (uint64_t)(new_size - old_size);
3182 } else if (flags & Z_REALLOCF) {
3183 counter_dec(&kalloc_large_count);
3184 delta = (uint64_t)(-old_size);
3185 } else {
3186 delta = 0;
3187 }
3188
3189 counter_add(&kalloc_large_total, delta);
3190 KALLOC_ZINFO_SALLOC(delta);
3191
3192 if (addr != 0 || (flags & Z_REALLOCF)) {
3193 DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
3194 void*, addr);
3195 }
3196 if (__improbable(kmr.kmr_address == 0)) {
3197 return (struct kalloc_result){ };
3198 }
3199
3200 DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
3201 void*, kmr.kmr_address);
3202
3203 if (flags & Z_KALLOC_ARRAY) {
3204 kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
3205 new_req_size);
3206 }
3207 return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
3208 }
3209
3210 #undef krealloc_ext
3211
3212 struct kalloc_result
3213 krealloc_ext(
3214 void *kheap_or_kt_view __unsafe_indexable,
3215 void *addr,
3216 vm_size_t old_size,
3217 vm_size_t new_size,
3218 zalloc_flags_t flags,
3219 void *owner)
3220 {
3221 vm_size_t old_bucket_size, new_bucket_size, min_size;
3222 kalloc_type_var_view_t kt_view;
3223 kalloc_heap_t kheap;
3224 zone_stats_t zstats = NULL;
3225 struct kalloc_result kr;
3226 vm_offset_t oob_offs = 0;
3227 zone_t old_z, new_z;
3228 uint16_t kt_hash = 0;
3229 zone_id_t zstart;
3230
3231 if (old_size > KFREE_ABSURD_SIZE) {
3232 krealloc_size_invalid_panic(addr, old_size);
3233 }
3234
3235 if (addr == NULL && new_size == 0) {
3236 return (struct kalloc_result){ };
3237 }
3238
3239 if (kt_is_var_view(kheap_or_kt_view)) {
3240 kt_view = kt_demangle_var_view(kheap_or_kt_view);
3241 kheap = kalloc_type_get_heap(kt_view->kt_flags);
3242 /*
3243 * Similar to kalloc_ext: Use stats from view if present,
3244 * else use stats from kheap.
3245 *
3246 * krealloc_type isn't exposed to kexts, so we don't need to
3247 * handle cross frees and can rely on stats from view or kheap.
3248 */
3249 zstats = kt_view->kt_stats;
3250 kt_hash = KT_GET_HASH(kt_view->kt_flags);
3251 zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
3252 } else {
3253 kt_view = NULL;
3254 kheap = kheap_or_kt_view;
3255 kt_hash = kheap->kh_type_hash;
3256 zstart = kheap->kh_zstart;
3257 }
3258
3259 if (!zstats) {
3260 zstats = kheap->kh_stats;
3261 }
3262 /*
3263 * Find out the size of the bucket in which the new sized allocation
3264 * would land. If it matches the bucket of the original allocation,
3265 * simply return the same address.
3266 */
3267 if (new_size == 0) {
3268 new_z = ZONE_NULL;
3269 new_bucket_size = new_size = 0;
3270 } else {
3271 zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
3272 new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
3273 new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
3274 }
3275 #if !KASAN_CLASSIC
3276 if (flags & Z_FULLSIZE) {
3277 new_size = new_bucket_size;
3278 }
3279 #endif /* !KASAN_CLASSIC */
3280
3281 if (addr == NULL) {
3282 old_z = ZONE_NULL;
3283 old_size = old_bucket_size = 0;
3284 } else if (kheap_size_from_zone(addr, old_size, flags)) {
3285 old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
3286 if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
3287 kfree_size_confusion_panic(old_z, addr,
3288 oob_offs, old_size, old_bucket_size);
3289 }
3290 __builtin_assume(old_z != ZONE_NULL);
3291 } else {
3292 old_z = ZONE_NULL;
3293 old_bucket_size = round_page(old_size);
3294 }
3295 min_size = MIN(old_size, new_size);
3296
3297 if (old_bucket_size == new_bucket_size && old_z) {
3298 kr.addr = (char *)addr - oob_offs;
3299 kr.size = new_size;
3300 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
3301 kr.addr = zone_element_pgz_oob_adjust(kr.addr,
3302 new_size, new_bucket_size);
3303 if (kr.addr != addr) {
3304 memmove(kr.addr, addr, min_size);
3305 bzero((char *)kr.addr + min_size,
3306 kr.size - min_size);
3307 }
3308 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3309 #if KASAN
3310 /*
3311 * On KASAN kernels, treat a reallocation effectively as a new
3312 * allocation and add a sanity check around the existing one
3313 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't amount
3314 * to much extra work; on KASAN_TBI, assign a new tag both to the
3315 * buffer and to the potential free space.
3316 */
3317 #if KASAN_CLASSIC
3318 kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
3319 kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
3320 KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
3321 #endif /* KASAN_CLASSIC */
3322 #if KASAN_TBI
3323 /*
3324 * Validate the current buffer, then generate a new tag,
3325 * even if the address is stable, it's a "new" allocation.
3326 */
3327 __asan_loadN((vm_offset_t)addr, old_size);
3328 kr.addr = vm_memtag_generate_and_store_tag(kr.addr, kr.size);
3329 kasan_tbi_retag_unused_space(kr.addr, new_bucket_size, kr.size);
3330 #endif /* KASAN_TBI */
3331 #endif /* KASAN */
3332 goto out_success;
3333 }
3334
3335 #if !KASAN
3336 /*
3337 * Fallthrough to krealloc_large() for KASAN,
3338 * because we can't use kasan_check_alloc()
3339 * on kalloc_large() memory.
3340 *
3341 * kmem_realloc_guard() will perform all the validations,
3342 * and re-tagging.
3343 */
3344 if (old_bucket_size == new_bucket_size) {
3345 kr.addr = (char *)addr - oob_offs;
3346 kr.size = new_size;
3347 goto out_success;
3348 }
3349 #endif
3350
3351 if (addr && !old_z && new_size && !new_z) {
3352 return krealloc_large(kheap, (vm_offset_t)addr,
3353 old_size, new_size, flags, kt_hash, owner);
3354 }
3355
3356 if (!new_size) {
3357 kr.addr = NULL;
3358 kr.size = 0;
3359 } else if (new_z) {
3360 kr = kalloc_zone(new_z, zstats,
3361 flags & ~Z_KALLOC_ARRAY, new_size);
3362 } else if (old_z || addr == NULL) {
3363 kr = kalloc_large(kheap, new_size,
3364 flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
3365 }
3366
3367 if (addr && kr.addr) {
3368 __nosan_memcpy(kr.addr, addr, min_size);
3369 }
3370
3371 if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
3372 if (old_z) {
3373 kfree_zone(kheap_or_kt_view,
3374 (char *)addr - oob_offs, old_size,
3375 old_z, old_bucket_size);
3376 } else {
3377 kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
3378 }
3379 }
3380
3381 if (__improbable(kr.addr == NULL)) {
3382 return kr;
3383 }
3384
3385 out_success:
3386 if ((flags & Z_KALLOC_ARRAY) == 0) {
3387 return kr;
3388 }
3389
3390 if (new_z) {
3391 kr.addr = __kalloc_array_encode_zone(new_z,
3392 kr.addr, kr.size);
3393 } else {
3394 kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
3395 kr.size);
3396 }
3397 return kr;
3398 }
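
/*
 * Minimal usage sketch for krealloc_ext() (illustrative only, kept out
 * of the build with #if 0; the heap and flags are one plausible choice).
 */
#if 0
static void
krealloc_ext_usage_example(void)
{
	struct kalloc_result kr;

	kr = kalloc_ext(KHEAP_DATA_BUFFERS, 64, Z_WAITOK | Z_ZERO, NULL);
	if (kr.addr == NULL) {
		return;
	}

	/*
	 * Grow 64 -> 200: if both sizes map to the same bucket the
	 * address comes back unchanged; otherwise the contents are
	 * copied and the old element is freed on our behalf.
	 */
	kr = krealloc_ext(KHEAP_DATA_BUFFERS, kr.addr, 64, 200,
	    Z_WAITOK | Z_ZERO, NULL);
	if (kr.addr != NULL) {
		kfree_ext(KHEAP_DATA_BUFFERS, kr.addr, kr.size);
	}
}
#endif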
3399
3400 void *
3401 krealloc_data_external(
3402 void *ptr,
3403 vm_size_t old_size,
3404 vm_size_t new_size,
3405 zalloc_flags_t flags);
3406 void *
3407 krealloc_data_external(
3408 void *ptr,
3409 vm_size_t old_size,
3410 vm_size_t new_size,
3411 zalloc_flags_t flags)
3412 {
3413 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
3414 return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
3415 }
3416
3417 void *
3418 krealloc_shared_data_external(
3419 void *ptr,
3420 vm_size_t old_size,
3421 vm_size_t new_size,
3422 zalloc_flags_t flags);
3423 void *
3424 krealloc_shared_data_external(
3425 void *ptr,
3426 vm_size_t old_size,
3427 vm_size_t new_size,
3428 zalloc_flags_t flags)
3429 {
3430 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
3431 return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
3432 }
3433
3434 __startup_func
3435 static void
3436 kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
3437 {
3438 kheap->kh_zstart = parent_heap->kh_zstart;
3439 kheap->kh_heap_id = parent_heap->kh_heap_id;
3440 kheap->kh_tag = parent_heap->kh_tag;
3441 kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
3442 zone_view_count += 1;
3443 }
3444
3445 __startup_func
3446 static void
3447 kheap_init_data(kalloc_heap_t kheap)
3448 {
3449 kheap_init(KHEAP_DATA_BUFFERS, kheap);
3450 kheap->kh_views = KHEAP_DATA_BUFFERS->kh_views;
3451 KHEAP_DATA_BUFFERS->kh_views = kheap;
3452 }
3453
3454 __startup_func
3455 static void
3456 kheap_init_data_shared(kalloc_heap_t kheap)
3457 {
3458 kheap_init(KHEAP_DATA_SHARED, kheap);
3459 kheap->kh_views = KHEAP_DATA_SHARED->kh_views;
3460 KHEAP_DATA_SHARED->kh_views = kheap;
3461 }
3462
3463 __startup_func
3464 static void
3465 kheap_init_var(kalloc_heap_t kheap)
3466 {
3467 uint16_t idx;
3468 struct kheap_info *parent_heap;
3469
3470 kheap_init(KHEAP_KT_VAR, kheap);
3471 idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
3472 KT_VAR__FIRST_FLEXIBLE_HEAP;
3473 parent_heap = &kalloc_type_heap_array[idx];
3474 kheap->kh_zstart = parent_heap->kh_zstart;
3475 kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
3476 (uint32_t) early_random(), 0);
3477 kheap->kh_views = parent_heap->kh_views;
3478 parent_heap->kh_views = kheap;
3479 }
3480
3481 __startup_func
3482 void
3483 kheap_startup_init(kalloc_heap_t kheap)
3484 {
3485 switch (kheap->kh_heap_id) {
3486 case KHEAP_ID_DATA_BUFFERS:
3487 kheap_init_data(kheap);
3488 break;
3489 case KHEAP_ID_DATA_SHARED:
3490 kheap_init_data_shared(kheap);
3491 break;
3492 case KHEAP_ID_KT_VAR:
3493 kheap_init_var(kheap);
3494 break;
3495 default:
3496 panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
3497 kheap->kh_heap_id);
3498 }
3499 }
3500
3501 #pragma mark IOKit/libkern helpers
3502
3503 #if XNU_PLATFORM_MacOSX
3504
3505 void *
3506 kern_os_malloc_external(size_t size);
3507 void *
3508 kern_os_malloc_external(size_t size)
3509 {
3510 if (size == 0) {
3511 return NULL;
3512 }
3513
3514 return kheap_alloc(KERN_OS_MALLOC, size,
3515 Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
3516 }
3517
3518 void
3519 kern_os_free_external(void *addr);
3520 void
3521 kern_os_free_external(void *addr)
3522 {
3523 kheap_free_addr(KERN_OS_MALLOC, addr);
3524 }
3525
3526 void *
3527 kern_os_realloc_external(void *addr, size_t nsize);
3528 void *
3529 kern_os_realloc_external(void *addr, size_t nsize)
3530 {
3531 zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
3532 vm_size_t osize, oob_offs = 0;
3533
3534 if (addr == NULL) {
3535 return kern_os_malloc_external(nsize);
3536 }
3537
3538 osize = zone_element_size(addr, NULL, false, &oob_offs);
3539 if (osize == 0) {
3540 osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
3541 kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
3542 #if KASAN_CLASSIC
3543 } else {
3544 osize = kasan_user_size((vm_offset_t)addr);
3545 #endif
3546 }
3547 return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
3548 }
3549
3550 #endif /* XNU_PLATFORM_MacOSX */
3551
3552 void
3553 kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3554 {
3555 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3556 #pragma unused(size)
3557 zfree(zone, addr);
3558 #else
3559 if (zone_owns(zone, addr)) {
3560 zfree(zone, addr);
3561 } else {
3562 /*
3563 * Third-party kexts might not know about the operator new, and
3564 * the object may therefore have been allocated from the default heap.
3565 */
3566 printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3567 zone->z_name);
3568 kheap_free(KHEAP_DEFAULT, addr, size);
3569 }
3570 #endif
3571 }
3572
3573 bool
3574 IOMallocType_from_vm(kalloc_type_view_t ktv)
3575 {
3576 return kalloc_type_from_vm(ktv->kt_flags);
3577 }
3578
3579 void
3580 kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3581 {
3582 #if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3583 #pragma unused(esize)
3584 #else
3585 /*
3586 * For third-party kexts compiled against an SDK prior to macOS 11,
3587 * allocating an OSObject that is defined in xnu or first-party
3588 * kexts by directly calling new leads to using the default heap,
3589 * as it calls OSObject_operator_new_external. If such an object
3590 * is freed by xnu, it panics, as xnu uses typed free, which
3591 * requires the object to have been allocated in a kalloc.type
3592 * zone. To work around this issue, detect if the allocation being
3593 * freed is from the default heap and allow freeing to it.
3594 */
3595 zone_id_t zid = zone_id_for_element(addr, esize);
3596 if (__probable(zid < MAX_ZONES)) {
3597 zone_security_flags_t zsflags = zone_security_array[zid];
3598 if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
3599 return kheap_free(KHEAP_DEFAULT, addr, esize);
3600 }
3601 }
3602 #endif
3603 kfree_type_impl_external(ktv, addr);
3604 }

#pragma mark tests
#if DEBUG || DEVELOPMENT

#include <sys/random.h>

/*
 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
 *
 * Note: the presence of zones named kalloc.type* is used to
 * determine whether the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!zone_found) {
		return 0;
	}

	return 1;
}
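/*
 * In other words: every zone reachable from kalloc_type_zarray (and its
 * z_kt_next chains) must carry the "kalloc.type" name prefix, and at
 * least one such zone must exist; a single stray name fails the test.
 */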

/*
 * Ensure that the policy uses the zone budget completely
 */
static int
kalloc_type_test_policy(int64_t in)
{
	uint16_t zone_budget = (uint16_t) in;
	uint16_t max_bucket_freq = 25;
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
	uint16_t wasted_zone_budget = 0, total_types = 0;
	uint16_t n_zones = 0, n_zones_cal = 0;
	int ret = 0;

	/*
	 * Need a minimum of 2 zones per size class
	 */
	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
		return ret;
	}
	read_random((void *)&random[0], sizeof(random));
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;

		freq_list[i] = r1 > r2 ? r2 : r1;
		freq_total_list[i] = r1 > r2 ? r1 : r2;
	}
	wasted_zone_budget = kalloc_type_apply_policy(
		freq_list, freq_total_list,
		zones_per_sig, zones_per_type, zone_budget);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		total_types += freq_total_list[i];
	}

	n_zones = kmem_get_random16(total_types);
	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
		    freq_total_list[i], total_types,
		    i == MAX_K_ZONE(kt_zone_cfg) - 1);

		n_zones_cal += n_zones_for_type;

		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
	}
	printf("-----------------------\n%u\t%u\n", total_types,
	    n_zones_cal);

	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
		ret = 1;
	}
	return ret;
}
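/*
 * The two pass conditions above, spelled out: kalloc_type_apply_policy()
 * must report zero wasted budget (the zone budget is consumed exactly),
 * and summing kalloc_type_zones_for_type() across all size classes must
 * reproduce n_zones exactly, i.e. the proportional split partitions the
 * random zone count with nothing lost to rounding (the final size class
 * is flagged as last so it can absorb any rounding slack).
 */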

/*
 * Ensure that the sizes of kalloc_type adopters fit in the zones
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;

	while (kt_cur != NULL) {
		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
			return 0;
		}
		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
	}

	return 1;
}

struct test_kt_data {
	int a;
};

static int
kalloc_type_test_data_redirect(void)
{
	struct kalloc_type_view ktv_data = {
		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
	};
	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
		printf("%s: data redirect failed\n", __func__);
		return 0;
	}
	return 1;
}

static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);

static vm_size_t
test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
{
	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);

	return z ? zone_elem_inner_size(z) : round_page(size);
}
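/*
 * test_bucket_size() reports the effective allocation bucket for a
 * request: the inner element size of the zone kalloc would pick, or
 * round_page(size) when the request is too large for a zone and falls
 * through to the VM. The krealloc checks below compare buckets to tell
 * whether two sizes should (or should not) share a size class.
 */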

static int
run_kalloc_test_kheap(kalloc_heap_t kheap)
{
	uint64_t *data_ptr;
	void *strippedp_old, *strippedp_new;
	size_t alloc_size = 0, old_alloc_size = 0;
	struct kalloc_result kr = {};

	printf("%s: %s test running\n", __func__, kheap->kh_name);

	/*
	 * Test size 0: alloc, free, realloc
	 */
	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	if (!data_ptr) {
		printf("%s: kalloc 0 returned null\n", __func__);
		return 1;
	}
	kheap_free(kheap, data_ptr, alloc_size);

	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	alloc_size = sizeof(uint64_t) + 1;
	data_ptr = krealloc_ext(kheap, data_ptr, old_alloc_size,
	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> old size 0 failed\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	/*
	 * Test krealloc: same size class, different size classes, 2pgs,
	 * VM (with owner)
	 */
	old_alloc_size = alloc_size;
	alloc_size++;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old != strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) !=
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> same size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	old_alloc_size = alloc_size;
	alloc_size *= 2;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old == strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) ==
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> different size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	kheap_free(kheap, kr.addr, alloc_size);

	alloc_size = 3544;
	data_ptr = kalloc_ext(kheap, alloc_size,
	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
		    __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, alloc_size,
	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> 2pgs returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, PAGE_SIZE * 2,
	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM1 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 2,
	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM2 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 4,
	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);

	printf("%s: test passed\n", __func__);
	return 0;
}

static int
run_kalloc_test(int64_t in __unused, int64_t *out)
{
	*out = 1;

	if (run_kalloc_test_kheap(KHEAP_DATA_BUFFERS) != 0 ||
	    run_kalloc_test_kheap(KHEAP_DATA_SHARED) != 0) {
		*out = 0;
	}

	return 0;
}
SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
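/*
 * Both tests above are registered through SYSCTL_TEST_REGISTER, so on
 * DEBUG/DEVELOPMENT kernels they can be driven from userspace via the
 * debug.test sysctl namespace, e.g. (illustrative invocation):
 *
 *	sysctl debug.test.kalloc=1
 *	sysctl debug.test.kalloc_type=<zone budget to exercise>
 *
 * The written value is passed in as `in` (kalloc_type uses it as the
 * zone budget) and the test result is read back as the output value.
 */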

#endif /* DEBUG || DEVELOPMENT */