/*
 * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/kalloc.c
 *	Author:	Avadis Tevanian, Jr.
 *	Date:	1985
 *
 *	General kernel memory allocator.  This allocator is designed
 *	to be used by the kernel to manage dynamic memory fast.
 */

#include "mach/vm_types.h"
#include <mach/boolean.h>
#include <mach/sdt.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/counter.h>
#include <kern/zalloc_internal.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/backtrace.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_object_xnu.h>
#include <vm/vm_map.h>
#include <vm/vm_memtag.h>
#include <sys/kdebug.h>

#include <os/hash.h>
#include <san/kasan.h>
#include <libkern/section_keywords.h>
#include <libkern/prelink.h>

#if HAS_MTE
#include <arm64/mte_xnu.h>
#endif /* HAS_MTE */

SCALABLE_COUNTER_DEFINE(kalloc_large_count);
SCALABLE_COUNTER_DEFINE(kalloc_large_total);

#pragma mark initialization
/*
 * All allocations of size less than KHEAP_MAX_SIZE are rounded up to the
 * nearest sized zone.  This allocator is built on top of the zone
 * allocator.  A zone is created for each potential size that we are
 * willing to get in small blocks.
 *
 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from
 * the VM.
 */
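
/*
 * For example, a kalloc(100) request is served from the 128 byte zone
 * of its heap (see the size ladder computed in kalloc_zsize_compute()
 * below), while a request larger than KHEAP_MAX_SIZE bypasses the
 * zones entirely and is handed to the VM through the kalloc_large()
 * path.
 */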

/*
 * The kt_zone_cfg table defines the configuration of zones on various
 * platforms for kalloc_type fixed size allocations.
 */

#if KASAN_CLASSIC
#define K_SIZE_CLASS(size) \
	(((size) & PAGE_MASK) == 0 ? (size) : \
	((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
#else
#define K_SIZE_CLASS(size) (size)
#endif
static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);

static const uint16_t kt_zone_cfg[] = {
	K_SIZE_CLASS(16),
	K_SIZE_CLASS(32),
	K_SIZE_CLASS(48),
	K_SIZE_CLASS(64),
	K_SIZE_CLASS(80),
	K_SIZE_CLASS(96),
	K_SIZE_CLASS(128),
	K_SIZE_CLASS(160),
	K_SIZE_CLASS(192),
	K_SIZE_CLASS(224),
	K_SIZE_CLASS(256),
	K_SIZE_CLASS(288),
	K_SIZE_CLASS(368),
	K_SIZE_CLASS(400),
	K_SIZE_CLASS(512),
	K_SIZE_CLASS(576),
	K_SIZE_CLASS(768),
	K_SIZE_CLASS(1024),
	K_SIZE_CLASS(1152),
	K_SIZE_CLASS(1280),
	K_SIZE_CLASS(1664),
	K_SIZE_CLASS(2048),
	K_SIZE_CLASS(4096),
	K_SIZE_CLASS(6144),
	K_SIZE_CLASS(8192),
	K_SIZE_CLASS(12288),
	K_SIZE_CLASS(16384),
#if __arm64__
	K_SIZE_CLASS(24576),
	K_SIZE_CLASS(32768),
#endif /* __arm64__ */
};

#define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))

/*
 * kalloc_type callsites are assigned a zone during early boot. They
 * use the dlut[] (direct lookup table), indexed by size normalized
 * to the minimum alignment, to find the right zone index quickly.
 */
#define INDEX_ZDLUT(size)  (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
#define KALLOC_DLUT_SIZE   (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
#define MAX_SIZE_ZDLUT     ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
static __startup_data uint8_t kalloc_type_dlut[KALLOC_DLUT_SIZE];
static __startup_data uint32_t kheap_zsize[KHEAP_NUM_ZONES];

#if VM_TAG_SIZECLASSES
static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
#endif

const char * const kalloc_heap_names[] = {
	[KHEAP_ID_NONE]         = "",
	[KHEAP_ID_EARLY]        = "early.",
	[KHEAP_ID_DATA_BUFFERS] = "data.",
	[KHEAP_ID_DATA_SHARED]  = "data_shared.",
	[KHEAP_ID_KT_VAR]       = "",
};

/*
 * Early heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_EARLY[1] = {
	{
		.kh_name    = "early.kalloc",
		.kh_heap_id = KHEAP_ID_EARLY,
		.kh_tag     = VM_KERN_MEMORY_KALLOC_TYPE,
	}
};

/*
 * Bag of bytes heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
	{
		.kh_name    = "data.kalloc",
		.kh_heap_id = KHEAP_ID_DATA_BUFFERS,
		.kh_tag     = VM_KERN_MEMORY_KALLOC_DATA,
	}
};

/*
 * Configuration of variable kalloc type heaps
 */
SECURITY_READ_ONLY_LATE(struct kheap_info)
kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
	{
		.kh_name    = "kalloc.type.var",
		.kh_heap_id = KHEAP_ID_KT_VAR,
		.kh_tag     = VM_KERN_MEMORY_KALLOC_TYPE
	}
};

/*
 * Shared heap configuration
 */
SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_SHARED[1] = {
	{
		.kh_name    = "data_shared.kalloc",
		.kh_heap_id = KHEAP_ID_DATA_SHARED,
		.kh_tag     = VM_KERN_MEMORY_KALLOC_SHARED,
	}
};

KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);

__startup_func
static void
kalloc_zsize_compute(void)
{
	uint32_t step = KHEAP_STEP_START;
	uint32_t size = KHEAP_START_SIZE;

	/*
	 * Manually initialize extra initial zones
	 */
	kheap_zsize[0] = size / 2;
	kheap_zsize[1] = size;
	static_assert(KHEAP_EXTRA_ZONES == 2);

	/*
	 * Compute sizes for remaining zones
	 */
	for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
		uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;

		kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
		kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);

		step *= 2;
		size += step;
	}
}
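
/*
 * Illustrative result (a sketch, assuming KHEAP_START_SIZE == 32 and
 * KHEAP_STEP_START == 16):
 *
 *   kheap_zsize: 16, 32, 48, 64, 96, 128, 192, 256, 384, 512, ...
 *
 * i.e. each step doubles and contributes two zones (size + step and
 * size + 2 * step), producing the familiar 2^n and 1.5 * 2^n ladder.
 */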

static zone_t
kalloc_zone_for_size_with_flags(
	zone_id_t       zid,
	vm_size_t       size,
	zalloc_flags_t  flags)
{
	vm_size_t max_size = KHEAP_MAX_SIZE;
	bool forcopyin = flags & Z_MAY_COPYINMAP;
	zone_t zone;

	if (flags & Z_KALLOC_ARRAY) {
		size = roundup(size, KALLOC_ARRAY_GRANULE);
	}

	if (forcopyin) {
#if __x86_64__
		/*
		 * On Intel, the OSData() ABI used to allocate
		 * from the kernel map starting at PAGE_SIZE.
		 *
		 * If only vm_map_copyin() or a wrapper is used,
		 * then everything will work fine because vm_map_copy_t
		 * will perform an actual copy if the data is smaller
		 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
		 *
		 * However, if anyone is trying to call mach_vm_remap(),
		 * then bad things (TM) happen.
		 *
		 * Avoid this by preserving the ABI and moving
		 * to kalloc_large() earlier.
		 *
		 * Any recent code really ought to use IOMemoryDescriptor
		 * for this purpose however.
		 */
		max_size = PAGE_SIZE - 1;
#endif
	}

	if (size <= max_size) {
		uint32_t idx;

		if (size <= KHEAP_START_SIZE) {
			zid += (size > 16);
		} else {
			/*
			 * . log2down(size - 1) is log2up(size) - 1
			 * . (size - 1) >> (log2down(size - 1) - 1)
			 *   is either 0x2 or 0x3
			 */
			idx = kalloc_log2down((uint32_t)(size - 1));
			zid += KHEAP_EXTRA_ZONES +
			    2 * (idx - KHEAP_START_IDX) +
			    ((uint32_t)(size - 1) >> (idx - 1)) - 2;
		}

		zone = zone_by_id(zid);
#if KASAN_CLASSIC
		/*
		 * Under kasan classic, certain size classes are a redzone
		 * away from the mathematical formula above, and we need
		 * to "go to the next zone".
		 *
		 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
		 * this will never go to an "invalid" zone that doesn't
		 * belong to the kheap.
		 */
		if (size > zone_elem_inner_size(zone)) {
			zone++;
		}
#endif
		return zone;
	}

	return ZONE_NULL;
}
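
/*
 * Worked example for the index math above (a sketch, assuming
 * KHEAP_START_SIZE == 32, KHEAP_START_IDX == 5 == log2(32),
 * KHEAP_EXTRA_ZONES == 2, and the ladder 16, 32, 48, 64, 96, 128, ...):
 * for size == 100, idx = log2down(99) == 6 and
 *
 *   zid += 2 + 2 * (6 - 5) + ((99 >> 5) - 2) == 2 + 2 + 1 == 5
 *
 * which lands in the 128 byte zone, the smallest class that fits.
 */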

zone_t
kalloc_zone_for_size(zone_id_t zid, size_t size)
{
	return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
}

static inline bool
kheap_size_from_zone(
	void           *addr,
	vm_size_t       size,
	zalloc_flags_t  flags)
{
	vm_size_t max_size = KHEAP_MAX_SIZE;
	bool forcopyin = flags & Z_MAY_COPYINMAP;

#if __x86_64__
	/*
	 * If Z_FULLSIZE is used, then due to the behavior of
	 * kalloc_zone_for_size_with_flags() above, the element could have
	 * a PAGE_SIZE reported size, yet still be from a zone for
	 * Z_MAY_COPYINMAP.
	 */
	if (forcopyin) {
		if (size == PAGE_SIZE &&
		    zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
			return true;
		}

		max_size = PAGE_SIZE - 1;
	}
#else
#pragma unused(addr, forcopyin)
#endif

	return size <= max_size;
}

/*
 * Data zones should never be used for early allocations:
 * set the "no early alloc" bit right after creation.
 */
__startup_func
static void
kalloc_set_no_early_for_data(
	zone_kheap_id_t kheap_id,
	zone_stats_t    zstats)
{
	if (zone_is_data_kheap(kheap_id)) {
		zpercpu_foreach(zs, zstats) {
			os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
		}
	}
}

__startup_func
static void
kalloc_zone_init(
	const char          *kheap_name,
	zone_kheap_id_t      kheap_id,
	zone_id_t           *kheap_zstart,
	zone_create_flags_t  zc_flags)
{
	if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
		zc_flags |= ZC_DATA;
	}

	if (kheap_id == KHEAP_ID_DATA_SHARED) {
		zc_flags |= ZC_SHARED_DATA;
	}

	for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
		uint32_t size = kheap_zsize[i];
		char buf[MAX_ZONE_NAME], *z_name;
		int len;

		len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
		z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
		strlcpy(z_name, buf, len + 1);

		(void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
#if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)
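			/*
			 * z_array_size_class encodes this zone's size class:
			 * the low bits hold log2down(size / 32), and bit 0x10
			 * marks classes whose size is not an exact 32 << scale
			 * (presumably consumed by the Z_KALLOC_ARRAY path).
			 * Sketch: size 64 -> class 1; size 48 -> class 0 | 0x10.
			 */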
			uint32_t scale = kalloc_log2down(size / 32);

			if (size == 32 << scale) {
				z->z_array_size_class = scale;
			} else {
				z->z_array_size_class = scale | 0x10;
			}
#endif
			zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
			if (i == 0) {
				*kheap_zstart = zone_index(z);
			}
			kalloc_set_no_early_for_data(kheap_id, z->z_stats);
		});
	}
}

__startup_func
static void
kalloc_heap_init(struct kalloc_heap *kheap)
{
	kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
	    ZC_NONE);
	/*
	 * Count all the "raw" views for zones in the heap.
	 */
	zone_view_count += KHEAP_NUM_ZONES;
}

#define KEXT_ALIGN_SHIFT    6
#define KEXT_ALIGN_BYTES    (1 << KEXT_ALIGN_SHIFT)
#define KEXT_ALIGN_MASK     (KEXT_ALIGN_BYTES - 1)
#define kt_scratch_size     (256ul << 10)
#define KALLOC_TYPE_SECTION(type) \
	(type == KTV_FIXED ? "__kalloc_type" : "__kalloc_var")

/*
 * Enum to specify the kalloc_type variant being used.
 */
__options_decl(kalloc_type_variant_t, uint16_t, {
	KTV_FIXED = 0x0001,
	KTV_VAR   = 0x0002,
});

/*
 * Macros that generate the appropriate kalloc_type variant (i.e. fixed or
 * variable) of the desired variable/function.
 */
#define kalloc_type_var(type, var) \
	((type) == KTV_FIXED ? \
	(vm_offset_t) kalloc_type_##var##_fixed : \
	(vm_offset_t) kalloc_type_##var##_var)
#define kalloc_type_func(type, func, ...) \
	((type) == KTV_FIXED ? \
	kalloc_type_##func##_fixed(__VA_ARGS__) : \
	kalloc_type_##func##_var(__VA_ARGS__))
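
/*
 * For example, kalloc_type_func(type, view_sz) expands to a call to
 * either kalloc_type_view_sz_fixed() or kalloc_type_view_sz_var()
 * (both defined below), and kalloc_type_var(type, sec_start) selects
 * between the kalloc_type_sec_start_fixed/_var section symbols.
 */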

TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
    ZSECURITY_CONFIG_KT_VAR_BUDGET);
TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
    ZSECURITY_CONFIG_KT_BUDGET);
TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);

/**
 * @const kexts_enroll_data_shared
 *
 * @brief
 * We have two heaps for data allocations:
 * - KHEAP_DATA_BUFFERS, which is for allocations that are never shared.
 * - KHEAP_DATA_SHARED, which is for allocations that need to be shared.
 *
 * This is a control that indicates which heap we expose to kexts via the
 * exported allocation functions.
 */
STATIC_IF_KEY_DEFINE_TRUE(kexts_enroll_data_shared);

/**
 * @const restricted_data_mode
 *
 * @brief
 * This is a control that sets the mode of mapping-policy
 * enforcement on data allocations:
 * - none: the state before the change (no telemetry, no enforcement).
 * - telemetry: do not enforce, do emit telemetry.
 * - enforce: type the KHEAP_DATA_BUFFERS pages as restricted mappings.
 *
 * Combined with kexts_enroll_data_shared, we can create the modes we need
 * for none/telemetry/enforcement on core kernel/kexts.
 *
 * restricted_data_mode_t is an enum used to specify the mode being used.
 */
__options_decl(restricted_data_mode_t, uint8_t, {
	RESTRICTED_DATA_MODE_NONE      = 0x0000,
	RESTRICTED_DATA_MODE_TELEMETRY = 0x0001,
	RESTRICTED_DATA_MODE_ENFORCE   = 0x0002
});

TUNABLE(restricted_data_mode_t,
    restricted_data_mode,
    "restricted_data_mode",
#if __x86_64__
    RESTRICTED_DATA_MODE_NONE
#else
    RESTRICTED_DATA_MODE_TELEMETRY
#endif /* __x86_64__ */
    );

inline bool
kalloc_is_restricted_data_mode_telemetry(void)
{
	return restricted_data_mode == RESTRICTED_DATA_MODE_TELEMETRY;
}

inline bool
kalloc_is_restricted_data_mode_enforced(void)
{
	return restricted_data_mode == RESTRICTED_DATA_MODE_ENFORCE;
}

inline bool
kmem_needs_data_share_range(void)
{
	/*
	 * The dedicated range is required only for
	 * telemetry reporting, when we need to distinguish
	 * between the two kinds of data via kmem ranges.
	 *
	 * Even though this is strictly like checking telemetry
	 * mode, it's better to have a well-defined abstraction layer
	 * for that adopted in all the call-sites, to be flexible
	 * w.r.t. future changes / unrolling.
	 */
	return kalloc_is_restricted_data_mode_telemetry();
}

/*
 * Section start/end for fixed kalloc_type views
 */
extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");

/*
 * Section start/end for variable kalloc_type views
 */
extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");

__startup_data
static kalloc_type_views_t *kt_buffer = NULL;
__startup_data
static uint64_t kt_count;
__startup_data
uint32_t kalloc_type_hash_seed;

__startup_data
static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
__startup_data
static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];

struct nzones_with_idx {
	uint16_t nzones;
	uint16_t idx;
};
int16_t zone_carry = 0;

_Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
    "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");

/*
 * For use by lldb to iterate over kalloc types
 */
SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];

#define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
    KMEM_DIRECTION_MASK),
    "Insufficient bits to represent range and dir for VM allocations");
static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
    "validate idx mask");
/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static inline uint16_t
kalloc_type_get_idx(uint32_t kt_size)
{
	return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
}

static inline uint32_t
kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
{
	return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
}
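
/*
 * kt_size packs both values: the allocation size lives in the low bits
 * and, once kalloc_type_idx_for_size() has run, the size class index
 * lives above KALLOC_TYPE_IDX_SHIFT. Sketch, assuming a shift of 24:
 *
 *   kalloc_type_set_idx(128, 6)      == 0x06000080
 *   kalloc_type_get_idx(0x06000080)  == 6
 *   kalloc_type_get_size(0x06000080) == 128
 */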

static void
kalloc_type_build_dlut(void)
{
	vm_size_t size = 0;
	for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
		uint8_t zindex = 0;
		while (kt_zone_cfg[zindex] < size) {
			zindex++;
		}
		kalloc_type_dlut[i] = zindex;
	}
}

static uint32_t
kalloc_type_idx_for_size(uint32_t size)
{
	assert(size <= KHEAP_MAX_SIZE);
	uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
	return kalloc_type_set_idx(size, idx);
}
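
/*
 * Worked example (a sketch, assuming KALLOC_MINALIGN == 16): for
 * size == 100, INDEX_ZDLUT(100) == (100 + 15) / 16 == 7; slot 7 of
 * the dlut was built against 7 * 16 == 112 bytes, so it holds the
 * index of the first kt_zone_cfg entry >= 112, i.e. 6, the 128 byte
 * size class.
 */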

static void
kalloc_type_assign_zone_fixed(
	kalloc_type_view_t *cur,
	kalloc_type_view_t *end,
	zone_t              z,
	zone_t              sig_zone,
	zone_t              early_zone)
{
	/*
	 * Assign the zone created for every kalloc_type_view
	 * of the same unique signature
	 */
	bool need_raw_view = false;

	while (cur < end) {
		kalloc_type_view_t kt = *cur;
		struct zone_view *zv = &kt->kt_zv;
		zv->zv_zone = z;
		kalloc_type_flags_t kt_flags = kt->kt_flags;
		zone_security_flags_t zsflags = zone_security_config(z);

		assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
		if (!early_zone) {
			assert(zone_is_data_kheap(zsflags.z_kheap_id));
		}

		if (kt_flags & KT_SLID) {
			kt->kt_signature -= vm_kernel_slide;
			kt->kt_zv.zv_name -= vm_kernel_slide;
		}

		if ((kt_flags & KT_PRIV_ACCT) ||
		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
			zv->zv_stats = zalloc_percpu_permanent_type(
				struct zone_stats);
			need_raw_view = true;
			zone_view_count += 1;
		} else {
			zv->zv_stats = z->z_stats;
		}

		if ((kt_flags & KT_NOEARLY) || !early_zone) {
			if ((kt_flags & KT_NOEARLY) && !(kt_flags & KT_PRIV_ACCT)) {
				panic("KT_NOEARLY used w/o private accounting for view %s",
				    zv->zv_name);
			}

			zpercpu_foreach(zs, zv->zv_stats) {
				os_atomic_store(&zs->zs_alloc_not_early, 1, relaxed);
			}
		}

		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
			kt->kt_zearly = early_zone;
			kt->kt_zsig = sig_zone;
			/*
			 * If we haven't yet set the signature equivalence then set it,
			 * otherwise validate that the zone has the same signature
			 * equivalence as the sig_zone provided.
			 */
			if (!zone_get_sig_eq(z)) {
				zone_set_sig_eq(z, zone_index(sig_zone));
			} else {
				assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
			}
		}
		zv->zv_next = (zone_view_t) z->z_views;
		zv->zv_zone->z_views = (zone_view_t) kt;
		cur++;
	}
	if (need_raw_view) {
		zone_view_count += 1;
	}
}

__startup_func
static void
kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
    kalloc_type_var_view_t *end, uint32_t heap_idx)
{
	struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
	while (cur < end) {
		kalloc_type_var_view_t kt = *cur;
		kt->kt_heap_start = cfg->kh_zstart;
		kalloc_type_flags_t kt_flags = kt->kt_flags;

		if (kt_flags & KT_SLID) {
			if (kt->kt_sig_hdr) {
				kt->kt_sig_hdr -= vm_kernel_slide;
			}
			kt->kt_sig_type -= vm_kernel_slide;
			kt->kt_name -= vm_kernel_slide;
		}

		if ((kt_flags & KT_PRIV_ACCT) ||
		    ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
			kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
			zone_view_count += 1;
		}

		kt->kt_next = (zone_view_t) cfg->kt_views;
		cfg->kt_views = kt;
		cur++;
	}
}

__startup_func
static inline void
kalloc_type_slide_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
	ktv->kt_signature += vm_kernel_slide;
	ktv->kt_zv.zv_name += vm_kernel_slide;
	ktv->kt_flags |= KT_SLID;
}

__startup_func
static inline void
kalloc_type_slide_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
	if (ktv->kt_sig_hdr) {
		ktv->kt_sig_hdr += vm_kernel_slide;
	}
	ktv->kt_sig_type += vm_kernel_slide;
	ktv->kt_name += vm_kernel_slide;
	ktv->kt_flags |= KT_SLID;
}

__startup_func
static void
kalloc_type_validate_flags(
	kalloc_type_flags_t kt_flags,
	const char         *kt_name,
	uuid_string_t       kext_uuid)
{
	if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
		panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
		    "required xnu headers", kt_name, kext_uuid);
	}
}

static kalloc_type_flags_t
kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
	return ktv->kt_flags;
}

static kalloc_type_flags_t
kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
	return ktv->kt_flags;
}

/*
 * Check if signature of type is made up of only data and padding,
 * which is meant to never be shared.
 */
static bool
kalloc_type_is_data(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED);
	return kt_flags & KT_DATA_ONLY;
}

/*
 * Check if signature of type is made up of only pointers
 */
static bool
kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED2);
	return kt_flags & KT_PTR_ARRAY;
}

static bool
kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
{
	assert(kt_flags & KT_CHANGED);
	return kt_flags & KT_VM;
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_fixed(void)
{
	return sizeof(struct kalloc_type_view);
}

__startup_func
static inline vm_size_t
kalloc_type_view_sz_var(void)
{
	return sizeof(struct kalloc_type_var_view);
}

__startup_func
static inline uint64_t
kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
    vm_offset_t end)
{
	return (end - start) / kalloc_type_func(type, view_sz);
}

__startup_func
static inline void
kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
	buffer->ktv_fixed = (kalloc_type_view_t) ktv;
}

__startup_func
static inline void
kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
{
	buffer->ktv_var = (kalloc_type_var_view_t) ktv;
}

__startup_func
static void
kalloc_type_handle_data_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
	zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
	    cur_data_view->kt_size);
	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
	    NULL);
}

__startup_func
static void
kalloc_type_handle_data_view_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
	zone_t z = kalloc_zone_for_size(KHEAP_DATA_SHARED->kh_zstart,
	    cur_data_view->kt_size);
	kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
	    NULL);
}

__startup_func
static void
kalloc_type_handle_data_shared_view_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_SHARED_HEAP);
}

__startup_func
static uint32_t
kalloc_type_handle_parray_var(void)
{
	uint32_t i = 0;
	kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
	const char *p_name = kt->kt_name;

	/*
	 * The sorted list of variable kalloc_type_view has pointer arrays at the
	 * beginning. Walk through them and assign a random pointer heap to each
	 * type detected by typename.
	 */
	while (kalloc_type_is_ptr_array(kt->kt_flags)) {
		uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
		const char *c_name = kt->kt_name;
		uint32_t p_i = i;

		while (strcmp(c_name, p_name) == 0) {
			i++;
			kt = kt_buffer[i].ktv_var;
			c_name = kt->kt_name;
		}
		p_name = c_name;
		kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
		    &kt_buffer[i].ktv_var, heap_id);
	}

	/*
	 * Returns the index of the first view that isn't a pointer array
	 */
	return i;
}

__startup_func
static uint32_t
kalloc_hash_adjust(uint32_t hash, uint32_t shift)
{
	/*
	 * Limit range_id to ptr ranges
	 */
	uint32_t range_id = kmem_adjust_range_id(hash);
	uint32_t direction = hash & 0x8000;
	return (range_id | KMEM_HASH_SET | direction) << shift;
}

__startup_func
static void
kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
    kalloc_type_flags_t *kt_flags)
{
	uint32_t hash = 0;

	assert(sig_ty != NULL);
	hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
	    kalloc_type_hash_seed);
	if (sig_hdr) {
		hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
	}
	os_hash_jenkins_finish(hash);
	hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);

	*kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
}
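
/*
 * The masked range/direction hash ends up in bits [16, 31] of kt_flags
 * (the KT_GET_HASH() form above recovers it), where the kmem_*_guard
 * machinery can later use it to pick a pointer range and direction for
 * allocations of this type.
 */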

__startup_func
static void
kalloc_type_set_type_hash_fixed(vm_offset_t addr)
{
	/*
	 * Use backtraces on fixed as we don't have signatures for types that go
	 * to the VM due to rdar://85182551.
	 */
	(void) addr;
}

__startup_func
static void
kalloc_type_set_type_hash_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
	    &ktv->kt_flags);
}

__startup_func
static void
kalloc_type_mark_processed_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_mark_processed_var(vm_offset_t addr)
{
	kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
	ktv->kt_flags |= KT_PROCESSED;
}

__startup_func
static void
kalloc_type_update_view_fixed(vm_offset_t addr)
{
	kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
	ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
}

__startup_func
static void
kalloc_type_update_view_var(vm_offset_t addr)
{
	(void) addr;
}

__startup_func
static void
kalloc_type_view_copy(
	const kalloc_type_variant_t type,
	vm_offset_t                 start,
	vm_offset_t                 end,
	uint64_t                   *cur_count,
	bool                        slide,
	uuid_string_t               kext_uuid)
{
	uint64_t count = kalloc_type_view_count(type, start, end);
	if (count + *cur_count >= kt_count) {
		panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
	}
	vm_offset_t cur = start;
	while (cur < end) {
		if (slide) {
			kalloc_type_func(type, slide, cur);
		}
		kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
		    kext_uuid);
		kalloc_type_func(type, mark_processed, cur);
		/*
		 * Skip views that go to the VM
		 */
		if (kalloc_type_from_vm(kt_flags)) {
			cur += kalloc_type_func(type, view_sz);
			continue;
		}

		/*
		 * Check if the signature indicates that the entire allocation is data.
		 *
		 * Note that KT_VAR_DATA_HEAP is a fake "data" heap; variable
		 * kalloc_type handles the actual redirection in the entry points
		 * kalloc/kfree_type_var_impl.
		 */
		if (kalloc_type_is_data(kt_flags)) {
			kalloc_type_func(type, handle_data_view, cur);
			cur += kalloc_type_func(type, view_sz);
			continue;
		}

		/*
		 * Set type hash that is used by kmem_*_guard
		 */
		kalloc_type_func(type, set_type_hash, cur);
		kalloc_type_func(type, update_view, cur);
		kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
		cur += kalloc_type_func(type, view_sz);
		*cur_count = *cur_count + 1;
	}
}

__startup_func
static uint64_t
kalloc_type_view_parse(const kalloc_type_variant_t type)
{
	kc_format_t kc_format;
	uint64_t cur_count = 0;

	if (!PE_get_primary_kc_format(&kc_format)) {
		panic("kalloc_type_view_parse: wasn't able to determine kc format");
	}

	if (kc_format == KCFormatStatic) {
		/*
		 * If kc is static or KCGEN, __kalloc_type sections from kexts and
		 * xnu are coalesced.
		 */
		kalloc_type_view_copy(type,
		    kalloc_type_var(type, sec_start),
		    kalloc_type_var(type, sec_end),
		    &cur_count, false, NULL);
	} else if (kc_format == KCFormatFileset) {
		/*
		 * If kc uses filesets, traverse __kalloc_type section for each
		 * macho in the BootKC.
		 */
		kernel_mach_header_t *kc_mh = NULL;
		kernel_mach_header_t *kext_mh = NULL;

		kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
		struct load_command *lc =
		    (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
		for (uint32_t i = 0; i < kc_mh->ncmds;
		    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
			if (lc->cmd != LC_FILESET_ENTRY) {
				continue;
			}
			struct fileset_entry_command *fse =
			    (struct fileset_entry_command *)(vm_offset_t)lc;
			kext_mh = (kernel_mach_header_t *)fse->vmaddr;
			kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
				kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
			if (sect != NULL) {
				unsigned long uuidlen = 0;
				void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
				uuid_string_t kext_uuid_str;
				if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
					uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
				}
				kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
				    &cur_count, false, kext_uuid_str);
			}
		}
	} else if (kc_format == KCFormatKCGEN) {
		/*
		 * Parse __kalloc_type section from xnu
		 */
		kalloc_type_view_copy(type,
		    kalloc_type_var(type, sec_start),
		    kalloc_type_var(type, sec_end), &cur_count, false, NULL);

#ifndef __BUILDING_XNU_LIB_UNITTEST__ /* no kexts in unit-test */
		/*
		 * Parse __kalloc_type section for kexts
		 *
		 * Note: We don't process the kalloc_type_views for kexts on armv7
		 * as this platform has insufficient memory for type based
		 * segregation. kalloc_type_impl_external will direct callsites
		 * based on their size.
		 */
		kernel_mach_header_t *xnu_mh = &_mh_execute_header;
		vm_offset_t cur = 0;
		vm_offset_t end = 0;

		/*
		 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
		 * and traverse it.
		 */
		kernel_section_t *prelink_sect = getsectbynamefromheader(
			xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
		assert(prelink_sect);
		cur = prelink_sect->addr;
		end = prelink_sect->addr + prelink_sect->size;

		while (cur < end) {
			uint64_t kext_text_sz = 0;
			kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;

			if (kext_mh->magic == 0) {
				/*
				 * Assert that we have processed all kexts and all that is left
				 * is padding
				 */
				assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
				break;
			} else if (kext_mh->magic != MH_MAGIC_64 &&
			    kext_mh->magic != MH_CIGAM_64) {
				panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
				    cur);
			}

			/*
			 * Kext macho found, iterate through its segments
			 */
			struct load_command *lc =
			    (struct load_command *)(cur + sizeof(kernel_mach_header_t));
			bool isSplitKext = false;

			for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
			    i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
				if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
					isSplitKext = true;
					continue;
				} else if (lc->cmd != LC_SEGMENT_64) {
					continue;
				}

				kernel_segment_command_t *seg_cmd =
				    (struct segment_command_64 *)(vm_offset_t)lc;
				/*
				 * Parse kalloc_type section
				 */
				if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
					kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
					    KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
					if (kt_sect) {
						kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
						    kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
						    true, NULL);
					}
				}
				/*
				 * If the kext has a __TEXT segment, that is the only thing that
				 * will be in the special __PRELINK_TEXT KC segment, so the next
				 * macho is right after.
				 */
				if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
					kext_text_sz = seg_cmd->filesize;
				}
			}
			/*
			 * If the kext did not have a __TEXT segment (special xnu kexts with
			 * only a __LINKEDIT segment) then the next macho will be after all the
			 * header commands.
			 */
			if (!kext_text_sz) {
				kext_text_sz = kext_mh->sizeofcmds;
			} else if (!isSplitKext) {
				panic("kalloc_type_view_parse: No support for non-split seg KCs");
				break;
			}

			cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
		}
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */
	} else {
		/*
		 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
		 * parsing kalloc_type_view structs during startup.
		 */
		panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
		    " for kc_format = %d\n", kc_format);
	}
	return cur_count;
}

__startup_func
static int
kalloc_type_cmp_fixed(const void *a, const void *b)
{
	const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
	const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;

	const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
	const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
	/*
	 * If the kalloc_type_views are in the same kalloc bucket, sort by
	 * signature else sort by size
	 */
	if (idxA == idxB) {
		int result = strcmp(ktA->kt_signature, ktB->kt_signature);
		/*
		 * If the kalloc_type_views have the same signature sort by site
		 * name
		 */
		if (result == 0) {
			return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
		}
		return result;
	}
	const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
	const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
	return (int)(sizeA - sizeB);
}

__startup_func
static int
kalloc_type_cmp_var(const void *a, const void *b)
{
	const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
	const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
	const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
	const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
	bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
	bool ktB_ptrArray = kalloc_type_is_ptr_array(ktB->kt_flags);
	int result = 0;

	/*
	 * Switched around (B - A) because we want the pointer arrays to be at the
	 * top
	 */
	result = ktB_ptrArray - ktA_ptrArray;
	if (result == 0) {
		result = strcmp(ktA_hdr, ktB_hdr);
		if (result == 0) {
			result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
			if (result == 0) {
				result = strcmp(ktA->kt_name, ktB->kt_name);
			}
		}
	}
	return result;
}

__startup_func
static uint16_t *
kalloc_type_create_iterators_fixed(
	uint16_t *kt_skip_list_start,
	uint64_t  count)
{
	uint16_t *kt_skip_list = kt_skip_list_start;
	uint16_t p_idx = UINT16_MAX; /* previous size idx */
	uint16_t c_idx = 0;          /* current size idx */
	uint16_t unique_sig = 0;
	uint16_t total_sig = 0;
	const char *p_sig = NULL;
	const char *p_name = "";
	const char *c_sig = NULL;
	const char *c_name = NULL;

	/*
	 * Walk over each kalloc_type_view
	 */
	for (uint16_t i = 0; i < count; i++) {
		kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;

		c_idx = kalloc_type_get_idx(kt->kt_size);
		c_sig = kt->kt_signature;
		c_name = kt->kt_zv.zv_name;
		/*
		 * When the current kalloc_type_view is in a different kalloc size
		 * bucket than the previous, it means we have processed all in
		 * the previous size bucket, so store the accumulated values
		 * and advance the indices.
		 */
		if (p_idx == UINT16_MAX || c_idx != p_idx) {
			/*
			 * Updates for frequency lists
			 */
			if (p_idx != UINT16_MAX) {
				kt_freq_list[p_idx] = unique_sig;
				kt_freq_list_total[p_idx] = total_sig - unique_sig;
			}
			unique_sig = 1;
			total_sig = 1;

			p_idx = c_idx;
			p_sig = c_sig;
			p_name = c_name;

			/*
			 * Updates to signature skip list
			 */
			*kt_skip_list = i;
			kt_skip_list++;

			continue;
		}

		/*
		 * When the current kalloc_type_view is in the same kalloc size
		 * bucket as the previous, analyze the signature to see if it is
		 * unique.
		 *
		 * Signatures are collapsible if one is a substring of the next.
		 */
		if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
			/*
			 * Unique signature detected. Update counts and advance index
			 */
			unique_sig++;
			total_sig++;

			*kt_skip_list = i;
			kt_skip_list++;
			p_sig = c_sig;
			p_name = c_name;
			continue;
		}
		/*
		 * Needed here because we do substring matching for signatures:
		 * track the longer signature seen rather than the substring.
		 */
		p_sig = c_sig;

		/*
		 * Check if current kalloc_type_view corresponds to a new type
		 */
		if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
			total_sig++;
			p_name = c_name;
		}
	}
	/*
	 * Final update
	 */
	assert(c_idx == p_idx);
	assert(kt_freq_list[c_idx] == 0);
	kt_freq_list[c_idx] = unique_sig;
	kt_freq_list_total[c_idx] = total_sig - unique_sig;
	*kt_skip_list = (uint16_t) count;

	return ++kt_skip_list;
}
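
/*
 * Sketch of the iterators built above: for a sorted buffer such as
 *
 *   [ 32B: sigA sigA' sigB | 48B: sigC sigC ]
 *
 * (where sigA is a substring of sigA'), the skip list records where
 * each unique signature run starts, plus a final terminator: (0, 2, 3,
 * 5). kt_freq_list holds the number of unique signatures per size
 * class (2, 1), and kt_freq_list_total holds the count of additional
 * types that collapsed onto an already-counted signature.
 */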

__startup_func
static uint32_t
kalloc_type_create_iterators_var(
	uint32_t *kt_skip_list_start,
	uint32_t  buf_start)
{
	uint32_t *kt_skip_list = kt_skip_list_start;
	uint32_t n = 0;

	kt_skip_list[n] = buf_start;
	assert(kt_count > buf_start + 1);
	for (uint32_t i = buf_start + 1; i < kt_count; i++) {
		kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
		kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
		const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
		const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
		assert(ktA->kt_sig_type != NULL);
		assert(ktB->kt_sig_type != NULL);
		if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
		    strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
			n++;
			kt_skip_list[n] = i;
		}
	}
	/*
	 * Final update
	 */
	n++;
	kt_skip_list[n] = (uint32_t) kt_count;
	return n;
}

__startup_func
static uint16_t
kalloc_type_distribute_budget(
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t zone_budget,
	uint16_t min_zones_per_size)
{
	uint16_t total_sig = 0;
	uint16_t min_sig = 0;
	uint16_t assigned_zones = 0;
	uint16_t remaining_zones = zone_budget;
	uint16_t modulo = 0;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t sig_freq = freq_list[i];
		uint16_t min_zones = min_zones_per_size;

		if (sig_freq < min_zones_per_size) {
			min_zones = sig_freq;
		}
		total_sig += sig_freq;
		kt_zones[i] = min_zones;
		min_sig += min_zones;
	}
	if (remaining_zones > total_sig) {
		remaining_zones = total_sig;
	}
	assert(remaining_zones >= min_sig);
	remaining_zones -= min_sig;
	total_sig -= min_sig;
	assigned_zones += min_sig;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t freq = freq_list[i];

		if (freq < min_zones_per_size) {
			continue;
		}
		uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
		uint16_t n_zones = (uint16_t)(numer / total_sig);

		/*
		 * Accumulate remainder and increment n_zones when it goes above
		 * denominator
		 */
		modulo += numer % total_sig;
		if (modulo >= total_sig) {
			n_zones++;
			modulo -= total_sig;
		}

		/*
		 * Cap the total number of zones to the unique signatures
		 */
		if ((n_zones + min_zones_per_size) > freq) {
			uint16_t extra_zones = n_zones + min_zones_per_size - freq;
			modulo += (extra_zones * total_sig);
			n_zones -= extra_zones;
		}
		kt_zones[i] += n_zones;
		assigned_zones += n_zones;
	}

	if (kt_options & KT_OPTIONS_DEBUG) {
		printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
		    assigned_zones, remaining_zones + min_sig - assigned_zones);
	}
	return remaining_zones + min_sig - assigned_zones;
}
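
/*
 * Worked example for the distribution above: with
 * freq_list = {8, 2, 1}, zone_budget = 7 and min_zones_per_size = 2,
 * each size class first receives min(freq, 2) zones (2, 2, 1), and the
 * 2 leftover zones are split proportionally to (freq - min), both
 * landing in the first class for a final (4, 2, 1). The division
 * remainders accumulate in `modulo` so fractional zones are carried to
 * later classes instead of being dropped.
 */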

__startup_func
static int
kalloc_type_cmp_type_zones(const void *a, const void *b)
{
	const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
	const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;

	return (int)(B.nzones - A.nzones);
}

__startup_func
static void
kalloc_type_redistribute_budget(
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)])
{
	uint16_t count = 0, cur_count = 0;
	struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t top_zone_total = 0;

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t zones = kt_zones[i];

		/*
		 * If a sizeclass got no zones but has types to divide, make a
		 * note of it
		 */
		if (zones == 0 && (freq_total_list[i] != 0)) {
			count++;
		}

		sorted_zones[i].nzones = kt_zones[i];
		sorted_zones[i].idx = i;
	}

	qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
	    sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);

	for (uint16_t i = 0; i < 3; i++) {
		top_zone_total += sorted_zones[i].nzones;
	}

	/*
	 * Borrow zones from the top 3 sizeclasses and redistribute to those
	 * that didn't get a zone but have types to divide
	 */
	cur_count = count;
	for (uint16_t i = 0; i < 3; i++) {
		uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
		uint16_t zone_available = kt_zones[sorted_zones[i].idx];

		if (zone_borrow > (zone_available / 2)) {
			zone_borrow = zone_available / 2;
		}
		kt_zones[sorted_zones[i].idx] -= zone_borrow;
		cur_count -= zone_borrow;
	}

	for (uint16_t i = 0; i < 3; i++) {
		if (cur_count == 0) {
			break;
		}
		kt_zones[sorted_zones[i].idx]--;
		cur_count--;
	}

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
		    (count > cur_count)) {
			kt_zones[i]++;
			count--;
		}
	}
}

static uint16_t
kalloc_type_apply_policy(
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
	uint16_t zone_budget)
{
	uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
	uint16_t zbudget_type = zone_budget - zbudget_sig;
	uint16_t wasted_zones = 0;

#if DEBUG || DEVELOPMENT
	if (startup_phase < STARTUP_SUB_LOCKDOWN) {
		__assert_only uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
		assert(zone_budget + current_zones <= MAX_ZONES);
	}
#endif

	wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
	    zbudget_sig, 2);
	wasted_zones += kalloc_type_distribute_budget(freq_total_list,
	    kt_zones_type, zbudget_type, 0);
	kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);

	/*
	 * Print stats when KT_OPTIONS_DEBUG boot-arg present
	 */
	if (kt_options & KT_OPTIONS_DEBUG) {
		printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
		    "zones_type\n");
		for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
			printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
			    freq_total_list[i] + freq_list[i], freq_list[i],
			    kt_zones_sig[i] + kt_zones_type[i],
			    kt_zones_sig[i], kt_zones_type[i]);
		}
	}

	return wasted_zones;
}


__startup_func
static void
kalloc_type_create_zone_for_size(
	zone_t   *kt_zones_for_size,
	uint16_t  kt_zones,
	vm_size_t z_size)
{
	zone_t p_zone = NULL;
	char *z_name = NULL;
	zone_t shared_z = NULL;

	for (uint16_t i = 0; i < kt_zones; i++) {
		z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
		snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
		    (size_t) z_size);
		zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
		if (i != 0) {
			p_zone->z_kt_next = z;
		}
		p_zone = z;
		kt_zones_for_size[i] = z;
	}
	/*
	 * Create shared zone for sizeclass if it doesn't already exist
	 */
	if (kt_shared_fixed) {
		shared_z = kalloc_zone_for_size(KHEAP_EARLY->kh_zstart, z_size);
		if (zone_elem_inner_size(shared_z) != z_size) {
			z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
			snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
			    (size_t) z_size);
			shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
			    ^(zone_t zone){
				zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_EARLY;
			});
		}
	}
	kt_zones_for_size[kt_zones] = shared_z;
}

__startup_func
static uint16_t
kalloc_type_zones_for_type(
	uint16_t zones_total_type,
	uint16_t unique_types,
	uint16_t total_types,
	bool     last_sig)
{
	uint16_t zones_for_type = 0, n_mod = 0;

	if (zones_total_type == 0) {
		return 0;
	}

	zones_for_type = (zones_total_type * unique_types) / total_types;
	n_mod = (zones_total_type * unique_types) % total_types;
	zone_carry += n_mod;

	/*
	 * Drain carry opportunistically
	 */
	if (((unique_types > 3) && (zone_carry > 0)) ||
	    (zone_carry >= (int) total_types) ||
	    (last_sig && (zone_carry > 0))) {
		zone_carry -= total_types;
		zones_for_type++;
	}

	if (last_sig) {
		assert(zone_carry == 0);
	}

	return zones_for_type;
}

__startup_func
static uint16_t
kalloc_type_build_skip_list(
	kalloc_type_view_t *start,
	kalloc_type_view_t *end,
	uint16_t           *kt_skip_list)
{
	kalloc_type_view_t *cur = start;
	kalloc_type_view_t prev = *start;
	uint16_t i = 0, idx = 0;

	kt_skip_list[idx] = i;
	idx++;

	while (cur < end) {
		kalloc_type_view_t kt_cur = *cur;

		if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
			kt_skip_list[idx] = i;

			prev = kt_cur;
			idx++;
		}
		i++;
		cur++;
	}

	/*
	 * Final update
	 */
	kt_skip_list[idx] = i;
	return idx;
}
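
/*
 * E.g. for views named (A, A, B, C, C, C) this produces the skip list
 * (0, 2, 3, 6) and returns 3, the number of distinct type names in
 * [start, end).
 */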

__startup_func
static void
kalloc_type_init_sig_eq(
	zone_t *zones,
	uint16_t n_zones,
	zone_t sig_zone)
{
	for (uint16_t i = 0; i < n_zones; i++) {
		zone_t z = zones[i];

		assert(!zone_get_sig_eq(z));
		zone_set_sig_eq(z, zone_index(sig_zone));
	}
}

#ifndef __BUILDING_XNU_LIB_UNITTEST__
#define KT_ZONES_FOR_SIZE_SIZE 32
#else /* __BUILDING_XNU_LIB_UNITTEST__ */
/* different init sequence in unit-test requires a bigger buffer in the kalloc zones initialization */
#define KT_ZONES_FOR_SIZE_SIZE 35
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */

__startup_func
static uint16_t
kalloc_type_distribute_zone_for_type(
	kalloc_type_view_t *start,
	kalloc_type_view_t *end,
	bool last_sig,
	uint16_t zones_total_type,
	uint16_t total_types,
	uint16_t *kt_skip_list,
	zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE],
	uint16_t type_zones_start,
	zone_t sig_zone,
	zone_t early_zone)
{
	uint16_t count = 0, n_zones = 0;
	uint16_t *shuffle_buf = NULL;
	zone_t *type_zones = &kt_zones_for_size[type_zones_start];

	/*
	 * Assert there is space in buffer
	 */
	count = kalloc_type_build_skip_list(start, end, kt_skip_list);
	n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
	    last_sig);
	shuffle_buf = &kt_skip_list[count + 1];

	/*
	 * Initialize the signature equivalence zone for the type zones
	 */
	kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);

	if (n_zones == 0) {
		kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
		    early_zone);
		return n_zones;
	}

	/*
	 * Don't shuffle in the sig_zone if there is only 1 type with this
	 * signature
	 */
	if (count == 1) {
		kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
		    early_zone);
		return n_zones;
	}

	/*
	 * Add the signature based zone to n_zones
	 */
	n_zones++;

	for (uint16_t i = 0; i < count; i++) {
		uint16_t zidx = i % n_zones, shuffled_zidx = 0;
		uint16_t type_start = kt_skip_list[i];
		kalloc_type_view_t *kt_type_start = &start[type_start];
		uint16_t type_end = kt_skip_list[i + 1];
		kalloc_type_view_t *kt_type_end = &start[type_end];
		zone_t zone;

		if (zidx == 0) {
			kmem_shuffle(shuffle_buf, n_zones);
		}

		shuffled_zidx = shuffle_buf[zidx];
		zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
		kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
		    early_zone);
	}

	return n_zones - 1;
}
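
/*
 * Illustrative walk-through (not compiled in): with count = 4 type
 * groups and n_zones = 2 dedicated type zones, the signature zone is
 * shuffled in as a third candidate (n_zones becomes 3). Group i picks
 * shuffle_buf[i % 3], where index 0 maps to sig_zone and indices 1..2
 * map to type_zones[0..1]; the buffer is reshuffled each time the
 * candidates wrap around, so consecutive groups land in a randomized
 * rotation of the available zones.
 */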

__startup_func
static void
kalloc_type_create_zones_fixed(
	uint16_t *kt_skip_list_start,
	uint16_t *kt_shuffle_buf)
{
	uint16_t *kt_skip_list = kt_skip_list_start;
	uint16_t p_j = 0;
	uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
#if DEBUG || DEVELOPMENT
	__assert_only uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
	    (vm_address_t) kt_buffer) / sizeof(uint16_t);
#endif
	/*
	 * Apply policy to determine how many zones to create for each size
	 * class.
	 */
	kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
	    kt_zones_sig, kt_zones_type, kt_fixed_zones);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_unique_sig = kt_freq_list[i];
		vm_size_t z_size = kt_zone_cfg[i];
		uint16_t n_zones_sig = kt_zones_sig[i];
		uint16_t n_zones_type = kt_zones_type[i];
		uint16_t total_types = kt_freq_list_total[i];
		uint16_t type_zones_used = 0;

		if (n_unique_sig == 0) {
			continue;
		}

		zone_carry = 0;
		assert(n_zones_sig + n_zones_type + 1 <= KT_ZONES_FOR_SIZE_SIZE);
		zone_t kt_zones_for_size[KT_ZONES_FOR_SIZE_SIZE] = {};
		kalloc_type_create_zone_for_size(kt_zones_for_size,
		    n_zones_sig + n_zones_type, z_size);

		kalloc_type_zarray[i] = kt_zones_for_size[0];
		/*
		 * Ensure that there is enough space to shuffle n_unique_sig
		 * indices
		 */
		assert(n_unique_sig < kt_shuffle_count);

		/*
		 * Get a shuffled set of signature indices
		 */
		*kt_shuffle_buf = 0;
		if (n_unique_sig > 1) {
			kmem_shuffle(kt_shuffle_buf, n_unique_sig);
		}

		for (uint16_t j = 0; j < n_zones_sig; j++) {
			zone_t *z_ptr = &kt_zones_for_size[j];

			kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
		}

		for (uint16_t j = 0; j < n_unique_sig; j++) {
			/*
			 * For every size that has unique types
			 */
			uint16_t shuffle_idx = kt_shuffle_buf[j];
			uint16_t cur = kt_skip_list[shuffle_idx + p_j];
			uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
			zone_t zone = kt_zones_for_size[j % n_zones_sig];
			zone_t early_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
			bool last_sig;

			last_sig = (j == (n_unique_sig - 1)) ? true : false;
			type_zones_used += kalloc_type_distribute_zone_for_type(
				&kt_buffer[cur].ktv_fixed,
				&kt_buffer[end].ktv_fixed, last_sig,
				n_zones_type, total_types + n_unique_sig,
				&kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
				n_zones_sig + type_zones_used, zone, early_zone);
		}
		assert(type_zones_used <= n_zones_type);
		p_j += n_unique_sig;
	}
}

__startup_func
static void
kalloc_type_view_init_fixed(void)
{
	kalloc_type_hash_seed = (uint32_t) early_random();
	kalloc_type_build_dlut();
	/*
	 * Parse __kalloc_type sections and build an array of pointers to
	 * all kalloc type views in kt_buffer.
	 */
	kt_count = kalloc_type_view_parse(KTV_FIXED);
	assert(kt_count < KALLOC_TYPE_SIZE_MASK);

#if MACH_ASSERT
	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
#endif

	/*
	 * Sort based on size class and signature
	 */
	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
	    kalloc_type_cmp_fixed);

	/*
	 * Build a skip list that holds starts of unique signatures and a
	 * frequency list of the number of unique and total signatures per
	 * kalloc size class
	 */
	uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
	uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
		kt_skip_list_start, kt_count);

	/*
	 * Create zones based on signatures
	 */
	kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
}

__startup_func
static void
kalloc_type_heap_init(void)
{
	assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
	char kh_name[MAX_ZONE_NAME];
	uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;

	for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
		snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
		kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
		    &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
	}
	/*
	 * All variable kalloc type allocations are collapsed into a single
	 * stat. Individual accounting can be requested via KT_PRIV_ACCT.
	 */
	KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
	zone_view_count += 1;
}

__startup_func
static void
kalloc_type_assign_heap(
	uint32_t start,
	uint32_t end,
	uint32_t heap_id)
{
	bool use_split = kmem_get_random16(1);

	if (use_split) {
		heap_id = kt_var_heaps;
	}
	kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
	    &kt_buffer[end].ktv_var, heap_id);
}

__startup_func
static void
kalloc_type_split_heap(
	uint32_t start,
	uint32_t end,
	uint32_t heap_id)
{
	uint32_t count = start;
	const char *p_name = NULL;

	while (count < end) {
		kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
		const char *c_name = cur->kt_name;

		if (!p_name) {
			assert(count == start);
			p_name = c_name;
		}
		if (strcmp(c_name, p_name) != 0) {
			kalloc_type_assign_heap(start, count, heap_id);
			start = count;
			p_name = c_name;
		}
		count++;
	}
	kalloc_type_assign_heap(start, end, heap_id);
}

__startup_func
static void
kalloc_type_view_init_var(void)
{
	uint32_t buf_start = 0, unique_sig = 0;
	uint32_t *kt_skip_list_start;
	uint16_t *shuffle_buf;
	uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
	uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
	/*
	 * Pick a random heap to split
	 */
	uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);

	/*
	 * Zones are created prior to parsing the views, as the zone budget
	 * is fixed per size class, and special types identified while
	 * parsing are redirected as they are discovered.
	 */
	kalloc_type_heap_init();

	/*
	 * Parse __kalloc_var sections and build an array of pointers to the
	 * views that aren't redirected in kt_buffer.
	 */
	kt_count = kalloc_type_view_parse(KTV_VAR);
	assert(kt_count < UINT32_MAX);

#if MACH_ASSERT
	vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
	vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
	assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
#endif

	/*
	 * Sort based on size class and signature
	 */
	qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_var_view_t),
	    kalloc_type_cmp_var);

	buf_start = kalloc_type_handle_parray_var();

	/*
	 * Build a skip list that holds starts of unique signatures
	 */
	kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
	unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
	    buf_start);
	shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
	/*
	 * If we had no flexible heaps, the remaining elements would have to
	 * share a heap with the pointer arrays; require at least one.
	 */
	if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
		panic("kt_var_heaps is too small");
	}

	kmem_shuffle(shuffle_buf, flex_heap_count);
	/*
	 * The index of the heap we decide to split is placed twice in the
	 * shuffle buffer, so that it gets twice the number of signatures,
	 * which we then split evenly between its two halves.
	 */
	shuffle_buf[flex_heap_count] = split_heap;
	split_heap += (fixed_heaps + 1);

	for (uint32_t i = 1; i <= unique_sig; i++) {
		uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
		    fixed_heaps + 1;
		uint32_t start = kt_skip_list_start[i - 1];
		uint32_t end = kt_skip_list_start[i];

		assert(heap_id <= kt_var_heaps);
		if (heap_id == split_heap) {
			kalloc_type_split_heap(start, end, heap_id);
			continue;
		}
		kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
		    &kt_buffer[end].ktv_var, heap_id);
	}
}

__startup_func
static void
kalloc_init(void)
{
	/*
	 * Allocate scratch space to parse kalloc_type_views and create
	 * other structures necessary to process them.
	 */
	uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);

	static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
	kalloc_zsize_compute();

	/* Initialize kalloc data buffers heap */
	kalloc_heap_init(KHEAP_DATA_BUFFERS);

	/* Initialize kalloc shared data buffers heap */
	kalloc_heap_init(KHEAP_DATA_SHARED);

	/* Initialize kalloc early boot heap */
	kalloc_heap_init(KHEAP_EARLY);

	kmem_alloc(kernel_map, (vm_offset_t *)&kt_buffer, kt_scratch_size,
	    KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT | KMA_SPRAYQTN, VM_KERN_MEMORY_KALLOC);

	/*
	 * Handle fixed size views
	 */
	kalloc_type_view_init_fixed();

	/*
	 * Reset
	 */
	bzero(kt_buffer, kt_scratch_size);
	kt_count = max_count;

	/*
	 * Handle variable size views
	 */
	kalloc_type_view_init_var();

	/*
	 * Free resources used
	 */
	kmem_free(kernel_map, (vm_offset_t) kt_buffer, kt_scratch_size);
}
STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);

#pragma mark accessors

#define KFREE_ABSURD_SIZE \
	((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)

static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)
{
	thread_t thr = current_thread();
	ledger_debit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
}

static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)
{
	thread_t thr = current_thread();
	ledger_credit_thread(thr, thr->t_ledger, task_ledgers.tkm_shared, bytes);
}

static kmem_guard_t
kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
{
	kmem_guard_t guard = {
		.kmg_atomic = true,
		.kmg_tag = tag,
		.kmg_type_hash = type_hash,
		.kmg_context = os_hash_kernel_pointer(owner),
	};

	/*
	 * TODO: this use is really not sufficiently smart.
	 */

	return guard;
}

#if __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING)

#if __arm64e__
#define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)

/*
 * Zone encoding is:
 *
 * <PAC SIG><1><1><PTR value><5 bits of size class>
 *
 * VM encoding is:
 *
 * <PAC SIG><1><0><PTR value><14 bits of page count>
 *
 * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
 * so that PAC authentication extends the proper sign bit.
 */

static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
#else /* __arm64e__ */
#define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)

/*
 * Zone encoding is:
 *
 * <TBI><1><PTR value><5 bits of size class>
 *
 * VM encoding is:
 *
 * <TBI><0><PTR value><14 bits of page count>
 */

static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
#endif /* __arm64e__ */

SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;

__attribute__((always_inline))
struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)
{
	struct kalloc_result kr;
	vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;

	if (ptr & zone_mask) {
		kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
		ptr &= ~0x1full;
	} else if (__probable(ptr)) {
		kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
		ptr &= ~PAGE_MASK;
		ptr |= zone_mask;
	} else {
		kr.size = 0;
	}

	kr.addr = (void *)ptr;
	return kr;
}
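
/*
 * Illustrative decode (not compiled in): a zone-backed pointer keeps
 * its size class in the low 5 bits, so class 0b00010 decodes to
 * (32 + 0) << 2 = 128 bytes and class 0b10010 to (32 + 16) << 2 = 192
 * bytes; the two interleaved sequences reproduce the kalloc
 * 32/48/64/96/128/192/... size steps. A VM-backed pointer instead
 * keeps a page count in its low bits, so a 3-page array decodes to
 * 3 << PAGE_SHIFT bytes, with the zone-tag bit restored to make the
 * address canonical again.
 */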

static inline void *
__kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
{
	return (void *)((vm_address_t)ptr | z->z_array_size_class);
}

static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
{
	addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);

	return addr | atop(size);
}

#else /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */

SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;

/*
 * Encoding is:
 * bits  0..46: pointer value
 * bits 47..47: 0: zones, 1: VM
 * bits 48..63: zones: elem size, VM: number of pages
 */
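
/*
 * Illustrative example (not compiled in): a 64-byte zone element is
 * encoded by clearing bits 47..63 and storing 64 in bits 48..63;
 * decode reads back size 64 and sign-extends bit 46 to recover the
 * canonical kernel address. A VM allocation keeps bit 47 (set for any
 * canonical kernel pointer) and stores its page count in the same top
 * bits, which decode scales back up by PAGE_SHIFT.
 */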

#define KALLOC_ARRAY_TYPE_BIT 47
static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);

__attribute__((always_inline))
struct kalloc_result
__kalloc_array_decode(vm_address_t ptr)
{
	struct kalloc_result kr;
	uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;

	kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
	if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
		kr.size <<= PAGE_SHIFT;
	}
	/* sign extend, so that it also works with NULL */
	kr.addr = (void *)((long)(ptr << shift) >> shift);

	return kr;
}

static inline void *
__kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
{
	vm_address_t addr = (vm_address_t)ptr;

	addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);

	return (void *)addr;
}

static inline vm_address_t
__kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
{
	addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
	addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);

	return addr;
}

#endif /* __arm64e__ || ZSECURITY_CONFIG(ZONE_TAGGING) */

vm_size_t
kalloc_next_good_size(vm_size_t size, uint32_t period)
{
	uint32_t scale = kalloc_log2down((uint32_t)size);
	vm_size_t step, size_class;

	if (size < KHEAP_STEP_START) {
		return KHEAP_STEP_START;
	}
	if (size < 2 * KHEAP_STEP_START) {
		return 2 * KHEAP_STEP_START;
	}

	if (size < KHEAP_MAX_SIZE) {
		step = 1ul << (scale - 1);
	} else {
		step = round_page(1ul << (scale - kalloc_log2down(period)));
	}

	size_class = (size + step) & -step;
#if KASAN_CLASSIC
	if (size > K_SIZE_CLASS(size_class)) {
		return kalloc_next_good_size(size_class, period);
	}
	size_class = K_SIZE_CLASS(size_class);
#endif
	return size_class;
}
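
/*
 * Illustrative arithmetic (not compiled in), assuming KHEAP_STEP_START
 * is 16: below KHEAP_MAX_SIZE the step is half of the allocation's
 * enclosing power of two, so a request of 100 bytes (scale 6, step 32)
 * rounds to (100 + 32) & -32 = 128, while 70 bytes rounds to 96,
 * producing the 16/32/48/64/96/128/... progression. At or above
 * KHEAP_MAX_SIZE the step is instead derived from `period` and
 * page-rounded.
 */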


#pragma mark kalloc

static inline kalloc_heap_t
kalloc_type_get_heap(kalloc_type_flags_t kt_flags)
{
	/*
	 * Redirect data-only views
	 */
	if (kalloc_type_is_data(kt_flags)) {
		/*
		 * There are kexts that allocate arrays of data types (uint8_t etc.)
		 * and use krealloc_data / kfree_data to free them; therefore,
		 * until adoption lands, we need to keep using the shared heap.
		 */
		return GET_KEXT_KHEAP_DATA();
	}

	if (kt_flags & KT_PROCESSED) {
		return KHEAP_KT_VAR;
	}

	return KHEAP_DEFAULT;
}


__attribute__((noinline))
static struct kalloc_result
kalloc_large(
	kalloc_heap_t kheap,
	vm_size_t req_size,
	zalloc_flags_t flags,
	uint16_t kt_hash,
	void *owner __unused)
{
	kma_flags_t kma_flags = KMA_KASAN_GUARD;
	vm_tag_t tag;
	vm_offset_t addr, size;

	if (flags & Z_NOFAIL) {
		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
		    (size_t)req_size);
	}

	/*
	 * kmem_alloc can block, so bail out if Z_NOWAIT was passed.
	 *
	 * Also, quickly reject sizes larger than our address space,
	 * as kt_size or IOMallocArraySize() expect this.
	 */
	if ((flags & Z_NOWAIT) ||
	    (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
		return (struct kalloc_result){ };
	}

	if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
		return (struct kalloc_result){ };
	}

	/*
	 * (73465472) on Intel we didn't use to pass this flag,
	 * which in turn allowed kalloc_large() memory to be shared
	 * with user space directly.
	 *
	 * We're bound by this unfortunate ABI.
	 */
	if ((flags & Z_MAY_COPYINMAP) == 0) {
#ifndef __x86_64__
		kma_flags |= KMA_KOBJECT;
#endif
	} else {
		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
	}
	if (flags & Z_NOPAGEWAIT) {
		kma_flags |= KMA_NOPAGEWAIT;
	}
	if (flags & Z_ZERO) {
		kma_flags |= KMA_ZERO;
	}
	if (kheap == KHEAP_DATA_BUFFERS) {
		kma_flags |= KMA_DATA;
	} else if (kheap == KHEAP_DATA_SHARED) {
		kma_flags |= KMA_DATA_SHARED;
	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
		kma_flags |= KMA_SPRAYQTN;
	}
	if (flags & Z_NOSOFTLIMIT) {
		kma_flags |= KMA_NOSOFTLIMIT;
	}

	tag = zalloc_flags_get_tag(flags);
	if (flags & Z_VM_TAG_BT_BIT) {
		tag = vm_tag_bt() ?: tag;
	}
	if (tag == VM_KERN_MEMORY_NONE) {
		tag = kheap->kh_tag;
	}

	size = round_page(req_size);
	if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
		req_size = size;
	}

	addr = kmem_alloc_guard(kernel_map, req_size, 0,
	    kma_flags, kalloc_guard(tag, kt_hash, owner)).kmr_address;

	if (addr != 0) {
		counter_inc(&kalloc_large_count);
		counter_add(&kalloc_large_total, size);
		KALLOC_ZINFO_SALLOC(size);
		if (flags & Z_KALLOC_ARRAY) {
			addr = __kalloc_array_encode_vm(addr, req_size);
		}
	} else {
		addr = 0;
	}

	DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
	return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
}

#if KASAN

static inline void
kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
{
#if KASAN_CLASSIC
	/*
	 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
	 * tagging of the memory region is performed here.
	 */
	kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
	    __builtin_frame_address(0));
#endif /* KASAN_CLASSIC */

#if KASAN_TBI
	kasan_tbi_retag_unused_space(addr, size, used ?: 1);
#endif /* KASAN_TBI */
}
#endif /* KASAN */

static inline struct kalloc_result
kalloc_zone(
	zone_t z,
	zone_stats_t zstats,
	zalloc_flags_t flags,
	vm_size_t req_size)
{
	struct kalloc_result kr;
	vm_size_t esize;

	kr = zalloc_ext(z, zstats ?: z->z_stats, flags | Z_SKIP_KASAN);
	esize = kr.size;

	if (__probable(kr.addr)) {
		if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
			req_size = esize;
		} else {
			kr.size = req_size;
		}
#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
		kr.addr = zone_element_pgz_oob_adjust(kr.addr, req_size, esize);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */

#if KASAN
		kalloc_mark_unused_space(kr.addr, esize, kr.size);
#endif /* KASAN */

		if (flags & Z_KALLOC_ARRAY) {
			kr.addr = __kalloc_array_encode_zone(z, kr.addr, kr.size);
		}
	}

	DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
	return kr;
}

static zone_id_t
kalloc_use_early_heap(
	kalloc_heap_t kheap,
	zone_stats_t zstats,
	zone_id_t zstart,
	zalloc_flags_t *flags)
{
	if (!zone_is_data_kheap(kheap->kh_heap_id)) {
		zone_stats_t zstats_cpu = zpercpu_get(zstats);

		if (os_atomic_load(&zstats_cpu->zs_alloc_not_early, relaxed) == 0) {
			*flags |= Z_SET_NOTEARLY;
			return KHEAP_EARLY->kh_zstart;
		}
	}

	return zstart;
}

#undef kalloc_ext

__mockable struct kalloc_result
kalloc_ext(
	void *kheap_or_kt_view,
	vm_size_t size,
	zalloc_flags_t flags,
	void *owner)
{
	kalloc_type_var_view_t kt_view;
	kalloc_heap_t kheap;
	zone_stats_t zstats = NULL;
	zone_t z;
	uint16_t kt_hash;
	zone_id_t zstart;

	if (kt_is_var_view(kheap_or_kt_view)) {
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
		kheap = kalloc_type_get_heap(kt_view->kt_flags);
		/*
		 * Use stats from the view if present, else use stats from the
		 * kheap. KHEAP_KT_VAR accumulates stats for all allocations
		 * going to kalloc.type.var zones, while KHEAP_DEFAULT and
		 * KHEAP_DATA_BUFFERS use stats from the respective zones.
		 */
		zstats = kt_view->kt_stats;
		kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
		zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
	} else {
		kt_view = NULL;
		kheap = kheap_or_kt_view;
		kt_hash = kheap->kh_type_hash;
		zstart = kheap->kh_zstart;
	}

	if (!zstats) {
		zstats = kheap->kh_stats;
	}

	zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
	z = kalloc_zone_for_size_with_flags(zstart, size, flags);
	if (z) {
		return kalloc_zone(z, zstats, flags, size);
	} else {
		return kalloc_large(kheap, size, flags, kt_hash, owner);
	}
}
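
/*
 * Illustrative call (not compiled in):
 *
 *   kalloc_ext(KHEAP_DATA_BUFFERS, 100, Z_WAITOK | Z_ZERO, NULL)
 *
 * resolves a zone from the data heap's size classes and takes the
 * kalloc_zone() path, whereas a request above KHEAP_MAX_SIZE finds no
 * zone and falls through to kalloc_large(), carrying the heap's type
 * hash into the kmem guard.
 */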

#if XNU_PLATFORM_MacOSX
void *
kalloc_external(vm_size_t size);
void *
kalloc_external(vm_size_t size)
{
	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
	return kheap_alloc(KHEAP_DEFAULT, size, flags);
}
#endif /* XNU_PLATFORM_MacOSX */

void *
kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
void *
kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
	return kheap_alloc(GET_KEXT_KHEAP_DATA(), size, flags);
}

void *
kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags);
void *
kalloc_shared_data_external(vm_size_t size, zalloc_flags_t flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
	return kheap_alloc(KHEAP_DATA_SHARED, size, flags);
}

__abortlike
static void
kalloc_data_require_panic(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];

		if (!zone_is_data_kheap(zsflags.z_kheap_id)) {
			panic("kalloc_data_require failed: address %p in [%s%s]",
			    addr, zone_heap_name(z), zone_name(z));
		}

		panic("kalloc_data_require failed: address %p in [%s%s], "
		    "size too large %zd > %zd", addr,
		    zone_heap_name(z), zone_name(z),
		    (size_t)size, (size_t)zone_elem_inner_size(z));
	} else {
		panic("kalloc_data_require failed: address %p not in zone native map",
		    addr);
	}
}

__abortlike
static void
kalloc_non_data_require_panic(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];

		switch (zsflags.z_kheap_id) {
		case KHEAP_ID_NONE:
		case KHEAP_ID_DATA_BUFFERS:
		case KHEAP_ID_DATA_SHARED:
		case KHEAP_ID_KT_VAR:
			panic("kalloc_non_data_require failed: address %p in [%s%s]",
			    addr, zone_heap_name(z), zone_name(z));
		default:
			break;
		}

		panic("kalloc_non_data_require failed: address %p in [%s%s], "
		    "size too large %zd > %zd", addr,
		    zone_heap_name(z), zone_name(z),
		    (size_t)size, (size_t)zone_elem_inner_size(z));
	} else {
		panic("kalloc_non_data_require failed: address %p not in zone native map",
		    addr);
	}
}

void
kalloc_data_require(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zone_is_data_kheap(zsflags.z_kheap_id) &&
		    size <= zone_elem_inner_size(z)) {
			return;
		}
	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
	    (vm_address_t)addr, size)) {
		return;
	} else if (kmem_needs_data_share_range() &&
	    kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
	    (vm_address_t)addr, size)) {
		return;
	}

	kalloc_data_require_panic(addr, size);
}

void
kalloc_non_data_require(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_element(addr, size);

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];
		switch (zsflags.z_kheap_id) {
		case KHEAP_ID_NONE:
			if (!zsflags.z_kalloc_type) {
				break;
			}
			OS_FALLTHROUGH;
		case KHEAP_ID_KT_VAR:
			if (size < zone_elem_inner_size(z)) {
				return;
			}
			break;
		default:
			break;
		}
	} else if (!kmem_range_id_contains(KMEM_RANGE_ID_DATA,
	    (vm_address_t)addr, size)) {
		return;
	} else if (kmem_needs_data_share_range() &&
	    !kmem_range_id_contains(KMEM_RANGE_ID_DATA_SHARED,
	    (vm_address_t)addr, size)) {
		return;
	}

	kalloc_non_data_require_panic(addr, size);
}

bool
kalloc_is_data_buffers(void *addr, vm_size_t size)
{
	zone_id_t zid = zone_id_for_element(addr, size);

	/*
	 * If we do not use a dedicated data share range,
	 * there is no way to fully distinguish between
	 * the shared and buffers heaps.
	 *
	 * When kmem_needs_data_share_range() == true, the
	 * KMEM_RANGE_ID_DATA range is strictly for DATA_BUFFERS,
	 * and KMEM_RANGE_ID_DATA_SHARED is strictly for DATA_SHARED.
	 */
	assert(kmem_needs_data_share_range());

	if (zid != ZONE_ID_INVALID) {
		zone_t z = &zone_array[zid];
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zone_is_data_buffers_kheap(zsflags.z_kheap_id) &&
		    size <= zone_elem_inner_size(z)) {
			return true;
		}
	} else if (kmem_range_id_contains(KMEM_RANGE_ID_DATA,
	    (vm_address_t)addr, size)) {
		return true;
	}

	return false;
}

__mockable void *
kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
{
	/*
	 * Callsites from a kext that isn't in the BootKC on macOS, or
	 * any callsite on armv7, are not processed during startup, so
	 * default to using kheap_alloc.
	 *
	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the
	 * zone is left NULL as we need to use the VM for the allocation.
	 */
	if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
		kalloc_heap_t kheap;
		vm_size_t size;

		flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
		size = kalloc_type_get_size(kt_view->kt_size);
		kheap = kalloc_type_get_heap(kt_view->kt_flags);
		return kalloc_ext(kheap, size, flags, NULL).addr;
	}

	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
	return kalloc_type_impl(kt_view, flags);
}

void *
kalloc_type_var_impl_external(
	kalloc_type_var_view_t kt_view,
	vm_size_t size,
	zalloc_flags_t flags,
	void *owner);
void *
kalloc_type_var_impl_external(
	kalloc_type_var_view_t kt_view,
	vm_size_t size,
	zalloc_flags_t flags,
	void *owner)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
	return kalloc_type_var_impl(kt_view, size, flags, owner);
}

#pragma mark kfree

__abortlike
static void
kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
{
	zone_security_flags_t zsflags = zone_security_config(z);
	const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];

	if (zsflags.z_kalloc_type) {
		panic_include_kalloc_types = true;
		kalloc_type_src_zone = z;
		panic("kfree: addr %p found in kalloc type zone '%s' "
		    "but being freed to %s heap", data, z->z_name, kheap_name);
	}

	if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
		panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
		    data, size, zone_heap_name(z), z->z_name);
	} else {
		panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
		    data, size, zone_heap_name(z), kheap_name);
	}
}

__abortlike
static void
kfree_size_confusion_panic(zone_t z, void *data,
    size_t oob_offs, size_t size, size_t zsize)
{
	if (z) {
		panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
		    "with elem_size %zd",
		    data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
	} else {
		panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
		    data, size, oob_offs);
	}
}

__abortlike
static void
kfree_size_invalid_panic(void *data, size_t size)
{
	panic("kfree: addr %p trying to free with nonsensical size %zd",
	    data, size);
}

__abortlike
static void
kfree_size_require_panic(void *data, size_t size, size_t min_size,
    size_t max_size)
{
	panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
	    data, size, min_size, max_size);
}

static void
kfree_size_require(
	kalloc_heap_t kheap,
	void *addr,
	vm_size_t min_size,
	vm_size_t max_size)
{
	assert3u(min_size, <=, max_size);
	zone_t max_zone = kalloc_zone_for_size(kheap->kh_zstart, max_size);
	vm_size_t max_zone_size = zone_elem_inner_size(max_zone);
	vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
	if (elem_size > max_zone_size || elem_size < min_size) {
		kfree_size_require_panic(addr, elem_size, min_size, max_zone_size);
	}
}

static void
kfree_large(
	vm_offset_t addr,
	vm_size_t size,
	kmf_flags_t flags,
	void *owner)
{
	size = kmem_free_guard(kernel_map, addr, size,
	    flags | KMF_TAG | KMF_KASAN_GUARD,
	    kalloc_guard(VM_KERN_MEMORY_NONE, 0, owner));

	counter_dec(&kalloc_large_count);
	counter_add(&kalloc_large_total, -(uint64_t)size);
	KALLOC_ZINFO_SFREE(size);
	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
}

static void
kfree_zone(
	void *kheap_or_kt_view __unsafe_indexable,
	void *data,
	vm_size_t size,
	zone_t z,
	vm_size_t zsize)
{
	zone_security_flags_t zsflags = zone_security_config(z);
	kalloc_type_var_view_t kt_view;
	kalloc_heap_t kheap;
	zone_stats_t zstats = NULL;

	if (kt_is_var_view(kheap_or_kt_view)) {
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
		kheap = kalloc_type_get_heap(kt_view->kt_flags);
		/*
		 * Note: if we have cross frees between KHEAP_KT_VAR and
		 * KHEAP_DEFAULT we will end up with incorrect stats. Cross
		 * frees may happen on macOS when an allocation from an
		 * unprocessed view is freed through a processed view, or
		 * vice versa.
		 */
		zstats = kt_view->kt_stats;
	} else {
		kt_view = NULL;
		kheap = kheap_or_kt_view;
	}

	if (!zstats) {
		zstats = kheap->kh_stats;
	}

	zsflags = zone_security_config(z);
	if (kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED) {
		if (kheap->kh_heap_id != zsflags.z_kheap_id) {
			kfree_heap_confusion_panic(kheap, data, size, z);
		}
	} else {
		if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
		    (zsflags.z_kheap_id != KHEAP_ID_EARLY)) {
			kfree_heap_confusion_panic(kheap, data, size, z);
		}
	}

	DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);

	/* needs to be __nosan because the user size might be partial */
	__nosan_bzero(data, zsize);
	zfree_ext(z, zstats ?: z->z_stats, data, ZFREE_PACK_SIZE(zsize, size));
}

__mockable void
kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
{
	vm_size_t bucket_size;
	zone_t z;

	if (data == NULL) {
		return;
	}

	if (size > KFREE_ABSURD_SIZE) {
		kfree_size_invalid_panic(data, size);
	}

	if (size <= KHEAP_MAX_SIZE) {
		vm_size_t oob_offs;

		bucket_size = zone_element_size(data, &z, true, &oob_offs);
		if (size + oob_offs > bucket_size || bucket_size == 0) {
			kfree_size_confusion_panic(z, data,
			    oob_offs, size, bucket_size);
		}

		data = (char *)data - oob_offs;
		kfree_zone(kheap_or_kt_view, data, size, z, bucket_size);
	} else {
		kfree_large((vm_offset_t)data, size, KMF_NONE, NULL);
	}
}

void
kfree_addr_ext(kalloc_heap_t kheap, void *data)
{
	vm_offset_t oob_offs;
	vm_size_t size, usize = 0;
	zone_t z;

	if (data == NULL) {
		return;
	}

	size = zone_element_size(data, &z, true, &oob_offs);
	if (size) {
#if KASAN_CLASSIC
		usize = kasan_user_size((vm_offset_t)data);
#endif
		data = (char *)data - oob_offs;
		kfree_zone(kheap, data, usize, z, size);
	} else {
		kfree_large((vm_offset_t)data, 0, KMF_GUESS_SIZE, NULL);
	}
}

#if XNU_PLATFORM_MacOSX
void
kfree_external(void *addr, vm_size_t size);
void
kfree_external(void *addr, vm_size_t size)
{
	kalloc_heap_t kheap = KHEAP_DEFAULT;

	kfree_ext(kheap, addr, size);
}
#endif /* XNU_PLATFORM_MacOSX */

void
(kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
    vm_size_t min_sz, vm_size_t max_sz)
{
	if (__improbable(addr == NULL)) {
		return;
	}
	kfree_size_require(kheap, addr, min_sz, max_sz);
	kfree_addr_ext(kheap, addr);
}
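
/*
 * Illustrative use (not compiled in): kheap_free_bounded(kheap, ptr,
 * 16, 64) panics unless ptr's backing element size lies between 16
 * bytes and the inner size of the heap's size class for 64 bytes;
 * it serves callers that lost the exact allocation size but still
 * know its bounds.
 */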

__mockable void *
kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
{
	zone_stats_t zs = kt_view->kt_zv.zv_stats;
	zone_t z = kt_view->kt_zv.zv_zone;
	zone_stats_t zs_cpu = zpercpu_get(zs);

	if ((flags & Z_SET_NOTEARLY) ||
	    os_atomic_load(&zs_cpu->zs_alloc_not_early, relaxed)) {
		return zalloc_ext(z, zs, flags).addr;
	}

	assert(!zone_is_data_kheap(zone_security_config(z).z_kheap_id));
	return zalloc_ext(kt_view->kt_zearly, zs, flags | Z_SET_NOTEARLY).addr;
}
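
/*
 * Early-boot redirection: while the per-CPU zs_alloc_not_early marker
 * of this view's stats is still clear, typed allocations are served
 * from the shared kt_zearly zone rather than the view's dedicated
 * zone; once the marker is observed set (or the caller passes
 * Z_SET_NOTEARLY), allocations go straight to the dedicated zone.
 */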

void
kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
{
	/*
	 * If the callsite is from a kext that isn't in the BootKC, it wasn't
	 * processed during startup, so default to using kheap_free.
	 *
	 * Additionally, when the size is greater than KHEAP_MAX_SIZE, the
	 * zone is left NULL as we need to use the VM for the allocation/free.
	 */
	if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
		kalloc_heap_t kheap;
		vm_size_t size;

		size = kalloc_type_get_size(kt_view->kt_size);
		kheap = kalloc_type_get_heap(kt_view->kt_flags);
		return kheap_free(kheap, ptr, size);
	}
	return kfree_type_impl(kt_view, ptr);
}

void
kfree_type_var_impl_external(
	kalloc_type_var_view_t kt_view,
	void *ptr,
	vm_size_t size);
void
kfree_type_var_impl_external(
	kalloc_type_var_view_t kt_view,
	void *ptr,
	vm_size_t size)
{
	return kfree_type_var_impl(kt_view, ptr, size);
}

void
kfree_data_external(void *ptr, vm_size_t size);
void
kfree_data_external(void *ptr, vm_size_t size)
{
	return kheap_free(GET_KEXT_KHEAP_DATA(), ptr, size);
}

void
kfree_data_addr_external(void *ptr);
void
kfree_data_addr_external(void *ptr)
{
	return kheap_free_addr(GET_KEXT_KHEAP_DATA(), ptr);
}

void
kfree_shared_data_external(void *ptr, vm_size_t size);
void
kfree_shared_data_external(void *ptr, vm_size_t size)
{
	return kheap_free(KHEAP_DATA_SHARED, ptr, size);
}

void
kfree_shared_data_addr_external(void *ptr);
void
kfree_shared_data_addr_external(void *ptr)
{
	return kheap_free_addr(KHEAP_DATA_SHARED, ptr);
}

#pragma mark krealloc

__abortlike
static void
krealloc_size_invalid_panic(void *data, size_t size)
{
	panic("krealloc: addr %p trying to free with nonsensical size %zd",
	    data, size);
}


__attribute__((noinline))
static struct kalloc_result
krealloc_large(
	kalloc_heap_t kheap,
	vm_offset_t addr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags,
	uint16_t kt_hash,
	void *owner __unused)
{
	kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_KASAN_GUARD;
	vm_size_t new_req_size = new_size;
	vm_size_t old_req_size = old_size;
	uint64_t delta;
	kmem_return_t kmr;
	vm_tag_t tag;

	if (flags & Z_NOFAIL) {
		panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
		    (size_t)new_req_size);
	}

	/*
	 * kmem_realloc_guard can block, so bail out if Z_NOWAIT was passed.
	 *
	 * Also, quickly reject sizes larger than our address space,
	 * as kt_size or IOMallocArraySize() expect this.
	 */
	if ((flags & Z_NOWAIT) ||
	    (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
		return (struct kalloc_result){ };
	}

	/*
	 * (73465472) on Intel we didn't use to pass this flag,
	 * which in turn allowed kalloc_large() memory to be shared
	 * with user space directly.
	 *
	 * We're bound by this unfortunate ABI.
	 */
	if ((flags & Z_MAY_COPYINMAP) == 0) {
#ifndef __x86_64__
		kmr_flags |= KMR_KOBJECT;
#endif
	} else {
		assert(kheap == KHEAP_DATA_BUFFERS || kheap == KHEAP_DATA_SHARED);
	}
	if (flags & Z_NOPAGEWAIT) {
		kmr_flags |= KMR_NOPAGEWAIT;
	}
	if (flags & Z_ZERO) {
		kmr_flags |= KMR_ZERO;
	}
	if (kheap == KHEAP_DATA_BUFFERS) {
		kmr_flags |= KMR_DATA;
	} else if (kheap == KHEAP_DATA_SHARED) {
		kmr_flags |= KMR_DATA_SHARED;
	} else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
		kmr_flags |= KMR_SPRAYQTN;
	}
	if (flags & Z_REALLOCF) {
		kmr_flags |= KMR_REALLOCF;
	}

#if ZSECURITY_CONFIG(ZONE_TAGGING)
	krealloc_enforce_large_tagging_policy(&kmr_flags, kheap);
#endif /* ZSECURITY_CONFIG(ZONE_TAGGING) */

	tag = zalloc_flags_get_tag(flags);
	if (flags & Z_VM_TAG_BT_BIT) {
		tag = vm_tag_bt() ?: tag;
	}
	if (tag == VM_KERN_MEMORY_NONE) {
		tag = kheap->kh_tag;
	}

	kmr = kmem_realloc_guard(kernel_map, addr, old_req_size, new_req_size,
	    kmr_flags, kalloc_guard(tag, kt_hash, owner));

	new_size = round_page(new_req_size);
	old_size = round_page(old_req_size);

	if (kmr.kmr_address != 0) {
		delta = (uint64_t)(new_size - old_size);
	} else if (flags & Z_REALLOCF) {
		counter_dec(&kalloc_large_count);
		delta = (uint64_t)(-old_size);
	} else {
		delta = 0;
	}

	counter_add(&kalloc_large_total, delta);
	KALLOC_ZINFO_SALLOC(delta);

	if (addr != 0 || (flags & Z_REALLOCF)) {
		DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
		    void*, addr);
	}
	if (__improbable(kmr.kmr_address == 0)) {
		return (struct kalloc_result){ };
	}

	DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
	    void*, kmr.kmr_address);

	if (flags & Z_KALLOC_ARRAY) {
		kmr.kmr_address = __kalloc_array_encode_vm(kmr.kmr_address,
		    new_req_size);
	}
	return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
}

#undef krealloc_ext

struct kalloc_result
krealloc_ext(
	void *kheap_or_kt_view __unsafe_indexable,
	void *addr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags,
	void *owner)
{
	vm_size_t old_bucket_size, new_bucket_size, min_size;
	kalloc_type_var_view_t kt_view;
	kalloc_heap_t kheap;
	zone_stats_t zstats = NULL;
	struct kalloc_result kr;
	vm_offset_t oob_offs = 0;
	zone_t old_z, new_z;
	uint16_t kt_hash = 0;
	zone_id_t zstart;

	if (old_size > KFREE_ABSURD_SIZE) {
		krealloc_size_invalid_panic(addr, old_size);
	}

	if (addr == NULL && new_size == 0) {
		return (struct kalloc_result){ };
	}

	if (kt_is_var_view(kheap_or_kt_view)) {
		kt_view = kt_demangle_var_view(kheap_or_kt_view);
		kheap = kalloc_type_get_heap(kt_view->kt_flags);
		/*
		 * Similar to kalloc_ext: use stats from the view if present,
		 * else use stats from the kheap.
		 *
		 * krealloc_type isn't exposed to kexts, so we don't need to
		 * handle cross frees and can rely on stats from the view or
		 * the kheap.
		 */
		zstats = kt_view->kt_stats;
		kt_hash = KT_GET_HASH(kt_view->kt_flags);
		zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
	} else {
		kt_view = NULL;
		kheap = kheap_or_kt_view;
		kt_hash = kheap->kh_type_hash;
		zstart = kheap->kh_zstart;
	}

	if (!zstats) {
		zstats = kheap->kh_stats;
	}
	/*
	 * Find out the size of the bucket in which the new sized allocation
	 * would land. If it matches the bucket of the original allocation,
	 * simply return the same address.
	 */
	if (new_size == 0) {
		new_z = ZONE_NULL;
		new_bucket_size = new_size = 0;
	} else {
		zstart = kalloc_use_early_heap(kheap, zstats, zstart, &flags);
		new_z = kalloc_zone_for_size_with_flags(zstart, new_size, flags);
		new_bucket_size = new_z ? zone_elem_inner_size(new_z) : round_page(new_size);
	}
#if !KASAN_CLASSIC
	if (flags & Z_FULLSIZE) {
		new_size = new_bucket_size;
	}
#endif /* !KASAN_CLASSIC */

	if (addr == NULL) {
		old_z = ZONE_NULL;
		old_size = old_bucket_size = 0;
	} else if (kheap_size_from_zone(addr, old_size, flags)) {
		old_bucket_size = zone_element_size(addr, &old_z, true, &oob_offs);
		if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
			kfree_size_confusion_panic(old_z, addr,
			    oob_offs, old_size, old_bucket_size);
		}
		__builtin_assume(old_z != ZONE_NULL);
	} else {
		old_z = ZONE_NULL;
		old_bucket_size = round_page(old_size);
	}
	min_size = MIN(old_size, new_size);

	if (old_bucket_size == new_bucket_size && old_z) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
		kr.addr = zone_element_pgz_oob_adjust(kr.addr,
		    new_size, new_bucket_size);
		if (kr.addr != addr) {
			memmove(kr.addr, addr, min_size);
			bzero((char *)kr.addr + min_size,
			    kr.size - min_size);
		}
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
#if KASAN
		/*
		 * On KASAN kernels, treat a reallocation effectively as a new
		 * allocation and add a sanity check on the existing one
		 * w.r.t. the old requested size. On KASAN_CLASSIC this
		 * doesn't amount to much extra work; on KASAN_TBI, assign a
		 * new tag both to the buffer and to the potential free space.
		 */
#if KASAN_CLASSIC
		kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
		kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
		    KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		/*
		 * Validate the current buffer, then generate a new tag;
		 * even if the address is stable, it's a "new" allocation.
		 */
		__asan_loadN((vm_offset_t)addr, old_size);
		kr.addr = vm_memtag_generate_and_store_tag(kr.addr, kr.size);
		kasan_tbi_retag_unused_space(kr.addr, new_bucket_size, kr.size);
#endif /* KASAN_TBI */
#endif /* KASAN */
		goto out_success;
	}

#if !KASAN
	/*
	 * Fall through to krealloc_large() for KASAN,
	 * because we can't use kasan_check_alloc()
	 * on kalloc_large() memory.
	 *
	 * kmem_realloc_guard() will perform all the validations,
	 * and re-tagging.
	 */
	if (old_bucket_size == new_bucket_size) {
		kr.addr = (char *)addr - oob_offs;
		kr.size = new_size;
		goto out_success;
	}
#endif

	if (addr && !old_z && new_size && !new_z) {
		return krealloc_large(kheap, (vm_offset_t)addr,
		    old_size, new_size, flags, kt_hash, owner);
	}

	if (!new_size) {
		kr.addr = NULL;
		kr.size = 0;
	} else if (new_z) {
		kr = kalloc_zone(new_z, zstats,
		    flags & ~Z_KALLOC_ARRAY, new_size);
	} else if (old_z || addr == NULL) {
		kr = kalloc_large(kheap, new_size,
		    flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
	}

	if (addr && kr.addr) {
		__nosan_memcpy(kr.addr, addr, min_size);
	}

	if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
		if (old_z) {
			kfree_zone(kheap_or_kt_view,
			    (char *)addr - oob_offs, old_size,
			    old_z, old_bucket_size);
		} else {
			kfree_large((vm_offset_t)addr, old_size, KMF_NONE, owner);
		}
	}

	if (__improbable(kr.addr == NULL)) {
		return kr;
	}

out_success:
	if ((flags & Z_KALLOC_ARRAY) == 0) {
		return kr;
	}

	if (new_z) {
		kr.addr = __kalloc_array_encode_zone(new_z,
		    kr.addr, kr.size);
	} else {
		kr.addr = (void *)__kalloc_array_encode_vm((vm_offset_t)kr.addr,
		    kr.size);
	}
	return kr;
}
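
/*
 * Illustrative behavior (not compiled in), assuming 64- and 128-byte
 * size classes: growing an allocation from 60 to 120 bytes crosses
 * buckets, so krealloc_ext() allocates from the 128-byte zone, copies
 * MIN(old, new) = 60 bytes, and frees the old element; shrinking 120
 * to 100 bytes stays within the same bucket and returns the original
 * address (modulo a possible PGZ out-of-bounds adjustment).
 */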

void *
krealloc_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags);
void *
krealloc_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

void *
krealloc_shared_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags);
void *
krealloc_shared_data_external(
	void *ptr,
	vm_size_t old_size,
	vm_size_t new_size,
	zalloc_flags_t flags)
{
	flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_SHARED);
	return krealloc_ext(GET_KEXT_KHEAP_DATA(), ptr, old_size, new_size, flags, NULL).addr;
}

__startup_func
static void
kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
{
	kheap->kh_zstart = parent_heap->kh_zstart;
	kheap->kh_heap_id = parent_heap->kh_heap_id;
	kheap->kh_tag = parent_heap->kh_tag;
	kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
	zone_view_count += 1;
}

__startup_func
static void
kheap_init_data(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_BUFFERS, kheap);
	kheap->kh_views = KHEAP_DATA_BUFFERS->kh_views;
	KHEAP_DATA_BUFFERS->kh_views = kheap;
}

__startup_func
static void
kheap_init_data_shared(kalloc_heap_t kheap)
{
	kheap_init(KHEAP_DATA_SHARED, kheap);
	kheap->kh_views = KHEAP_DATA_SHARED->kh_views;
	KHEAP_DATA_SHARED->kh_views = kheap;
}

__startup_func
static void
kheap_init_var(kalloc_heap_t kheap)
{
	uint16_t idx;
	struct kheap_info *parent_heap;

	kheap_init(KHEAP_KT_VAR, kheap);
	idx = kmem_get_random16(kt_var_heaps - kt_var_ptr_heaps - 1) +
	    KT_VAR__FIRST_FLEXIBLE_HEAP;
	parent_heap = &kalloc_type_heap_array[idx];
	kheap->kh_zstart = parent_heap->kh_zstart;
	kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
		(uint32_t) early_random(), 0);
	kheap->kh_views = parent_heap->kh_views;
	parent_heap->kh_views = kheap;
}

__startup_func
void
kheap_startup_init(kalloc_heap_t kheap)
{
	switch (kheap->kh_heap_id) {
	case KHEAP_ID_DATA_BUFFERS:
		kheap_init_data(kheap);
		break;
	case KHEAP_ID_DATA_SHARED:
		kheap_init_data_shared(kheap);
		break;
	case KHEAP_ID_KT_VAR:
		kheap_init_var(kheap);
		break;
	default:
		panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
		    kheap->kh_heap_id);
	}
}

#pragma mark IOKit/libkern helpers

#if XNU_PLATFORM_MacOSX

void *
kern_os_malloc_external(size_t size);
void *
kern_os_malloc_external(size_t size)
{
	if (size == 0) {
		return NULL;
	}

	return kheap_alloc(KERN_OS_MALLOC, size,
	    Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
}

void
kern_os_free_external(void *addr);
void
kern_os_free_external(void *addr)
{
	kheap_free_addr(KERN_OS_MALLOC, addr);
}

void *
kern_os_realloc_external(void *addr, size_t nsize);
void *
kern_os_realloc_external(void *addr, size_t nsize)
{
	zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
	vm_size_t osize, oob_offs = 0;

	if (addr == NULL) {
		return kern_os_malloc_external(nsize);
	}

	osize = zone_element_size(addr, NULL, false, &oob_offs);
	if (osize == 0) {
		osize = kmem_size_guard(kernel_map, (vm_offset_t)addr,
		    kalloc_guard(VM_KERN_MEMORY_LIBKERN, 0, NULL));
#if KASAN_CLASSIC
	} else {
		osize = kasan_user_size((vm_offset_t)addr);
#endif
	}
	return __kheap_realloc(KERN_OS_MALLOC, addr, osize - oob_offs, nsize, flags, NULL);
}

#endif /* XNU_PLATFORM_MacOSX */

void
kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(size)
	zfree(zone, addr);
#else
	if (zone_owns(zone, addr)) {
		zfree(zone, addr);
	} else {
		/*
		 * Third party kexts might not know about the operator new
		 * overrides and may have allocated from the default heap.
		 */
		printf("kern_os_zfree: kheap_free called for object from zone %s\n",
		    zone->z_name);
		kheap_free(KHEAP_DEFAULT, addr, size);
	}
#endif
}

bool
IOMallocType_from_vm(kalloc_type_view_t ktv)
{
	return kalloc_type_from_vm(ktv->kt_flags);
}

void
kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
{
#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
#pragma unused(esize)
#else
	/*
	 * For third party kexts compiled with an SDK predating macOS 11,
	 * an allocation of an OSObject that is defined in xnu or first
	 * party kexts, made by directly calling new, ends up in the
	 * default heap, as it goes through OSObject_operator_new_external.
	 * If such an object is freed by xnu, it panics, as xnu uses the
	 * typed free, which requires the object to have been allocated in
	 * a kalloc.type zone. To work around this issue, detect if the
	 * allocation being freed came from the default heap and allow
	 * freeing to it.
	 */
	zone_id_t zid = zone_id_for_element(addr, esize);
	if (__probable(zid < MAX_ZONES)) {
		zone_security_flags_t zsflags = zone_security_array[zid];
		if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
			return kheap_free(KHEAP_DEFAULT, addr, esize);
		}
	}
#endif
	kfree_type_impl_external(ktv, addr);
}

#pragma mark tests
#if DEBUG || DEVELOPMENT

#include <sys/random.h>

/*
 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
 *
 * Note: the presence of zones named kalloc.type* is used to
 * determine whether the feature is on.
 */
static int
kalloc_type_feature_on(void)
{
	boolean_t zone_found = false;
	const char kalloc_type_str[] = "kalloc.type";
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			zone_found = true;
			if (strncmp(z->z_name, kalloc_type_str,
			    strlen(kalloc_type_str)) != 0) {
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!zone_found) {
		return 0;
	}

	return 1;
}

/*
 * Ensure that the policy uses the zone budget completely
 */
static int
kalloc_type_test_policy(int64_t in)
{
	uint16_t zone_budget = (uint16_t) in;
	uint16_t max_bucket_freq = 25;
	uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
	uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
	uint16_t wasted_zone_budget = 0, total_types = 0;
	uint16_t n_zones = 0, n_zones_cal = 0;
	int ret = 0;

	/*
	 * Need a minimum of 2 zones per size class
	 */
	if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
		return ret;
	}
	read_random((void *)&random[0], sizeof(random));
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
		uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;

		freq_list[i] = r1 > r2 ? r2 : r1;
		freq_total_list[i] = r1 > r2 ? r1 : r2;
	}
	wasted_zone_budget = kalloc_type_apply_policy(
		freq_list, freq_total_list,
		zones_per_sig, zones_per_type, zone_budget);

	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		total_types += freq_total_list[i];
	}

	n_zones = kmem_get_random16(total_types);
	printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
		    freq_total_list[i], total_types,
		    i == MAX_K_ZONE(kt_zone_cfg) - 1);

		n_zones_cal += n_zones_for_type;

		printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
	}
	printf("-----------------------\n%u\t%u\n", total_types,
	    n_zones_cal);

	if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
		ret = 1;
	}
	return ret;
}
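
/*
 * Illustrative sketch (not compiled): the property the loop above
 * checks. Assuming kalloc_type_zones_for_type() hands each size class
 * a share proportional to its type count and gives the remainder to
 * the last class, the per-class shares must add back up to n_zones.
 * The helper below is hypothetical, not the actual policy.
 */
#if 0
static uint16_t
example_zones_for_type(uint16_t n_zones, uint16_t n_types,
    uint16_t total_types, bool last, uint16_t *assigned)
{
	uint16_t share;

	if (last) {
		/* remainder goes to the last class so the sum stays exact */
		share = n_zones - *assigned;
	} else {
		share = (uint16_t)((uint32_t)n_zones * n_types / total_types);
	}
	*assigned += share;
	return share;
}
#endif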

/*
 * Ensure that the sizes of kalloc_type adopters fit in the zones
 * they have been assigned.
 */
static int
kalloc_type_check_size(zone_t z)
{
	kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;

	while (kt_cur != NULL) {
		if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
			return 0;
		}
		kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
	}

	return 1;
}

struct test_kt_data {
	int a;
};

static int
kalloc_type_test_data_redirect(void)
{
	struct kalloc_type_view ktv_data = {
		.kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
		.kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
	};
	if (!kalloc_type_is_data(ktv_data.kt_flags)) {
		printf("%s: data redirect failed\n", __func__);
		return 0;
	}
	return 1;
}
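
/*
 * Illustrative sketch (not compiled): what the redirect above means
 * for adopters. A type whose signature is all data, like test_kt_data,
 * is steered to the data heap rather than a pointer-bearing
 * kalloc.type zone. A minimal adopter would look roughly like this;
 * treat the exact macro spelling as an assumption of this sketch.
 */
#if 0
static void
kalloc_type_adopter_example(void)
{
	struct test_kt_data *p;

	p = kalloc_type(struct test_kt_data, Z_WAITOK | Z_ZERO);
	if (p != NULL) {
		p->a = 1;
		kfree_type(struct test_kt_data, p);
	}
}
#endif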

static int
run_kalloc_type_test(int64_t in, int64_t *out)
{
	*out = 0;
	for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
		zone_t z = kalloc_type_zarray[i];
		while (z != NULL) {
			if (!kalloc_type_check_size(z)) {
				printf("%s: size check failed\n", __func__);
				return 0;
			}
			z = z->z_kt_next;
		}
	}

	if (!kalloc_type_test_policy(in)) {
		printf("%s: policy check failed\n", __func__);
		return 0;
	}

	if (!kalloc_type_feature_on()) {
		printf("%s: boot-arg is on but feature isn't\n", __func__);
		return 0;
	}

	if (!kalloc_type_test_data_redirect()) {
		printf("%s: kalloc_type redirect for all-data signature failed\n",
		    __func__);
		return 0;
	}

	printf("%s: test passed\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);

static vm_size_t
test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
{
	zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);

	return z ? zone_elem_inner_size(z) : round_page(size);
}

static int
run_kalloc_test_kheap(kalloc_heap_t kheap)
{
	uint64_t *data_ptr;
	void *strippedp_old, *strippedp_new;
	size_t alloc_size = 0, old_alloc_size = 0;
	struct kalloc_result kr = {};

	printf("%s: %s test running\n", __func__, kheap->kh_name);

	/*
	 * Test size 0: alloc, free, realloc
	 */
	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	if (!data_ptr) {
		printf("%s: kalloc 0 returned null\n", __func__);
		return 1;
	}
	kheap_free(kheap, data_ptr, alloc_size);

	data_ptr = kalloc_ext(kheap, alloc_size, Z_WAITOK | Z_NOFAIL,
	    NULL).addr;
	alloc_size = sizeof(uint64_t) + 1;
	/* kr.addr is still NULL here: this exercises realloc from NULL */
	data_ptr = krealloc_ext(kheap, kr.addr, old_alloc_size,
	    alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> old size 0 failed\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	/*
	 * Test krealloc: same size class, different size classes, 2pgs,
	 * VM (with owner)
	 */
	old_alloc_size = alloc_size;
	alloc_size++;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old != strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) !=
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> same size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	old_alloc_size = alloc_size;
	alloc_size *= 2;
	kr = krealloc_ext(kheap, data_ptr, old_alloc_size, alloc_size,
	    Z_WAITOK | Z_NOFAIL, NULL);

	strippedp_old = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)data_ptr);
	strippedp_new = (void *)vm_memtag_canonicalize_kernel((vm_offset_t)kr.addr);

	if (!kr.addr || (strippedp_old == strippedp_new) ||
	    (test_bucket_size(kheap, kr.size) ==
	    test_bucket_size(kheap, old_alloc_size))) {
		printf("%s: krealloc -> different size class failed\n", __func__);
		return 1;
	}
	data_ptr = kr.addr;
	*data_ptr = 0;

	kheap_free(kheap, kr.addr, alloc_size);

	alloc_size = 3544;
	data_ptr = kalloc_ext(kheap, alloc_size,
	    Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
		    __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, alloc_size,
	    PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> 2pgs returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, PAGE_SIZE * 2,
	    KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM1 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	data_ptr = krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 2,
	    KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
	if (!data_ptr) {
		printf("%s: krealloc -> VM2 returned null\n", __func__);
		return 1;
	}
	*data_ptr = 0;

	/* realloc to size 0 frees the allocation */
	krealloc_ext(kheap, data_ptr, KHEAP_MAX_SIZE * 4,
	    0, Z_REALLOCF | Z_WAITOK, &data_ptr);

	printf("%s: test passed\n", __func__);
	return 0;
}
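
/*
 * Illustrative sketch (not compiled): the krealloc_ext() contract the
 * test above relies on. Growing within the same size class keeps the
 * address (modulo memory tags) while crossing into a larger size class
 * moves the allocation; Z_REALLOCF additionally frees the old buffer
 * on failure. The heap choice below is just for illustration.
 */
#if 0
static void
krealloc_contract_example(void)
{
	struct kalloc_result kr;
	void *p = kalloc_ext(KHEAP_DATA_BUFFERS, 32, Z_WAITOK, NULL).addr;

	/* kr.size reports the usable (bucketed) size, not the request */
	kr = krealloc_ext(KHEAP_DATA_BUFFERS, p, 32, 33,
	    Z_REALLOCF | Z_WAITOK, NULL);
	if (kr.addr != NULL) {
		kheap_free(KHEAP_DATA_BUFFERS, kr.addr, kr.size);
	}
}
#endif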

static int
run_kalloc_test(int64_t in __unused, int64_t *out)
{
	*out = 1;

	if (run_kalloc_test_kheap(KHEAP_DATA_BUFFERS) != 0 ||
	    run_kalloc_test_kheap(KHEAP_DATA_SHARED) != 0) {
		*out = 0;
	}

	return 0;
}
SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
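
/*
 * Note: tests registered via SYSCTL_TEST_REGISTER are expected to be
 * driven through the debug.test sysctl interface on DEBUG/DEVELOPMENT
 * kernels, e.g. something like `sysctl debug.test.kalloc=1` from
 * userspace; the exact node name is an assumption of this note.
 */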

#endif /* DEBUG || DEVELOPMENT */
