1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65
66 #include <os/atomic_private.h>
67 #include <os/base.h> /* OS_PTRAUTH_SIGNED_PTR */
68 #include <sys/queue.h>
69 #include <vm/vm_map_internal.h>
70
71 #if KASAN
72 #include <san/kasan.h>
73 #include <kern/spl.h>
#endif /* KASAN */
75
76 /*
77 * Disable zalloc zero validation under kasan as it is
78 * double-duty with what kasan already does.
79 */
80 #if KASAN
81 #define ZALLOC_ENABLE_ZERO_CHECK 0
82 #else
83 #define ZALLOC_ENABLE_ZERO_CHECK 1
84 #endif
85
86 #if KASAN
87 #define ZALLOC_ENABLE_LOGGING 0
88 #elif DEBUG || DEVELOPMENT
89 #define ZALLOC_ENABLE_LOGGING 1
90 #else
91 #define ZALLOC_ENABLE_LOGGING 0
92 #endif
93
94 /*!
95 * @file <kern/zalloc_internal.h>
96 *
97 * @abstract
98 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
99 * kalloc subsystems.
100 */
101
102 __BEGIN_DECLS
103
104 #pragma GCC visibility push(hidden)
105
106 /*
107 * A zone is a collection of fixed size blocks for which there
108 * is fast allocation/deallocation access. Kernel routines can
109 * use zones to manage data structures dynamically, creating a zone
110 * for each type of data structure to be managed.
111 *
112 */
113
114 /*!
115 * @typedef zone_pva_t
116 *
117 * @brief
118 * Type used to point to a page virtual address in the zone allocator.
119 *
120 * @description
121 * - Valid pages have the top bit set.
122 * - 0 represents the "NULL" page
123 * - non 0 values with the top bit cleared represent queue heads,
124 * indexed from the beginning of the __DATA section of the kernel.
125 * (see zone_pageq_base).
126 */
127 typedef struct zone_packed_virtual_address {
128 uint32_t packed_address;
129 } zone_pva_t;
130
/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
	uint64_t            zs_mem_allocated;   /* cumulative bytes allocated */
	uint64_t            zs_mem_freed;       /* cumulative bytes freed */
	uint64_t            zs_alloc_fail;      /* number of failed allocations */
	uint32_t            zs_alloc_rr;        /* allocation rr bias */
	uint32_t _Atomic    zs_alloc_not_early; /* allocations made past early boot — NOTE(review): confirm semantics */
};
147
148 typedef struct zone_magazine *zone_magazine_t;
149
/*!
 * @struct zone_depot
 *
 * @abstract
 * Holds a list of full and empty magazines.
 *
 * @discussion
 * The data structure is a "STAILQ" and an "SLIST" combined with counters
 * to know their lengths in O(1). Here is a graphical example:
 *
 *      zd_full = 3
 *      zd_empty = 1
 *        ╭─── zd_head
 *        │ ╭─ zd_tail
 *        │ ╰────────────────────────────────────╮
 *        │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
 *        ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
 *             ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
 */
struct zone_depot {
	uint32_t            zd_full;    /* number of full magazines in the list */
	uint32_t            zd_empty;   /* number of empty magazines in the list */
	zone_magazine_t     zd_head;    /* first magazine; full ones precede empty ones */
	zone_magazine_t    *zd_tail;    /* tail link of the list, for O(1) append */
};
175
176 /* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
177 #define Z_MAGIC_QUO(s) (((1ull << 32) - 1) / (uint64_t)(s) + 1)
178 #define Z_MAGIC_ALIGNED(s) (~0u / (uint32_t)(s) + 1)
179
/*
 * Fast division: returns (offs / size) for sufficiently small offs,
 * given magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t prod = offs * magic;

	return (uint32_t)(prod >> 32);
}
189
/*
 * Fast modulo: returns (offs % size) for sufficiently small offs,
 * given magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	uint64_t frac = (uint32_t)(offs * magic);

	return (uint32_t)((frac * size) >> 32);
}
201
/*
 * Returns whether (offs % size) == 0 for sufficiently small offs,
 * given magic = Z_MAGIC_ALIGNED(size).
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	uint32_t frac = (uint32_t)(offs * magic);

	return frac < magic;
}
211
/*
 * Precomputed sizing parameters for a zone (see Z_FAST_ALIGNED()).
 */
struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED() */
	uint32_t            z_elem_size;    /* size of an element */
};
216
/*
 * Tracks an in-flight zone expansion request.
 * NOTE(review): field semantics inferred from names — confirm against zalloc.c.
 */
struct zone_expand {
	struct zone_expand *ze_next;        /* next expander in the chain */
	thread_t            ze_thread;      /* thread performing the expansion */
	bool                ze_pg_wait;     /* presumably: waiting on a free page */
	bool                ze_vm_priv;     /* presumably: thread has VM privilege */
	bool                ze_clear_priv;  /* presumably: clear privilege when done */
};
224
225 #define Z_WMA_UNIT (1u << 8)
226 #define Z_WMA_MIX(base, e) ((3 * (base) + (e) * Z_WMA_UNIT) / 4)
227
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone            *z_self;
	zone_stats_t            z_stats;
	const char             *z_name;
	struct zone_view       *z_views;
	struct zone_expand     *z_expander;

	uint64_t                z_quo_magic;    /* magic to use with Z_FAST_QUO() */
	uint32_t                z_align_magic;  /* magic to use with Z_FAST_ALIGNED() */
	uint16_t                z_elem_size;    /* size of an element */
	uint16_t                z_elem_offs;    /* offset of elements (see zone_elem_inner_offs()) */
	uint16_t                z_chunk_pages;  /* presumably pages per allocation chunk — confirm */
	uint16_t                z_chunk_elems;  /* presumably elements per allocation chunk — confirm */

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1, /* zone is (being) destroyed */
	    z_async_refilling  :1, /* asynchronous allocation pending? */
	    z_depot_cleanup    :1, /* per cpu depots need cleaning */
	    z_expanding_wait   :1, /* is thread waiting for expansion? */
	    z_exhausted_wait   :1, /* are threads waiting for exhaustion end */
	    z_exhausts         :1, /* whether the zone exhausts by design */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1, /* the zone is percpu */
	    z_smr              :1, /* the zone uses SMR */
	    z_permanent        :1, /* the zone allocations are permanent */
	    z_nocaching        :1, /* disallow zone caching for this zone */
	    collectable        :1, /* garbage collect empty pages */
	    no_callout         :1,
	    z_destructible     :1, /* zone can be zdestroy()ed */

	    _reserved          :6,

	/*
	 * Debugging features
	 */
	    z_pgz_tracked      :1, /* this zone is tracked by pgzalloc */
	    z_pgz_use_guards   :1, /* this zone uses guards with PGZ */
	    z_kasan_fakestacks :1,
	    z_kasan_quarantine :1, /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6, /* idx into zone_tags_sizeclasses to associate
	                            * sizeclass for a particular kalloc tag */
	    z_uses_tags        :1,
	    z_log_on           :1, /* zone logging was enabled by boot-arg */
	    z_tbi_tag          :1; /* Zone supports tbi tagging */

	uint8_t                 z_cacheline1[0] __attribute__((aligned(64)));

	/*
	 * Zone caching / recirculation cacheline
	 *
	 * z_recirc* fields are protected by the recirculation lock.
	 *
	 * z_recirc_cont_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_WMA_UNIT units (fixed point decimal).
	 *
	 * z_recirc_cont_cur:
	 *   count of recorded contentions that will be fused
	 *   in z_recirc_cont_wma at the next period.
	 *
	 *   Note: if caching is disabled,
	 *   this field is used under the zone lock.
	 *
	 * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
	 *   tracks the history of the minimum values of z_elems_free over time
	 *   with "min" being the minimum it hit for the current period,
	 *   and "wma" the weighted moving average of those values
	 *   (in Z_WMA_UNIT units).
	 *
	 *   This field is used if z_pcpu_cache is NULL,
	 *   otherwise it aliases with z_recirc_empty_{min,wma}
	 *
	 * z_recirc_{full,empty}_{min,wma}:
	 *   tracks the history of the minimum number of full/empty
	 *   magazines in the depot over time, with "min" being the minimum
	 *   it hit for the current period, and "wma" the weighted moving
	 *   average of those values (in Z_WMA_UNIT units).
	 */
	struct zone_cache *__zpercpu OS_PTRAUTH_SIGNED_PTR("zone.z_pcpu_cache") z_pcpu_cache;
	struct zone_depot       z_recirc;

	hw_lck_ticket_t         z_recirc_lock;
	uint32_t                z_recirc_full_min;
	uint32_t                z_recirc_full_wma;
	union {
		uint32_t        z_recirc_empty_min;
		uint32_t        z_elems_free_min;
	};
	union {
		uint32_t        z_recirc_empty_wma;
		uint32_t        z_elems_free_wma;
	};
	uint32_t                z_recirc_cont_cur;
	uint32_t                z_recirc_cont_wma;

	uint16_t                z_depot_size;
	uint16_t                z_depot_limit;

	uint8_t                 z_cacheline2[0] __attribute__((aligned(64)));

	/*
	 * often mutated fields
	 */

	hw_lck_ticket_t         z_lock;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t                z_wired_max;    /* how large can this zone grow */
	uint32_t                z_wired_hwm;    /* z_wired_cur high watermark */
	uint32_t                z_wired_cur;    /* number of pages used by this zone */
	uint32_t                z_wired_empty;  /* pages collectable by GC */
	uint32_t                z_va_cur;       /* amount of VA used by this zone */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t              z_pageq_empty;  /* populated, completely empty pages */
	zone_pva_t              z_pageq_partial;/* populated, partially filled pages */
	zone_pva_t              z_pageq_full;   /* populated, completely full pages */
	zone_pva_t              z_pageq_va;     /* non-populated VA pages */

	/*
	 * Zone statistics
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
	uint32_t                z_elems_free;   /* Number of free elements */
	uint32_t                z_elems_avail;  /* Number of elements available */
	uint32_t                z_elems_rsv;    /* presumably a reserve of elements — confirm */
	uint32_t                z_array_size_class;

	struct zone            *z_kt_next;

	uint8_t                 z_cacheline3[0] __attribute__((aligned(64)));

#if KASAN_CLASSIC
	uint16_t                z_kasan_redzone;
	spl_t                   z_kasan_spl;
#endif

/*
 * NOTE(review): this guard uses ZONE_ENABLE_LOGGING while the macro defined
 * at the top of this header is ZALLOC_ENABLE_LOGGING — confirm which one is
 * intended (a mismatch would silently compile these fields out).
 */
#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog           *z_btlog;
	struct btlog           *z_btlog_disabled;
#endif
} __attribute__((aligned((64))));
404
/*!
 * @typedef zone_security_flags_t
 *
 * @brief
 * Type used to store the immutable security properties of a zone.
 *
 * @description
 * These properties influence the security nature of a zone and can't be
 * modified after lockdown.
 */
typedef struct zone_security_flags {
	uint32_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx      :8,  /* a Z_SUBMAP_IDX_* value */
	    z_kheap_id        :3,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_kalloc_type     :1,  /* zones that do type-based segregation */
	    z_lifo            :1,  /* depot and recirculation layer are LIFO */
	    z_pgz_use_guards  :1,  /* this zone uses guards with PGZ */
	    z_submap_from_end :1,  /* allocate from the left or the right ? */
	    z_noencrypt       :1,  /* do not encrypt pages when hibernating */
	    z_tag             :1,  /* zone supports TBI tagging */
	    z_unused          :15;
	/*
	 * Signature equivalence zone
	 */
	zone_id_t           z_sig_eq;
} zone_security_flags_t;
434
435
436 /*
437 * Zsecurity config to enable strict free of iokit objects to zone
438 * or heap they were allocated from.
439 *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as to
 * not break third party kexts that haven't yet been recompiled
442 * to use the new iokit macros.
443 */
444 #if XNU_PLATFORM_MacOSX && __x86_64__
445 # define ZSECURITY_CONFIG_STRICT_IOKIT_FREE OFF
446 #else
447 # define ZSECURITY_CONFIG_STRICT_IOKIT_FREE ON
448 #endif
449
450 /*
451 * Zsecurity config to enable the read-only allocator
452 */
453 #if KASAN_CLASSIC
454 # define ZSECURITY_CONFIG_READ_ONLY OFF
455 #else
456 # define ZSECURITY_CONFIG_READ_ONLY ON
457 #endif
458
459 /*
460 * Zsecurity config to enable making heap feng-shui
461 * less reliable.
462 */
463 #if KASAN_CLASSIC
464 # define ZSECURITY_CONFIG_SAD_FENG_SHUI OFF
465 # define ZSECURITY_CONFIG_GENERAL_SUBMAPS 1
466 #else
467 # define ZSECURITY_CONFIG_SAD_FENG_SHUI ON
468 # define ZSECURITY_CONFIG_GENERAL_SUBMAPS 4
469 #endif
470
471 /*
472 * Zsecurity config to enable adjusting of elements
473 * with PGZ-OOB to right-align them in their space.
474 */
475 #if KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING
476 # define ZSECURITY_CONFIG_PGZ_OOB_ADJUST OFF
477 #else
478 # define ZSECURITY_CONFIG_PGZ_OOB_ADJUST ON
479 #endif
480
481 /*
482 * Zsecurity config to enable kalloc type segregation
483 */
484 #if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
485 # define ZSECURITY_CONFIG_KT_BUDGET 120
486 # define ZSECURITY_CONFIG_KT_VAR_BUDGET 6
487 #else
488 # define ZSECURITY_CONFIG_KT_BUDGET 260
489 # define ZSECURITY_CONFIG_KT_VAR_BUDGET 6
490 #endif
491
492 /*
493 * Zsecurity config to enable (KASAN) tagging of memory allocations
494 */
495 #if CONFIG_KERNEL_TAGGING
496 # define ZSECURITY_CONFIG_ZONE_TAGGING ON
497 #else
498 # define ZSECURITY_CONFIG_ZONE_TAGGING OFF
499 #endif
500
501
502 __options_decl(kalloc_type_options_t, uint64_t, {
503 /*
504 * kalloc type option to switch default accounting to private.
505 */
506 KT_OPTIONS_ACCT = 0x00000001,
507 /*
508 * kalloc type option to print additional stats regarding zone
509 * budget distribution and signatures.
510 */
511 KT_OPTIONS_DEBUG = 0x00000002,
512 /*
513 * kalloc type option to allow loose freeing between heaps
514 */
515 KT_OPTIONS_LOOSE_FREE = 0x00000004,
516 });
517
518 __enum_decl(kt_var_heap_id_t, uint32_t, {
519 /*
520 * Fake "data" heap used to link views of data-only allocation that
521 * have been redirected to KHEAP_DATA_BUFFERS
522 */
523 KT_VAR_DATA_HEAP,
524 /*
525 * Heaps for pointer arrays
526 */
527 KT_VAR_PTR_HEAP0,
528 KT_VAR_PTR_HEAP1,
529 /*
530 * Indicating first additional heap added
531 */
532 KT_VAR__FIRST_FLEXIBLE_HEAP,
533 });
534
535 /*
536 * Zone submap indices
537 *
538 * Z_SUBMAP_IDX_VM
539 * this map has the special property that its allocations
540 * can be done without ever locking the submap, and doesn't use
541 * VM entries in the map (which limits certain VM map operations on it).
542 *
543 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
544 *
545 * On LP64 it is also used to restrict VM allocations on LP64 lower
546 * in the kernel VA space, for pointer packing purposes.
547 *
548 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
549 * used for unrestricted allocations
550 *
551 * Z_SUBMAP_IDX_DATA
552 * used to sequester bags of bytes from all other allocations and allow VA reuse
553 * within the map
554 *
555 * Z_SUBMAP_IDX_READ_ONLY
556 * used for the read-only allocator
557 */
558 __enum_decl(zone_submap_idx_t, uint32_t, {
559 Z_SUBMAP_IDX_VM,
560 Z_SUBMAP_IDX_READ_ONLY,
561 Z_SUBMAP_IDX_GENERAL_0,
562 #if ZSECURITY_CONFIG(SAD_FENG_SHUI)
563 Z_SUBMAP_IDX_GENERAL_1,
564 Z_SUBMAP_IDX_GENERAL_2,
565 Z_SUBMAP_IDX_GENERAL_3,
566 #endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
567 Z_SUBMAP_IDX_DATA,
568
569 Z_SUBMAP_IDX_COUNT,
570 });
571
572 #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
573
/*
 * Variable kalloc_type heap config
 */
struct kheap_info {
	zone_id_t               kh_zstart;  /* first zone id of the heap — NOTE(review): confirm range semantics */
	kalloc_heap_t           kh_views;   /* kalloc_heap views attached to this heap */
	kalloc_type_var_view_t  kt_views;   /* variable kalloc_type views attached to this heap */
};
/* A view is either fixed-size or variable-size; the union carries whichever applies. */
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed; /* fixed-size kalloc_type view */
	struct kalloc_type_var_view *ktv_var;   /* variable-size kalloc_type view */
} kalloc_type_views_t;
586
587 #define KT_VAR_MAX_HEAPS 8
588 #define MAX_ZONES 690
589 extern struct kheap_info kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
590 extern zone_id_t _Atomic num_zones;
591 extern uint32_t zone_view_count;
592 extern struct zone zone_array[MAX_ZONES];
593 extern struct zone_size_params zone_ro_size_params[ZONE_ID__LAST_RO + 1];
594 extern zone_security_flags_t zone_security_array[];
595 extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
596 extern mach_memory_info_t *panic_kext_memory_info;
597 extern vm_size_t panic_kext_memory_size;
598 extern vm_offset_t panic_fault_address;
599 extern uint16_t _zc_mag_size;
600
601 #define zone_index_foreach(i) \
602 for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
603 i < num_zones_##i; i++)
604
605 #define zone_foreach(z) \
606 for (zone_t z = &zone_array[1], \
607 last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
608 z < last_zone_##z; z++)
609
610 __abortlike
611 extern void zone_invalid_panic(zone_t zone);
612
__pure2
static inline zone_id_t
zone_index(zone_t z)
{
	unsigned long delta;
	uint64_t quo;

	/* Byte offset of @c z from the start of the global zone_array. */
	delta = (unsigned long)z - (unsigned long)zone_array;
	if (delta >= MAX_ZONES * sizeof(*z)) {
		/* Not a pointer into zone_array: abort. */
		zone_invalid_panic(z);
	}
	/*
	 * Divide by sizeof(struct zone) without an integer division:
	 * delta is bounded above, so the reciprocal multiplication
	 * is exact (see Z_MAGIC_QUO()).
	 */
	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
	__builtin_assume(quo < MAX_ZONES);
	return (zone_id_t)quo;
}
628
629 __pure2
630 static inline bool
zone_is_ro(zone_t zone)631 zone_is_ro(zone_t zone)
632 {
633 return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
634 zone <= &zone_array[ZONE_ID__LAST_RO];
635 }
636
637 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)638 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
639 {
640 return atop(addr ^ (addr + size - 1)) != 0;
641 }
642
/* Per-element KASAN redzone size (0 unless KASAN_CLASSIC is enabled). */
__pure2
static inline uint16_t
zone_elem_redzone(zone_t zone)
{
#if KASAN_CLASSIC
	return zone->z_kasan_redzone;
#else
	(void)zone;
	return 0;
#endif
}
654
/* Offset of the client-visible ("inner") element within its storage. */
__pure2
static inline uint16_t
zone_elem_inner_offs(zone_t zone)
{
	return zone->z_elem_offs;
}
661
/* Offset of the element including its leading KASAN redzone. */
__pure2
static inline uint16_t
zone_elem_outer_offs(zone_t zone)
{
	return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
}
668
/* Size of the client-visible ("inner") element, excluding any redzone. */
__pure2
static inline vm_offset_t
zone_elem_inner_size(zone_t zone)
{
	return zone->z_elem_size;
}
675
/* Element size including its KASAN redzone. */
__pure2
static inline vm_offset_t
zone_elem_outer_size(zone_t zone)
{
	return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
}
682
683 __pure2
684 static inline zone_security_flags_t
zone_security_config(zone_t z)685 zone_security_config(zone_t z)
686 {
687 zone_id_t zid = zone_index(z);
688 return zone_security_array[zid];
689 }
690
691 static inline uint32_t
zone_count_free(zone_t zone)692 zone_count_free(zone_t zone)
693 {
694 return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
695 }
696
/* Number of currently allocated elements (available minus free). */
static inline uint32_t
zone_count_allocated(zone_t zone)
{
	return zone->z_elems_avail - zone_count_free(zone);
}
702
703 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)704 zone_scale_for_percpu(zone_t zone, vm_size_t size)
705 {
706 if (zone->z_percpu) {
707 size *= zpercpu_count();
708 }
709 return size;
710 }
711
/* Bytes of wired memory backing @c zone (scaled for per-cpu zones). */
static inline vm_size_t
zone_size_wired(zone_t zone)
{
	/*
	 * This either requires the zone lock,
	 * or is to be used for statistics purposes only.
	 */
	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
	return zone_scale_for_percpu(zone, size);
}
722
/* Free bytes in @c zone (free elements, incl. full recirculation magazines). */
static inline vm_size_t
zone_size_free(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           zone_elem_inner_size(zone) * zone_count_free(zone));
}
729
/*
 * Bytes currently allocated from @c zone.
 * Under KASAN builds, this also accounts for quarantined elements.
 */
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           zone_elem_inner_size(zone) * zone_count_allocated(zone));
}
737
/* Wired bytes not accounted to elements: wired size minus all outer element sizes. */
static inline vm_size_t
zone_size_wasted(zone_t zone)
{
	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
	           zone_elem_outer_size(zone) * zone->z_elems_avail);
}
744
/* Whether @c zone has a finite wired-size limit (z_wired_max != ~0u). */
__pure2
static inline bool
zone_exhaustible(zone_t zone)
{
	return zone->z_wired_max != ~0u;
}
751
/* Whether @c zone has reached its wired-size limit. */
__pure2
static inline bool
zone_exhausted(zone_t zone)
{
	return zone->z_wired_cur >= zone->z_wired_max;
}
758
759 /*
 * Set and get the signature equivalence for the given zone
761 */
762 extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq);
763 extern zone_id_t zone_get_sig_eq(zone_t zone);
/*
 * Return the accumulated allocated memory on the given zone stats.
 * Reads a single zone_stats record; for per-cpu stats the caller is
 * responsible for summing across CPUs.
 */
static inline vm_size_t
zone_stats_get_mem_allocated(zone_stats_t stats)
{
	return stats->zs_mem_allocated;
}
772
773 /*
774 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
775 * userspace reboot is needed. The only other way to query for this information
776 * is via mach_memory_info() which is unavailable on release kernels.
777 */
778 extern uint64_t get_zones_collectable_bytes(void);
779
780 /*!
781 * @enum zone_gc_level_t
782 *
783 * @const ZONE_GC_TRIM
784 * Request a trimming GC: it will trim allocations in excess
785 * of the working set size estimate only.
786 *
787 * @const ZONE_GC_DRAIN
788 * Request a draining GC: this is an aggressive mode that will
789 * cause all caches to be drained and all free pages returned to the system.
790 *
791 * @const ZONE_GC_JETSAM
792 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
793 * @c ZONE_GC_DRAIN depending on the state of the zone map.
794 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
795 * request a @c ZONE_GC_JETSAM level.
796 */
797 __enum_closed_decl(zone_gc_level_t, uint32_t, {
798 ZONE_GC_TRIM,
799 ZONE_GC_DRAIN,
800 ZONE_GC_JETSAM,
801 });
802
803 /*!
804 * @function zone_gc
805 *
806 * @brief
807 * Reduces memory used by zones by trimming caches and freelists.
808 *
809 * @discussion
810 * @c zone_gc() is called:
811 * - by the pageout daemon when the system needs more free pages.
812 * - by the VM when contiguous page allocation requests get stuck
813 * (see vm_page_find_contiguous()).
814 *
815 * @param level The zone GC level requested.
816 */
817 extern void zone_gc(zone_gc_level_t level);
818
819 #define ZONE_WSS_UPDATE_PERIOD 15
820 /*!
821 * @function compute_zone_working_set_size
822 *
823 * @brief
824 * Recomputes the working set size for every zone
825 *
826 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
828 * computing an exponential moving average with a weight of 75%,
829 * so that the history of the last minute is the dominating factor.
830 */
831 extern void compute_zone_working_set_size(void *);
832
833 /* Debug logging for zone-map-exhaustion jetsams. */
834 extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
835 extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);
836
837 /* Bootstrap zone module (create zone zone) */
838 extern void zone_bootstrap(void);
839
840 /* Force-enable caching on a zone, generally unsafe to call directly */
841 extern void zone_enable_caching(zone_t zone);
842
843 /*!
844 * @function zone_early_mem_init
845 *
846 * @brief
847 * Steal memory from pmap (prior to initialization of zalloc)
848 * for the special vm zones that allow bootstrap memory and store
849 * the range so as to facilitate range checking in zfree.
850 *
851 * @param size the size to steal (must be a page multiple)
852 */
853 __startup_func
854 extern vm_offset_t zone_early_mem_init(
855 vm_size_t size);
856
857 /*!
858 * @function zone_get_early_alloc_size
859 *
860 * @brief
861 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
862 * of the allocation granule for the zone with the given creation flags and
863 * element size.
864 */
865 __startup_func
866 extern vm_size_t zone_get_early_alloc_size(
867 const char *name __unused,
868 vm_size_t elem_size,
869 zone_create_flags_t flags,
870 vm_size_t min_elems);
871
872 /*!
873 * @function zone_cram_early
874 *
875 * @brief
876 * Cram memory allocated with @c zone_early_mem_init() into a zone.
877 *
878 * @param zone The zone to cram memory into.
879 * @param newmem The base address for the memory to cram.
880 * @param size The size of the memory to cram into the zone.
881 */
882 __startup_func
883 extern void zone_cram_early(
884 zone_t zone,
885 vm_offset_t newmem,
886 vm_size_t size);
887
888 extern bool zone_maps_owned(
889 vm_address_t addr,
890 vm_size_t size);
891
892 #if KASAN_LIGHT
893 extern bool kasan_zone_maps_owned(
894 vm_address_t addr,
895 vm_size_t size);
896 #endif /* KASAN_LIGHT */
897
898 extern void zone_map_sizes(
899 vm_map_size_t *psize,
900 vm_map_size_t *pfree,
901 vm_map_size_t *plargest_free);
902
903 extern bool
904 zone_map_nearing_exhaustion(void);
905
/* Extracts the vm_tag_t encoded in the Z_VM_TAG_* bits of @c flags. */
static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
}
911
912 extern struct kalloc_result zalloc_ext(
913 zone_t zone,
914 zone_stats_t zstats,
915 zalloc_flags_t flags);
916
917 #if KASAN
918 #define ZFREE_PACK_SIZE(esize, usize) (((uint64_t)(usize) << 32) | (esize))
919 #define ZFREE_ELEM_SIZE(combined) ((uint32_t)(combined))
920 #define ZFREE_USER_SIZE(combined) ((combined) >> 32)
921 #else
922 #define ZFREE_PACK_SIZE(esize, usize) (esize)
923 #define ZFREE_ELEM_SIZE(combined) (combined)
924 #endif
925
926 extern void zfree_ext(
927 zone_t zone,
928 zone_stats_t zstats,
929 void *addr,
930 uint64_t combined_size);
931
932 extern zone_id_t zone_id_for_element(
933 void *addr,
934 vm_size_t esize);
935
936 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
937 extern void *zone_element_pgz_oob_adjust(
938 void *addr,
939 vm_size_t req_size,
940 vm_size_t elem_size);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
942
943 extern void zone_element_bounds_check(
944 vm_address_t addr,
945 vm_size_t len);
946
947 extern vm_size_t zone_element_size(
948 void *addr,
949 zone_t *z,
950 bool clear_oob,
951 vm_offset_t *oob_offs);
952
953 /*!
954 * @function zone_spans_ro_va
955 *
956 * @abstract
957 * This function is used to check whether the specified address range
958 * spans through the read-only zone range.
959 *
960 * @discussion
961 * This only checks for the range specified within ZONE_ADDR_READONLY.
962 * The parameters addr_start and addr_end are stripped off of PAC bits
963 * before the check is made.
964 */
965 extern bool zone_spans_ro_va(
966 vm_offset_t addr_start,
967 vm_offset_t addr_end);
968
969 /*!
970 * @function __zalloc_ro_mut_atomic
971 *
972 * @abstract
973 * This function is called from the pmap to perform the specified atomic
974 * operation on memory from the read-only allocator.
975 *
976 * @discussion
977 * This function is for internal use only and should not be called directly.
978 */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
	/*
	 * Expands to the 8/16/32/64-bit wide cases of the given os_atomic
	 * operation; the *_orig variants (and xchg) all return the value
	 * previously stored at @c dst.
	 */
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		/* every valid op returns above; invalid ops abort */
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}
1004
1005 /*!
1006 * @function zone_owns
1007 *
1008 * @abstract
1009 * This function is a soft version of zone_require that checks if a given
1010 * pointer belongs to the specified zone and should not be used outside
1011 * allocator code.
1012 *
1013 * @discussion
1014 * Note that zone_owns() can only work with:
1015 * - zones not allowing foreign memory
1016 * - zones in the general submap.
1017 *
1018 * @param zone the zone the address needs to belong to.
1019 * @param addr the element address to check.
1020 */
1021 extern bool zone_owns(
1022 zone_t zone,
1023 void *addr);
1024
/*!
1026 * @function zone_submap
1027 *
1028 * @param zsflags the security flags of a specified zone.
1029 * @returns the zone (sub)map this zone allocates from.
1030 */
1031 __pure2
1032 extern vm_map_t zone_submap(
1033 zone_security_flags_t zsflags);
1034
1035 #ifndef VM_TAG_SIZECLASSES
1036 #error MAX_TAG_ZONES
1037 #endif
1038 #if VM_TAG_SIZECLASSES
1039
1040 extern uint16_t zone_index_from_tag_index(
1041 uint32_t tag_zone_index);
1042
1043 #endif /* VM_TAG_SIZECLASSES */
1044
1045 extern lck_grp_t zone_locks_grp;
1046
/*
 * Acquires the zone lock.
 *
 * For KASAN fakestack zones, raises to splsched() before taking the
 * lock and stashes the previous level in z_kasan_spl so that
 * zone_unlock() can restore it.
 */
static inline void
zone_lock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = 0;
	if (zone->z_kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_FAKESTACK
	zone->z_kasan_spl = s;
#endif /* KASAN_FAKESTACK */
}
1061
/*
 * Releases the zone lock.
 *
 * For KASAN fakestack zones, restores the spl level that zone_lock()
 * saved in z_kasan_spl (the field is read before dropping the lock,
 * then splx() happens after the unlock).
 */
static inline void
zone_unlock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_unlock(&zone->z_lock);
#if KASAN_FAKESTACK
	if (zone->z_kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_FAKESTACK */
}
1076
1077 #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
1078
1079 int track_this_zone(const char *zonename, const char *logname);
1080 extern bool panic_include_kalloc_types;
1081 extern zone_t kalloc_type_src_zone;
1082 extern zone_t kalloc_type_dst_zone;
1083
1084 #if DEBUG || DEVELOPMENT
1085 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
1086 #endif /* DEBUG || DEVELOPMENT */
1087
1088 #pragma GCC visibility pop
1089
1090 __END_DECLS
1091
1092 #endif /* _KERN_ZALLOC_INTERNAL_H_ */
1093