xref: /xnu-12377.41.6/osfmk/kern/zalloc_internal.h (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61 
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65 
66 #include <os/atomic_private.h>
67 #include <os/base.h> /* OS_PTRAUTH_SIGNED_PTR */
68 #include <sys/queue.h>
69 #include <vm/vm_map_internal.h>
70 
71 #if KASAN
72 #include <san/kasan.h>
73 #include <kern/spl.h>
#endif /* KASAN */
75 
76 /*
77  * Disable zalloc zero validation under kasan as it is
78  * double-duty with what kasan already does.
79  */
80 #if KASAN
81 #define ZALLOC_ENABLE_ZERO_CHECK        0
82 #else
83 #define ZALLOC_ENABLE_ZERO_CHECK        1
84 #endif
85 
86 #if KASAN
87 #define ZALLOC_ENABLE_LOGGING           0
88 #elif DEBUG || DEVELOPMENT
89 #define ZALLOC_ENABLE_LOGGING           1
90 #else
91 #define ZALLOC_ENABLE_LOGGING           0
92 #endif
93 
94 /*!
95  * @file <kern/zalloc_internal.h>
96  *
97  * @abstract
98  * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
99  * kalloc subsystems.
100  */
101 
102 __BEGIN_DECLS
103 
104 __exported_push_hidden
105 
106 /*
107  *	A zone is a collection of fixed size blocks for which there
108  *	is fast allocation/deallocation access.  Kernel routines can
109  *	use zones to manage data structures dynamically, creating a zone
110  *	for each type of data structure to be managed.
111  *
112  */
113 
/*!
 * @typedef zone_pva_t
 *
 * @brief
 * Type used to point to a page virtual address in the zone allocator.
 *
 * @description
 * - Valid pages have the top bit set.
 * - 0 represents the "NULL" page
 * - non 0 values with the top bit cleared represent queue heads,
 *   indexed from the beginning of the __DATA section of the kernel.
 *   (see zone_pageq_base).
 */
typedef struct zone_packed_virtual_address {
	uint32_t packed_address; /* encoded per the scheme described above */
} zone_pva_t;
130 
/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
	uint64_t            zs_mem_allocated; /* accumulated bytes allocated */
	uint64_t            zs_mem_freed;     /* accumulated bytes freed */
	uint64_t            zs_alloc_fail;    /* allocation failure count */
	uint32_t            zs_alloc_rr;     /* allocation rr bias */
	uint32_t _Atomic    zs_alloc_not_early;
};
147 
148 typedef struct zone_magazine *zone_magazine_t;
149 
/*!
 * @struct zone_depot
 *
 * @abstract
 * Holds a list of full and empty magazines.
 *
 * @discussion
 * The data structure is a "STAILQ" and an "SLIST" combined with counters
 * to know their lengths in O(1). Here is a graphical example:
 *
 *      zd_full = 3
 *      zd_empty = 1
 * ╭─── zd_head
 * │ ╭─ zd_tail
 * │ ╰────────────────────────────────────╮
 * │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
 * ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
 *      ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
 */
struct zone_depot {
	uint32_t            zd_full;   /* number of full magazines in the list */
	uint32_t            zd_empty;  /* number of empty magazines in the list */
	zone_magazine_t     zd_head;   /* first magazine (full ones first, per diagram) */
	zone_magazine_t    *zd_tail;   /* link slot after the last full magazine (per diagram) */
};
175 
/* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
#define Z_MAGIC_QUO(s)      (((1ull << 32) - 1) / (uint64_t)(s) + 1) /* for Z_FAST_QUO() / Z_FAST_MOD() */
#define Z_MAGIC_ALIGNED(s)  (~0u / (uint32_t)(s) + 1)                /* for Z_FAST_ALIGNED() */
179 
/*
 * Fast division: computes (offs / size) without a divide instruction,
 * valid for small enough offs, when magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t scaled = offs * magic;

	return (uint32_t)(scaled >> 32);
}
189 
/*
 * Fast modulo: computes (offs % size) without a divide instruction,
 * valid for small enough offs, when magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	uint32_t frac = (uint32_t)(offs * magic);

	return (uint32_t)(((uint64_t)frac * size) >> 32);
}
201 
/*
 * Fast alignment check: returns whether (offs % size) == 0,
 * valid for small enough offs, when magic = Z_MAGIC_ALIGNED(size).
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	uint32_t frac = (uint32_t)(offs * magic);

	return frac < magic;
}
211 
/*
 * Minimal sizing parameters for a zone
 * (see zone_ro_size_params for the read-only zones).
 */
struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED()  */
	uint32_t            z_elem_size;    /* size of an element                  */
};
216 
/*
 * State for a thread expanding a zone (see struct zone::z_expander).
 */
struct zone_expand {
	struct zone_expand *ze_next;       /* next expander in the chain */
	thread_t            ze_thread;     /* thread performing the expansion */
	bool                ze_pg_wait;    /* NOTE(review): presumably "waiting for pages" — confirm */
	bool                ze_vm_priv;    /* NOTE(review): presumably "has VM privilege" — confirm */
	bool                ze_clear_priv;
};
224 
/* fixed-point unit for weighted moving averages (see z_recirc_cont_wma) */
#define Z_WMA_UNIT (1u << 8)
/* exponential moving average: 3/4 old value, 1/4 new sample (scaled to Z_WMA_UNIT) */
#define Z_WMA_MIX(base, e)  ((3 * (base) + (e) * Z_WMA_UNIT) / 4)
227 
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;
	const char         *z_name;
	struct zone_view   *z_views;
	struct zone_expand *z_expander;

	uint64_t            z_quo_magic;    /* Z_MAGIC_QUO(z_elem_size), for Z_FAST_QUO/MOD */
	uint32_t            z_align_magic;  /* Z_MAGIC_ALIGNED(z_elem_size), for Z_FAST_ALIGNED */
	uint16_t            z_elem_size;    /* size of an element */
	uint16_t            z_elem_offs;    /* offset of the first element (see zone_elem_inner_offs) */
	uint16_t            z_chunk_pages;  /* pages per allocation chunk */
	uint16_t            z_chunk_elems;  /* elements per allocation chunk */

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1,  /* zone is (being) destroyed */
	    z_async_refilling  :1,  /* asynchronous allocation pending? */
	    z_depot_cleanup    :1,  /* per cpu depots need cleaning */
	    z_expanding_wait   :1,  /* is thread waiting for expansion? */
	    z_exhausted_wait   :1,  /* are threads waiting for exhaustion end */
	    z_exhausts         :1,  /* whether the zone exhausts by design */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1,  /* the zone is percpu */
	    z_smr              :1,  /* the zone uses SMR */
	    z_permanent        :1,  /* the zone allocations are permanent */
	    z_nocaching        :1,  /* disallow zone caching for this zone */
	    collectable        :1,  /* garbage collect empty pages */
	    no_callout         :1,
	    z_destructible     :1,  /* zone can be zdestroy()ed  */

	    _reserved          :8,

	/*
	 * Debugging features
	 */
	    z_kasan_fakestacks :1,
	    z_kasan_quarantine :1,  /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
	                             * sizeclass for a particular kalloc tag */
	    z_uses_tags        :1,
	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
	    z_tbi_tag          :1;  /* Zone supports tbi tagging */

	uint8_t             z_cacheline1[0] __attribute__((aligned(64)));

	/*
	 * Zone caching / recirculation cacheline
	 *
	 * z_recirc* fields are protected by the recirculation lock.
	 *
	 * z_recirc_cont_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_WMA_UNIT units (fixed point decimal).
	 *
	 * z_recirc_cont_cur:
	 *   count of recorded contentions that will be fused
	 *   in z_recirc_cont_wma at the next period.
	 *
	 *   Note: if caching is disabled,
	 *   this field is used under the zone lock.
	 *
	 * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
	 *   tracks the history of the minimum values of z_elems_free over time
	 *   with "min" being the minimum it hit for the current period,
	 *   and "wma" the weighted moving average of those values
	 *   (in Z_WMA_UNIT units).
	 *
	 *   This field is used if z_pcpu_cache is NULL,
	 *   otherwise it aliases with z_recirc_empty_{min,wma}
	 *
	 * z_recirc_{full,empty}_{min,wma}:
	 *   tracks the history of the minimum number of full/empty
	 *   magazines in the depot over time, with "min" being the minimum
	 *   it hit for the current period, and "wma" the weighted moving
	 *   average of those values (in Z_WMA_UNIT units).
	 */
	struct zone_cache  *__zpercpu OS_PTRAUTH_SIGNED_PTR("zone.z_pcpu_cache") z_pcpu_cache;
	struct zone_depot   z_recirc;

	hw_lck_ticket_t     z_recirc_lock;
	uint32_t            z_recirc_full_min;
	uint32_t            z_recirc_full_wma;
	union {
		uint32_t    z_recirc_empty_min;
		uint32_t    z_elems_free_min;
	};
	union {
		uint32_t    z_recirc_empty_wma;
		uint32_t    z_elems_free_wma;
	};
	uint32_t            z_recirc_cont_cur;
	uint32_t            z_recirc_cont_wma;

	uint16_t            z_depot_size;
	uint16_t            z_depot_limit;

	uint8_t             z_cacheline2[0] __attribute__((aligned(64)));

	/*
	 * often mutated fields
	 */

	hw_lck_ticket_t     z_lock;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */
	uint32_t            z_elems_rsv;
	uint32_t            z_array_size_class;

	struct zone        *z_kt_next;

	uint8_t             z_cacheline3[0] __attribute__((aligned(64)));

#if KASAN_CLASSIC
	uint16_t            z_kasan_redzone;
	spl_t               z_kasan_spl;
#endif

#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog       *z_btlog;
	struct btlog       *z_btlog_disabled;
#endif
} __attribute__((aligned((64))));
402 
/*!
 * @typedef zone_security_flags_t
 *
 * @brief
 * Type used to store the immutable security properties of a zone.
 *
 * @description
 * These properties influence the security nature of a zone and can't be
 * modified after lockdown.
 */
typedef struct zone_security_flags {
	uint32_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
	    z_kheap_id         :3,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_kalloc_type      :1,  /* zone does type-based segregation */
	    z_lifo             :1,  /* depot and recirculation layer are LIFO */
	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
	    z_tag              :1,  /* zone supports TBI tagging */
	    z_unused           :16;
	/*
	 * Signature equivalence zone
	 */
	zone_id_t           z_sig_eq;
} zone_security_flags_t;
431 
432 
433 /*
434  * Zsecurity config to enable strict free of iokit objects to zone
435  * or heap they were allocated from.
436  *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as not
 * to break third party kexts that haven't yet been recompiled
439  * to use the new iokit macros.
440  */
441 #if XNU_PLATFORM_MacOSX && __x86_64__
442 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
443 #else
444 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
445 #endif
446 
447 /*
448  * Zsecurity config to enable the read-only allocator
449  */
450 #if KASAN_CLASSIC
451 #   define ZSECURITY_CONFIG_READ_ONLY                   OFF
452 #else
453 #   define ZSECURITY_CONFIG_READ_ONLY                   ON
454 #endif
455 
456 /*
457  * Zsecurity config to enable making heap feng-shui
458  * less reliable.
459  */
460 #if KASAN_CLASSIC
461 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
462 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
463 #else
464 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
465 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
466 #endif
467 
468 /*
469  * Zsecurity config to enable adjusting of elements
470  * with PGZ-OOB to right-align them in their space.
471  */
472 #if KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING
473 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
474 #else
475 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
476 #endif
477 
478 /*
479  * Zsecurity config to enable kalloc type segregation
480  */
481 #if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
482 #   define ZSECURITY_CONFIG_KT_BUDGET                   120
483 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
484 #else
485 #   define ZSECURITY_CONFIG_KT_BUDGET                   260
486 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
487 #endif
488 
489 /*
490  * Zsecurity config to enable (KASAN) tagging of memory allocations
491  */
492 #if CONFIG_KERNEL_TAGGING
493 #   define ZSECURITY_CONFIG_ZONE_TAGGING                ON
494 #else
495 #   define ZSECURITY_CONFIG_ZONE_TAGGING                OFF
496 #endif
497 
498 
/*
 * Options altering the behavior of the kalloc_type subsystem.
 */
__options_decl(kalloc_type_options_t, uint64_t, {
	/*
	 * kalloc type option to switch default accounting to private.
	 */
	KT_OPTIONS_ACCT                         = 0x00000001,
	/*
	 * kalloc type option to print additional stats regarding zone
	 * budget distribution and signatures.
	 */
	KT_OPTIONS_DEBUG                        = 0x00000002,
	/*
	 * kalloc type option to allow loose freeing between heaps
	 */
	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
});
514 
/*
 * Well-known indices of the variable kalloc_type heaps.
 */
__enum_decl(kt_var_heap_id_t, uint32_t, {
	/*
	 * Fake "data" heap used to link views of data-only allocations that
	 * have been redirected to KHEAP_DATA_BUFFERS
	 */
	KT_VAR_DATA_HEAP,
	/*
	 * Fake "data" heap used to link views of data-only allocations that
	 * have been redirected to KHEAP_DATA_SHARED
	 */
	KT_VAR_DATA_SHARED_HEAP,
	/*
	 * Heaps for pointer arrays
	 */
	KT_VAR_PTR_HEAP0,
	KT_VAR_PTR_HEAP1,
	/*
	 * Indicating first additional heap added
	 */
	KT_VAR__FIRST_FLEXIBLE_HEAP,
});
536 
/*
 * Zone submap indices
 *
 * Z_SUBMAP_IDX_VM
 * this map has the special property that its allocations
 * can be done without ever locking the submap, and doesn't use
 * VM entries in the map (which limits certain VM map operations on it).
 *
 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
 *
 * On LP64 it is also used to restrict VM allocations on LP64 lower
 * in the kernel VA space, for pointer packing purposes.
 *
 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
 * used for unrestricted allocations
 *
 * Z_SUBMAP_IDX_DATA
 * used to sequester bags of bytes from all other allocations and allow VA reuse
 * within the map
 *
 * Z_SUBMAP_IDX_READ_ONLY
 * used for the read-only allocator
 */
__enum_decl(zone_submap_idx_t, uint32_t, {
	Z_SUBMAP_IDX_VM,
	Z_SUBMAP_IDX_READ_ONLY,
	Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
	/* extra general submaps only exist when heap feng-shui hardening is on */
	Z_SUBMAP_IDX_GENERAL_1,
	Z_SUBMAP_IDX_GENERAL_2,
	Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
	Z_SUBMAP_IDX_DATA,

	Z_SUBMAP_IDX_COUNT, /* number of zone submaps */
});
573 
574 #define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
575 
/*
 * Variable kalloc_type heap config
 */
struct kheap_info {
	zone_id_t               kh_zstart;  /* NOTE(review): presumably the heap's first zone id — confirm */
	kalloc_heap_t           kh_views;   /* kalloc_heap views linked to this heap */
	kalloc_type_var_view_t  kt_views;   /* kalloc_type variable views linked to this heap */
};
/* a kalloc_type view is either fixed-size or variable-size */
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;
588 
589 #define KT_VAR_MAX_HEAPS 8
590 #define MAX_ZONES       690
591 extern struct kheap_info        kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
592 extern zone_id_t _Atomic        num_zones;
593 extern uint32_t                 zone_view_count;
594 extern struct zone              zone_array[MAX_ZONES];
595 extern struct zone_size_params  zone_ro_size_params[ZONE_ID__LAST_RO + 1];
596 extern zone_security_flags_t    zone_security_array[];
597 extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
598 extern mach_memory_info_t      *panic_kext_memory_info;
599 extern vm_size_t                panic_kext_memory_size;
600 extern vm_offset_t              panic_fault_address;
601 extern uint16_t                 _zc_mag_size;
602 
/*
 * Iterates zone indices [1, num_zones): index 0 is skipped.
 * num_zones is sampled once (acquire) at loop entry.
 */
#define zone_index_foreach(i) \
	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
	    i < num_zones_##i; i++)

/*
 * Iterates over every registered zone, skipping zone_array[0].
 * num_zones is sampled once (acquire) at loop entry.
 */
#define zone_foreach(z) \
	for (zone_t z = &zone_array[1], \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z; z++)
611 
612 __abortlike
613 extern void zone_invalid_panic(zone_t zone);
614 
615 __pure2
616 static inline zone_id_t
zone_index(zone_t z)617 zone_index(zone_t z)
618 {
619 	unsigned long delta;
620 	uint64_t quo;
621 
622 	delta = (unsigned long)z - (unsigned long)zone_array;
623 	if (delta >= MAX_ZONES * sizeof(*z)) {
624 		zone_invalid_panic(z);
625 	}
626 	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
627 	__builtin_assume(quo < MAX_ZONES);
628 	return (zone_id_t)quo;
629 }
630 
631 __pure2
632 static inline bool
zone_is_ro(zone_t zone)633 zone_is_ro(zone_t zone)
634 {
635 	return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
636 	       zone <= &zone_array[ZONE_ID__LAST_RO];
637 }
638 
639 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)640 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
641 {
642 	return atop(addr ^ (addr + size - 1)) != 0;
643 }
644 
/*
 * Returns the size of the KASAN redzone for elements of @c zone
 * (always 0 when KASAN_CLASSIC is not configured).
 */
__pure2
static inline uint16_t
zone_elem_redzone(zone_t zone)
{
#if KASAN_CLASSIC
	return zone->z_kasan_redzone;
#else
	(void)zone;
	return 0;
#endif
}
656 
657 __pure2
658 static inline uint16_t
zone_elem_inner_offs(zone_t zone)659 zone_elem_inner_offs(zone_t zone)
660 {
661 	return zone->z_elem_offs;
662 }
663 
664 __pure2
665 static inline uint16_t
zone_elem_outer_offs(zone_t zone)666 zone_elem_outer_offs(zone_t zone)
667 {
668 	return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
669 }
670 
671 __pure2
672 static inline vm_offset_t
zone_elem_inner_size(zone_t zone)673 zone_elem_inner_size(zone_t zone)
674 {
675 	return zone->z_elem_size;
676 }
677 
678 __pure2
679 static inline vm_offset_t
zone_elem_outer_size(zone_t zone)680 zone_elem_outer_size(zone_t zone)
681 {
682 	return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
683 }
684 
685 __pure2
686 static inline zone_security_flags_t
zone_security_config(zone_t z)687 zone_security_config(zone_t z)
688 {
689 	zone_id_t zid = zone_index(z);
690 	return zone_security_array[zid];
691 }
692 
693 static inline uint32_t
zone_count_free(zone_t zone)694 zone_count_free(zone_t zone)
695 {
696 	return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
697 }
698 
699 static inline uint32_t
zone_count_allocated(zone_t zone)700 zone_count_allocated(zone_t zone)
701 {
702 	return zone->z_elems_avail - zone_count_free(zone);
703 }
704 
705 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)706 zone_scale_for_percpu(zone_t zone, vm_size_t size)
707 {
708 	if (zone->z_percpu) {
709 		size *= zpercpu_count();
710 	}
711 	return size;
712 }
713 
714 static inline vm_size_t
zone_size_wired(zone_t zone)715 zone_size_wired(zone_t zone)
716 {
717 	/*
718 	 * this either require the zone lock,
719 	 * or to be used for statistics purposes only.
720 	 */
721 	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
722 	return zone_scale_for_percpu(zone, size);
723 }
724 
725 static inline vm_size_t
zone_size_free(zone_t zone)726 zone_size_free(zone_t zone)
727 {
728 	return zone_scale_for_percpu(zone,
729 	           zone_elem_inner_size(zone) * zone_count_free(zone));
730 }
731 
732 /* Under KASAN builds, this also accounts for quarantined elements. */
733 static inline vm_size_t
zone_size_allocated(zone_t zone)734 zone_size_allocated(zone_t zone)
735 {
736 	return zone_scale_for_percpu(zone,
737 	           zone_elem_inner_size(zone) * zone_count_allocated(zone));
738 }
739 
740 static inline vm_size_t
zone_size_wasted(zone_t zone)741 zone_size_wasted(zone_t zone)
742 {
743 	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
744 	           zone_elem_outer_size(zone) * zone->z_elems_avail);
745 }
746 
747 __pure2
748 static inline bool
zone_exhaustible(zone_t zone)749 zone_exhaustible(zone_t zone)
750 {
751 	return zone->z_wired_max != ~0u;
752 }
753 
754 __pure2
755 static inline bool
zone_exhausted(zone_t zone)756 zone_exhausted(zone_t zone)
757 {
758 	return zone->z_wired_cur >= zone->z_wired_max;
759 }
760 
/*
 * Set and get the signature equivalence for the given zone
 */
764 extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq);
765 extern zone_id_t zone_get_sig_eq(zone_t zone);
766 /*
767  * Return the accumulated allocated memory on the given zone stats
768  */
769 static inline vm_size_t
zone_stats_get_mem_allocated(zone_stats_t stats)770 zone_stats_get_mem_allocated(zone_stats_t stats)
771 {
772 	return stats->zs_mem_allocated;
773 }
774 
775 /*
776  * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
777  * userspace reboot is needed. The only other way to query for this information
778  * is via mach_memory_info() which is unavailable on release kernels.
779  */
780 extern uint64_t get_zones_collectable_bytes(void);
781 
782 /*!
783  * @enum zone_gc_level_t
784  *
785  * @const ZONE_GC_TRIM
786  * Request a trimming GC: it will trim allocations in excess
787  * of the working set size estimate only.
788  *
789  * @const ZONE_GC_DRAIN
790  * Request a draining GC: this is an aggressive mode that will
791  * cause all caches to be drained and all free pages returned to the system.
792  *
793  * @const ZONE_GC_JETSAM
794  * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
795  * @c ZONE_GC_DRAIN depending on the state of the zone map.
796  * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
797  * request a @c ZONE_GC_JETSAM level.
798  */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
	ZONE_GC_TRIM,    /* trim excess over the working set size estimate */
	ZONE_GC_DRAIN,   /* drain all caches, return all free pages */
	ZONE_GC_JETSAM,  /* consider jetsam, then fall back to trim/drain */
});
804 
805 /*!
806  * @function zone_gc
807  *
808  * @brief
809  * Reduces memory used by zones by trimming caches and freelists.
810  *
811  * @discussion
812  * @c zone_gc() is called:
813  * - by the pageout daemon when the system needs more free pages.
814  * - by the VM when contiguous page allocation requests get stuck
815  *   (see vm_page_find_contiguous()).
816  *
817  * @param level         The zone GC level requested.
818  */
819 extern void     zone_gc(zone_gc_level_t level);
820 
821 #define ZONE_WSS_UPDATE_PERIOD  15
822 /*!
823  * @function compute_zone_working_set_size
824  *
825  * @brief
826  * Recomputes the working set size for every zone
827  *
828  * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
830  * computing an exponential moving average with a weight of 75%,
831  * so that the history of the last minute is the dominating factor.
832  */
833 extern void     compute_zone_working_set_size(void *);
834 
835 /* Debug logging for zone-map-exhaustion jetsams. */
836 extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
837 extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);
838 
839 /* Bootstrap zone module (create zone zone) */
840 extern void     zone_bootstrap(void);
841 
842 /* Force-enable caching on a zone, generally unsafe to call directly */
843 extern void     zone_enable_caching(zone_t zone);
844 
845 /*!
846  * @function zone_early_mem_init
847  *
848  * @brief
849  * Steal memory from pmap (prior to initialization of zalloc)
850  * for the special vm zones that allow bootstrap memory and store
851  * the range so as to facilitate range checking in zfree.
852  *
853  * @param size              the size to steal (must be a page multiple)
854  */
855 __startup_func
856 extern vm_offset_t zone_early_mem_init(
857 	vm_size_t       size);
858 
859 /*!
860  * @function zone_get_early_alloc_size
861  *
862  * @brief
863  * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
864  * of the allocation granule for the zone with the given creation flags and
865  * element size.
866  */
867 __startup_func
868 extern vm_size_t zone_get_early_alloc_size(
869 	const char          *name __unused,
870 	vm_size_t            elem_size,
871 	zone_create_flags_t  flags,
872 	vm_size_t            min_elems);
873 
874 /*!
875  * @function zone_cram_early
876  *
877  * @brief
878  * Cram memory allocated with @c zone_early_mem_init() into a zone.
879  *
880  * @param zone          The zone to cram memory into.
881  * @param newmem        The base address for the memory to cram.
882  * @param size          The size of the memory to cram into the zone.
883  */
884 __startup_func
885 extern void     zone_cram_early(
886 	zone_t          zone,
887 	vm_offset_t     newmem,
888 	vm_size_t       size);
889 
890 extern bool     zone_maps_owned(
891 	vm_address_t    addr,
892 	vm_size_t       size);
893 
894 #if KASAN_LIGHT
895 extern bool     kasan_zone_maps_owned(
896 	vm_address_t    addr,
897 	vm_size_t       size);
898 #endif /* KASAN_LIGHT */
899 
900 extern void     zone_map_sizes(
901 	vm_map_size_t  *psize,
902 	vm_map_size_t  *pfree,
903 	vm_map_size_t  *plargest_free);
904 
905 extern bool
906 zone_map_nearing_exhaustion(void);
907 
908 static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)909 zalloc_flags_get_tag(zalloc_flags_t flags)
910 {
911 	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
912 }
913 
914 extern struct kalloc_result zalloc_ext(
915 	zone_t          zone,
916 	zone_stats_t    zstats,
917 	zalloc_flags_t  flags);
918 
#if KASAN
/*
 * Under KASAN, the combined_size passed to zfree_ext() carries both the
 * element size (low 32 bits) and the user-requested size (high 32 bits).
 */
#define ZFREE_PACK_SIZE(esize, usize)   (((uint64_t)(usize) << 32) | (esize))
#define ZFREE_ELEM_SIZE(combined)       ((uint32_t)(combined))
#define ZFREE_USER_SIZE(combined)       ((combined) >> 32)
#else
/* without KASAN only the element size is carried */
#define ZFREE_PACK_SIZE(esize, usize)   (esize)
#define ZFREE_ELEM_SIZE(combined)       (combined)
#endif
927 
928 extern void     zfree_ext(
929 	zone_t          zone,
930 	zone_stats_t    zstats,
931 	void           *addr,
932 	uint64_t        combined_size);
933 
934 extern zone_id_t zone_id_for_element(
935 	void           *addr,
936 	vm_size_t       esize);
937 
938 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
939 extern void *zone_element_pgz_oob_adjust(
940 	void           *addr,
941 	vm_size_t       req_size,
942 	vm_size_t       elem_size);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
944 
945 extern void zone_element_bounds_check(
946 	vm_address_t    addr,
947 	vm_size_t       len);
948 
949 extern vm_size_t zone_element_size(
950 	void           *addr,
951 	zone_t         *z,
952 	bool            clear_oob,
953 	vm_offset_t    *oob_offs);
954 
955 /*!
956  * @function zone_spans_ro_va
957  *
958  * @abstract
959  * This function is used to check whether the specified address range
960  * spans through the read-only zone range.
961  *
962  * @discussion
963  * This only checks for the range specified within ZONE_ADDR_READONLY.
964  * The parameters addr_start and addr_end are stripped off of PAC bits
965  * before the check is made.
966  */
967 extern bool zone_spans_ro_va(
968 	vm_offset_t     addr_start,
969 	vm_offset_t     addr_end);
970 
971 /*!
972  * @function __zalloc_ro_mut_atomic
973  *
974  * @abstract
975  * This function is called from the pmap to perform the specified atomic
976  * operation on memory from the read-only allocator.
977  *
978  * @discussion
979  * This function is for internal use only and should not be called directly.
980  */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
	/*
	 * Expands to one switch case per operation width (8/16/32/64 bits);
	 * the @c _orig variants return the pre-operation value.
	 */
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		/* closed enum: any other value is a programming error */
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}
1006 
1007 /*!
1008  * @function zone_owns
1009  *
1010  * @abstract
1011  * This function is a soft version of zone_require that checks if a given
1012  * pointer belongs to the specified zone and should not be used outside
1013  * allocator code.
1014  *
1015  * @discussion
1016  * Note that zone_owns() can only work with:
1017  * - zones not allowing foreign memory
1018  * - zones in the general submap.
1019  *
1020  * @param zone          the zone the address needs to belong to.
1021  * @param addr          the element address to check.
1022  */
1023 extern bool     zone_owns(
1024 	zone_t          zone,
1025 	void           *addr);
1026 
/*!
1028  * @function zone_submap
1029  *
1030  * @param zsflags       the security flags of a specified zone.
1031  * @returns             the zone (sub)map this zone allocates from.
1032  */
1033 __pure2
1034 extern vm_map_t zone_submap(
1035 	zone_security_flags_t   zsflags);
1036 
1037 #ifndef VM_TAG_SIZECLASSES
1038 #error MAX_TAG_ZONES
1039 #endif
1040 #if VM_TAG_SIZECLASSES
1041 
1042 extern uint16_t zone_index_from_tag_index(
1043 	uint32_t        tag_zone_index);
1044 
1045 #endif /* VM_TAG_SIZECLASSES */
1046 
1047 extern lck_grp_t zone_locks_grp;
1048 
/*
 * Takes the zone lock.
 *
 * Under KASAN_FAKESTACK, splsched() is raised before taking the lock
 * for fakestack zones, and the resulting SPL is stashed in the zone so
 * zone_unlock() can restore it with splx().
 */
static inline void
zone_lock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = 0;
	if (zone->z_kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_FAKESTACK
	/* stash the SPL after the lock is held; consumed by zone_unlock() */
	zone->z_kasan_spl = s;
#endif /* KASAN_FAKESTACK */
}
1063 
/*
 * Releases the zone lock, restoring the SPL stashed by zone_lock()
 * for KASAN fakestack zones.
 */
static inline void
zone_unlock(zone_t zone)
{
#if KASAN_FAKESTACK
	/* read the stashed SPL while the lock is still held */
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_unlock(&zone->z_lock);
#if KASAN_FAKESTACK
	if (zone->z_kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_FAKESTACK */
}
1078 
1079 int track_this_zone(const char *zonename, const char *logname);
1080 extern bool panic_include_kalloc_types;
1081 extern zone_t kalloc_type_src_zone;
1082 extern zone_t kalloc_type_dst_zone;
1083 
1084 #if DEBUG || DEVELOPMENT
1085 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
1086 #endif /* DEBUG || DEVELOPMENT */
1087 
1088 __exported_pop
1089 
1090 __END_DECLS
1091 
1092 #endif  /* _KERN_ZALLOC_INTERNAL_H_ */
1093