xref: /xnu-10002.61.3/osfmk/kern/zalloc_internal.h (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61 
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65 
66 #include <os/atomic_private.h>
67 #include <sys/queue.h>
68 #include <vm/vm_map_internal.h>
69 
70 #if KASAN
71 #include <san/kasan.h>
72 #include <kern/spl.h>
73 #endif /* KASAN */
74 
75 /*
76  * Disable zalloc zero validation under kasan as it is
77  * double-duty with what kasan already does.
78  */
79 #if KASAN
80 #define ZALLOC_ENABLE_ZERO_CHECK        0
81 #else
82 #define ZALLOC_ENABLE_ZERO_CHECK        1
83 #endif
84 
85 #if KASAN
86 #define ZALLOC_ENABLE_LOGGING           0
87 #elif DEBUG || DEVELOPMENT
88 #define ZALLOC_ENABLE_LOGGING           1
89 #else
90 #define ZALLOC_ENABLE_LOGGING           0
91 #endif
92 
93 /*!
94  * @file <kern/zalloc_internal.h>
95  *
96  * @abstract
97  * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
98  * kalloc subsystems.
99  */
100 
101 __BEGIN_DECLS
102 
103 #pragma GCC visibility push(hidden)
104 
105 /*
106  *	A zone is a collection of fixed size blocks for which there
107  *	is fast allocation/deallocation access.  Kernel routines can
108  *	use zones to manage data structures dynamically, creating a zone
109  *	for each type of data structure to be managed.
110  *
111  */
112 
113 /*!
114  * @typedef zone_pva_t
115  *
116  * @brief
117  * Type used to point to a page virtual address in the zone allocator.
118  *
119  * @description
120  * - Valid pages have the top bit set.
121  * - 0 represents the "NULL" page.
122  * - non 0 values with the top bit cleared represent queue heads,
123  *   indexed from the beginning of the __DATA section of the kernel
124  *   (see zone_pageq_base).
125  */
126 typedef struct zone_packed_virtual_address {
127 	uint32_t packed_address;    /* 32-bit encoding described above */
128 } zone_pva_t;
129 
130 /*!
131  * @struct zone_stats
132  *
133  * @abstract
134  * Per-cpu structure used for basic zone stats.
135  *
136  * @discussion
137  * The values aren't scaled for per-cpu zones.
138  *
139  * @c zs_mem_allocated is the value returned by
140  * @c zone_stats_get_mem_allocated().
141  *
142  * NOTE(review): the exact units and update rules of the other counters
143  * are not visible in this header; the field comments below are inferred
144  * from the names and should be confirmed against zalloc.c.
145  */
139 struct zone_stats {
140 	uint64_t            zs_mem_allocated; /* accumulated allocated memory */
141 	uint64_t            zs_mem_freed;     /* presumably accumulated freed memory */
142 	uint64_t            zs_alloc_fail;    /* presumably count of failed allocations */
143 	uint32_t            zs_alloc_rr;     /* allocation rr bias */
144 	uint32_t _Atomic    zs_alloc_not_shared; /* NOTE(review): meaning not visible here */
145 };
146 
147 typedef struct zone_magazine *zone_magazine_t;
148 
149 /*!
150  * @struct zone_depot
151  *
152  * @abstract
153  * Holds a list of full and empty magazines.
154  *
155  * @discussion
156  * The data structure is a "STAILQ" and an "SLIST" combined with counters
157  * to know their lengths in O(1). Here is a graphical example:
158  *
159  *      zd_full = 3
160  *      zd_empty = 1
161  * ╭─── zd_head
162  * │ ╭─ zd_tail
163  * │ ╰────────────────────────────────────╮
164  * │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
165  * ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
166  *      ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
167  *
168  * In the example, the three filled boxes are the full magazines and the
169  * last box is the single empty one; zd_tail designates the link that
170  * follows the last full magazine.
171  */
168 struct zone_depot {
169 	uint32_t            zd_full;    /* number of full magazines in the list  */
170 	uint32_t            zd_empty;   /* number of empty magazines in the list */
171 	zone_magazine_t     zd_head;    /* first magazine of the list            */
172 	zone_magazine_t    *zd_tail;    /* link after the last full magazine (see diagram) */
173 };
174 
175 /* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
176 #define Z_MAGIC_QUO(s)      (((1ull << 32) - 1) / (uint64_t)(s) + 1)
177 #define Z_MAGIC_ALIGNED(s)  (~0u / (uint32_t)(s) + 1)
178 
/*
 * Division by a constant size, done with one multiply and one shift.
 *
 * Yields (offs / size) provided that magic = Z_MAGIC_QUO(size)
 * and that offs is small enough for the approximation to be exact.
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t scaled = offs * magic;

	/* the quotient lives above the low 32 bits of the product */
	return (uint32_t)(scaled >> 32);
}
188 
/*
 * Remainder by a constant size, done without a division.
 *
 * Yields (offs % size) provided that magic = Z_MAGIC_QUO(size)
 * and that offs is small enough for the approximation to be exact.
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	/* only the low 32 bits of the product are kept */
	uint32_t frac = (uint32_t)(offs * magic);
	uint64_t scaled = (uint64_t)frac * size;

	return (uint32_t)(scaled >> 32);
}
200 
/*
 * Divisibility test by a constant size, done without a division.
 *
 * Yields whether (offs % size) == 0 provided that
 * magic = Z_MAGIC_ALIGNED(size) and that offs is small enough.
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	/* the product is deliberately truncated to 32 bits before comparing */
	uint32_t wrapped = (uint32_t)(offs * magic);

	return wrapped < magic;
}
210 
/*
 * Element size and its matching Z_FAST_ALIGNED() magic.
 *
 * This header declares one such entry per read-only zone id
 * (see zone_ro_size_params[]).
 */
211 struct zone_size_params {
212 	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED()  */
213 	uint32_t            z_elem_size;    /* size of an element                  */
214 };
215 
/*
 * Record linked from a zone through its z_expander field.
 *
 * NOTE(review): how these fields are used is not visible in this header;
 * the comments below are inferred from the names only — confirm against
 * the zone expansion code.
 */
216 struct zone_expand {
217 	struct zone_expand *ze_next;        /* next record in the list */
218 	thread_t            ze_thread;      /* presumably the thread doing the expansion */
219 	bool                ze_pg_wait;     /* presumably: waiting for pages — confirm */
220 	bool                ze_vm_priv;     /* presumably: thread runs VM-privileged — confirm */
221 	bool                ze_clear_priv;  /* presumably: privilege must be dropped after — confirm */
222 };
223 
224 #define Z_WMA_UNIT (1u << 8)
225 #define Z_WMA_MIX(base, e)  ((3 * (base) + (e) * Z_WMA_UNIT) / 4)
226 
227 struct zone {
228 	/*
229 	 * Readonly / rarely written fields
230 	 */
231 
232 	/*
233 	 * The first 4 fields match a zone_view.
234 	 *
235 	 * z_self points back to the zone when the zone is initialized,
236 	 * or is NULL otherwise.
237 	 */
238 	struct zone        *z_self;
239 	zone_stats_t        z_stats;
240 	const char         *z_name;
241 	struct zone_view   *z_views;
242 	struct zone_expand *z_expander;
243 
244 	uint64_t            z_quo_magic;    /* presumably Z_MAGIC_QUO() of the element size — confirm */
245 	uint32_t            z_align_magic;  /* presumably Z_MAGIC_ALIGNED() of the element size — confirm */
246 	uint16_t            z_elem_size;    /* returned by zone_elem_inner_size() */
247 	uint16_t            z_elem_offs;    /* returned by zone_elem_inner_offs() */
248 	uint16_t            z_chunk_pages;
249 	uint16_t            z_chunk_elems;
250 
251 	uint32_t /* 32 bits */
252 	/*
253 	 * Lifecycle state (Mutable after creation)
254 	 */
255 	    z_destroyed        :1,  /* zone is (being) destroyed */
256 	    z_async_refilling  :1,  /* asynchronous allocation pending? */
257 	    z_depot_cleanup    :1,  /* per cpu depots need cleaning */
258 	    z_expanding_wait   :1,  /* is thread waiting for expansion? */
259 	    z_exhausted_wait   :1,  /* are threads waiting for exhaustion end */
260 	    z_exhausts         :1,  /* whether the zone exhausts by design */
261 
262 	/*
263 	 * Behavior configuration bits
264 	 */
265 	    z_percpu           :1,  /* the zone is percpu */
266 	    z_smr              :1,  /* the zone uses SMR */
267 	    z_permanent        :1,  /* the zone allocations are permanent */
268 	    z_nocaching        :1,  /* disallow zone caching for this zone */
269 	    collectable        :1,  /* garbage collect empty pages */
270 	    no_callout         :1,
271 	    z_destructible     :1,  /* zone can be zdestroy()ed  */
272 
273 	    _reserved          :6,
274 
275 	/*
276 	 * Debugging features
277 	 */
278 	    z_pgz_tracked      :1,  /* this zone is tracked by pgzalloc */
279 	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
280 	    z_kasan_fakestacks :1,
281 	    z_kasan_quarantine :1,  /* whether to use the kasan quarantine */
282 	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
283 	                             * sizeclass for a particular kalloc tag */
284 	    z_uses_tags        :1,
285 	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
286 	    z_tbi_tag          :1;  /* Zone supports tbi tagging */
287 
288 	uint8_t             z_cacheline1[0] __attribute__((aligned(64)));
289 
290 	/*
291 	 * Zone caching / recirculation cacheline
292 	 *
293 	 * z_recirc* fields are protected by the recirculation lock.
294 	 *
295 	 * z_recirc_cont_wma:
296 	 *   weighted moving average of the number of contentions per second,
297 	 *   in Z_WMA_UNIT units (fixed point decimal).
298 	 *
299 	 * z_recirc_cont_cur:
300 	 *   count of recorded contentions that will be fused
301 	 *   in z_recirc_cont_wma at the next period.
302 	 *
303 	 *   Note: if caching is disabled,
304 	 *   this field is used under the zone lock.
305 	 *
306 	 * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
307 	 *   tracks the history of the minimum values of z_elems_free over time
308 	 *   with "min" being the minimum it hit for the current period,
309 	 *   and "wma" the weighted moving average of those values.
310 	 *
311 	 *   This field is used if z_pcpu_cache is NULL,
312 	 *   otherwise it aliases with z_recirc_empty_{min,wma}
313 	 *
314 	 * z_recirc_{full,empty}_{min,wma}:
315 	 *   tracks the history of the minimum number of full/empty
316 	 *   magazines in the depot over time, with "min" being the minimum
317 	 *   it hit for the current period, and "wma" the weighted moving
318 	 *   average of those values.
319 	 */
320 	struct zone_cache  *__zpercpu z_pcpu_cache;
321 	struct zone_depot   z_recirc;
322 
323 	hw_lck_ticket_t     z_recirc_lock;
324 	uint32_t            z_recirc_full_min;
325 	uint32_t            z_recirc_full_wma;
326 	union {
327 		uint32_t    z_recirc_empty_min;
328 		uint32_t    z_elems_free_min;
329 	};
330 	union {
331 		uint32_t    z_recirc_empty_wma;
332 		uint32_t    z_elems_free_wma;
333 	};
334 	uint32_t            z_recirc_cont_cur;
335 	uint32_t            z_recirc_cont_wma;
336 
337 	uint16_t            z_depot_size;
338 	uint16_t            z_depot_limit;
339 
340 	uint8_t             z_cacheline2[0] __attribute__((aligned(64)));
341 
342 	/*
343 	 * often mutated fields
344 	 */
345 
346 	hw_lck_ticket_t     z_lock;
347 
348 	/*
349 	 * Page accounting (wired / VA)
350 	 *
351 	 * Those numbers are unscaled for z_percpu zones
352 	 * (zone_scale_for_percpu() needs to be used to find the true value).
353 	 */
354 	uint32_t            z_wired_max;    /* how large can this zone grow        */
355 	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
356 	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
357 	uint32_t            z_wired_empty;  /* pages collectable by GC             */
358 	uint32_t            z_va_cur;       /* amount of VA used by this zone      */
359 
360 	/*
361 	 * list of metadata structs, which maintain per-page free element lists
362 	 */
363 	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
364 	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
365 	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
366 	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */
367 
368 	/*
369 	 * Zone statistics
370 	 *
371 	 * z_elems_avail:
372 	 *   number of elements in the zone (at all).
373 	 */
374 	uint32_t            z_elems_free;   /* Number of free elements             */
375 	uint32_t            z_elems_avail;  /* Number of elements available        */
376 	uint32_t            z_elems_rsv;
377 	uint32_t            z_array_size_class;
378 
379 	struct zone        *z_kt_next;
380 
381 	uint8_t             z_cacheline3[0] __attribute__((aligned(64)));
382 
383 #if KASAN_CLASSIC
384 	uint16_t            z_kasan_redzone;
385 	spl_t               z_kasan_spl;
386 #endif
387 
388 #if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
389 	/*
390 	 * the allocation logs are used when:
391 	 *
392 	 * - zlog<n>= boot-args are used (and then z_log_on is set)
393 	 *
394 	 * - the leak detection was triggered for the zone.
395 	 *   In that case, the log can't ever be freed,
396 	 *   but it can be enabled/disabled dynamically.
397 	 *
	 * NOTE(review): the guard above tests ZONE_ENABLE_LOGGING, whereas
	 * this header only defines ZALLOC_ENABLE_LOGGING. Confirm that
	 * ZONE_ENABLE_LOGGING is provided elsewhere (e.g. <kern/zalloc.h>)
	 * and that this is the intended macro.
397 	 */
398 	struct btlog       *z_btlog;
399 	struct btlog       *z_btlog_disabled;
400 #endif
401 } __attribute__((aligned((64))));
402 
403 /*!
404  * @typedef zone_security_flags_t
405  *
406  * @brief
407  * Type used to store the immutable security properties of a zone.
408  *
409  * @description
410  * These properties influence the security nature of a zone and can't be
411  * modified after lockdown.
412  */
413 typedef struct zone_security_flags {
414 	uint16_t
415 	/*
416 	 * Security sensitive configuration bits
417 	 */
418 	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
419 	    z_kheap_id         :2,  /* zone_kheap_id_t when part of a kalloc heap */
420 	    z_kalloc_type      :1,  /* zone does type-based segregation */
421 	    z_lifo             :1,  /* depot and recirculation layer are LIFO */
422 	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
423 	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
424 	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
425 	    z_unused           :1;
426 	/*
427 	 * Signature equivalence zone
428 	 */
429 	zone_id_t           z_sig_eq;
430 } zone_security_flags_t;
431 
432 
433 /*
434  * Zsecurity config to enable strict free of iokit objects to zone
435  * or heap they were allocated from.
436  *
437  * Turn ZSECURITY_CONFIG_STRICT_IOKIT_FREE off on x86 so as
438  * not to break third party kexts that haven't yet been recompiled
439  * to use the new iokit macros.
440  */
441 #if XNU_PLATFORM_MacOSX && __x86_64__
442 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
443 #else
444 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
445 #endif
446 
447 /*
448  * Zsecurity config to enable the read-only allocator
449  */
450 #if KASAN_CLASSIC
451 #   define ZSECURITY_CONFIG_READ_ONLY                   OFF
452 #else
453 #   define ZSECURITY_CONFIG_READ_ONLY                   ON
454 #endif
455 
456 /*
457  * Zsecurity config to enable making heap feng-shui
458  * less reliable.
459  */
460 #if KASAN_CLASSIC
461 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
462 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
463 #else
464 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
465 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
466 #endif
467 
468 /*
469  * Zsecurity config to enable adjusting of elements
470  * with PGZ-OOB to right-align them in their space.
471  */
472 #if KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING
473 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
474 #else
475 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
476 #endif
477 
478 /*
479  * Zsecurity config to enable kalloc type segregation
480  */
481 #if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
482 #   define ZSECURITY_CONFIG_KT_BUDGET                   120
483 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
484 #else
485 #   define ZSECURITY_CONFIG_KT_BUDGET                   260
486 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               6
487 #endif
488 
489 
490 __options_decl(kalloc_type_options_t, uint64_t, {
491 	/*
492 	 * kalloc type option to switch default accounting to private.
493 	 */
494 	KT_OPTIONS_ACCT                         = 0x00000001,
495 	/*
496 	 * kalloc type option to print additional stats regarding zone
497 	 * budget distribution and signatures.
498 	 */
499 	KT_OPTIONS_DEBUG                        = 0x00000002,
500 	/*
501 	 * kalloc type option to allow loose freeing between heaps
502 	 */
503 	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
504 });
505 
506 __enum_decl(kt_var_heap_id_t, uint32_t, {
507 	/*
508 	 * Fake "data" heap used to link views of data-only allocation that
509 	 * have been redirected to KHEAP_DATA_BUFFERS
510 	 */
511 	KT_VAR_DATA_HEAP,
512 	/*
513 	 * Heaps for pointer arrays
514 	 */
515 	KT_VAR_PTR_HEAP0,
516 	KT_VAR_PTR_HEAP1,
517 	/*
518 	 * Indicating first additional heap added
519 	 */
520 	KT_VAR__FIRST_FLEXIBLE_HEAP,
521 });
522 
523 /*
524  * Zone submap indices
525  *
526  * Z_SUBMAP_IDX_VM
527  * this map has the special property that its allocations
528  * can be done without ever locking the submap, and doesn't use
529  * VM entries in the map (which limits certain VM map operations on it).
530  *
531  * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
532  *
533  * On LP64 it is also used to restrict VM allocations on LP64 lower
534  * in the kernel VA space, for pointer packing purposes.
535  *
536  * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
537  * used for unrestricted allocations
538  *
539  * Z_SUBMAP_IDX_DATA
540  * used to sequester bags of bytes from all other allocations and allow VA reuse
541  * within the map
542  *
543  * Z_SUBMAP_IDX_READ_ONLY
544  * used for the read-only allocator
545  */
546 __enum_decl(zone_submap_idx_t, uint32_t, {
547 	Z_SUBMAP_IDX_VM,
548 	Z_SUBMAP_IDX_READ_ONLY,
549 	Z_SUBMAP_IDX_GENERAL_0,
550 #if ZSECURITY_CONFIG(SAD_FENG_SHUI)
551 	Z_SUBMAP_IDX_GENERAL_1,
552 	Z_SUBMAP_IDX_GENERAL_2,
553 	Z_SUBMAP_IDX_GENERAL_3,
554 #endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
555 	Z_SUBMAP_IDX_DATA,
556 
557 	Z_SUBMAP_IDX_COUNT,
558 });
559 
560 #define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
561 
562 /*
563  * Variable kalloc_type heap config
564  */
565 struct kheap_info {
566 	zone_id_t               kh_zstart;
567 	kalloc_heap_t           kh_views;
568 	kalloc_type_var_view_t  kt_views;
569 };
570 typedef union kalloc_type_views {
571 	struct kalloc_type_view     *ktv_fixed;
572 	struct kalloc_type_var_view *ktv_var;
573 } kalloc_type_views_t;
574 
575 #define KT_VAR_MAX_HEAPS 8
576 #define MAX_ZONES       690
577 extern struct kheap_info        kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
578 extern zone_id_t _Atomic        num_zones;
579 extern uint32_t                 zone_view_count;
580 extern struct zone              zone_array[MAX_ZONES];
581 extern struct zone_size_params  zone_ro_size_params[ZONE_ID__LAST_RO + 1];
582 extern zone_security_flags_t    zone_security_array[];
583 extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
584 extern mach_memory_info_t      *panic_kext_memory_info;
585 extern vm_size_t                panic_kext_memory_size;
586 extern vm_offset_t              panic_fault_address;
587 extern uint16_t                 _zc_mag_size;
588 
/*
 * Iterate over zone indices, binding each one to a zone_id_t named `i`.
 *
 * The walk starts at index 1 and stops before the value of num_zones
 * sampled once (acquire load) when the loop is entered, so zones
 * registered while the loop runs are not visited.
 */
589 #define zone_index_foreach(i) \
590 	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
591 	    i < num_zones_##i; i++)
592 
/*
 * Same as zone_index_foreach(), but binds `z` to a pointer into
 * zone_array[] instead of an index.
 */
593 #define zone_foreach(z) \
594 	for (zone_t z = &zone_array[1], \
595 	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
596 	    z < last_zone_##z; z++)
597 
598 __abortlike
599 extern void zone_invalid_panic(zone_t zone);
600 
601 __pure2
602 static inline zone_id_t
zone_index(zone_t z)603 zone_index(zone_t z)
604 {
605 	unsigned long delta;
606 	uint64_t quo;
607 
608 	delta = (unsigned long)z - (unsigned long)zone_array;
609 	if (delta >= MAX_ZONES * sizeof(*z)) {
610 		zone_invalid_panic(z);
611 	}
612 	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
613 	__builtin_assume(quo < MAX_ZONES);
614 	return (zone_id_t)quo;
615 }
616 
617 __pure2
618 static inline bool
zone_is_ro(zone_t zone)619 zone_is_ro(zone_t zone)
620 {
621 	return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
622 	       zone <= &zone_array[ZONE_ID__LAST_RO];
623 }
624 
625 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)626 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
627 {
628 	return atop(addr ^ (addr + size - 1)) != 0;
629 }
630 
631 __pure2
632 static inline uint16_t
zone_elem_redzone(zone_t zone)633 zone_elem_redzone(zone_t zone)
634 {
635 #if KASAN_CLASSIC
636 	return zone->z_kasan_redzone;
637 #else
638 	(void)zone;
639 	return 0;
640 #endif
641 }
642 
643 __pure2
644 static inline uint16_t
zone_elem_inner_offs(zone_t zone)645 zone_elem_inner_offs(zone_t zone)
646 {
647 	return zone->z_elem_offs;
648 }
649 
650 __pure2
651 static inline uint16_t
zone_elem_outer_offs(zone_t zone)652 zone_elem_outer_offs(zone_t zone)
653 {
654 	return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
655 }
656 
657 __pure2
658 static inline vm_offset_t
zone_elem_inner_size(zone_t zone)659 zone_elem_inner_size(zone_t zone)
660 {
661 	return zone->z_elem_size;
662 }
663 
664 __pure2
665 static inline vm_offset_t
zone_elem_outer_size(zone_t zone)666 zone_elem_outer_size(zone_t zone)
667 {
668 	return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
669 }
670 
671 __pure2
672 static inline zone_security_flags_t
zone_security_config(zone_t z)673 zone_security_config(zone_t z)
674 {
675 	zone_id_t zid = zone_index(z);
676 	return zone_security_array[zid];
677 }
678 
679 static inline uint32_t
zone_count_free(zone_t zone)680 zone_count_free(zone_t zone)
681 {
682 	return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
683 }
684 
685 static inline uint32_t
zone_count_allocated(zone_t zone)686 zone_count_allocated(zone_t zone)
687 {
688 	return zone->z_elems_avail - zone_count_free(zone);
689 }
690 
691 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)692 zone_scale_for_percpu(zone_t zone, vm_size_t size)
693 {
694 	if (zone->z_percpu) {
695 		size *= zpercpu_count();
696 	}
697 	return size;
698 }
699 
700 static inline vm_size_t
zone_size_wired(zone_t zone)701 zone_size_wired(zone_t zone)
702 {
703 	/*
704 	 * this either require the zone lock,
705 	 * or to be used for statistics purposes only.
706 	 */
707 	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
708 	return zone_scale_for_percpu(zone, size);
709 }
710 
711 static inline vm_size_t
zone_size_free(zone_t zone)712 zone_size_free(zone_t zone)
713 {
714 	return zone_scale_for_percpu(zone,
715 	           zone_elem_inner_size(zone) * zone_count_free(zone));
716 }
717 
718 /* Under KASAN builds, this also accounts for quarantined elements. */
719 static inline vm_size_t
zone_size_allocated(zone_t zone)720 zone_size_allocated(zone_t zone)
721 {
722 	return zone_scale_for_percpu(zone,
723 	           zone_elem_inner_size(zone) * zone_count_allocated(zone));
724 }
725 
726 static inline vm_size_t
zone_size_wasted(zone_t zone)727 zone_size_wasted(zone_t zone)
728 {
729 	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
730 	           zone_elem_outer_size(zone) * zone->z_elems_avail);
731 }
732 
733 __pure2
734 static inline bool
zone_exhaustible(zone_t zone)735 zone_exhaustible(zone_t zone)
736 {
737 	return zone->z_wired_max != ~0u;
738 }
739 
740 __pure2
741 static inline bool
zone_exhausted(zone_t zone)742 zone_exhausted(zone_t zone)
743 {
744 	return zone->z_wired_cur >= zone->z_wired_max;
745 }
746 
747 /*
748  * Set and get the signature equivalence for the given zone
749  */
750 extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq);
751 extern zone_id_t zone_get_sig_eq(zone_t zone);
752 /*
753  * Return the accumulated allocated memory on the given zone stats
754  */
755 static inline vm_size_t
zone_stats_get_mem_allocated(zone_stats_t stats)756 zone_stats_get_mem_allocated(zone_stats_t stats)
757 {
758 	return stats->zs_mem_allocated;
759 }
760 
761 /*
762  * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
763  * userspace reboot is needed. The only other way to query for this information
764  * is via mach_memory_info() which is unavailable on release kernels.
765  */
766 extern uint64_t get_zones_collectable_bytes(void);
767 
768 /*!
769  * @enum zone_gc_level_t
770  *
771  * @const ZONE_GC_TRIM
772  * Request a trimming GC: it will trim allocations in excess
773  * of the working set size estimate only.
774  *
775  * @const ZONE_GC_DRAIN
776  * Request a draining GC: this is an aggressive mode that will
777  * cause all caches to be drained and all free pages returned to the system.
778  *
779  * @const ZONE_GC_JETSAM
780  * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
781  * @c ZONE_GC_DRAIN depending on the state of the zone map.
782  * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
783  * request a @c ZONE_GC_JETSAM level.
784  */
785 __enum_closed_decl(zone_gc_level_t, uint32_t, {
786 	ZONE_GC_TRIM,
787 	ZONE_GC_DRAIN,
788 	ZONE_GC_JETSAM,
789 });
790 
791 /*!
792  * @function zone_gc
793  *
794  * @brief
795  * Reduces memory used by zones by trimming caches and freelists.
796  *
797  * @discussion
798  * @c zone_gc() is called:
799  * - by the pageout daemon when the system needs more free pages.
800  * - by the VM when contiguous page allocation requests get stuck
801  *   (see vm_page_find_contiguous()).
802  *
803  * @param level         The zone GC level requested.
804  */
805 extern void     zone_gc(zone_gc_level_t level);
806 
807 extern void     zone_gc_trim(void);
808 extern void     zone_gc_drain(void);
809 
810 #define ZONE_WSS_UPDATE_PERIOD  15
811 /*!
812  * @function compute_zone_working_set_size
813  *
814  * @brief
815  * Recomputes the working set size for every zone
816  *
817  * @discussion
818  * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
819  * computing an exponential moving average with a weight of 75%,
820  * so that the history of the last minute is the dominating factor.
821  */
822 extern void     compute_zone_working_set_size(void *);
823 
824 /* Debug logging for zone-map-exhaustion jetsams. */
825 extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
826 extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);
827 
828 /* Bootstrap zone module (create zone zone) */
829 extern void     zone_bootstrap(void);
830 
831 /* Force-enable caching on a zone, generally unsafe to call directly */
832 extern void     zone_enable_caching(zone_t zone);
833 
834 /*!
835  * @function zone_early_mem_init
836  *
837  * @brief
838  * Steal memory from pmap (prior to initialization of zalloc)
839  * for the special vm zones that allow bootstrap memory and store
840  * the range so as to facilitate range checking in zfree.
841  *
842  * @param size              the size to steal (must be a page multiple)
843  */
844 __startup_func
845 extern vm_offset_t zone_early_mem_init(
846 	vm_size_t       size);
847 
848 /*!
849  * @function zone_get_early_alloc_size
850  *
851  * @brief
852  * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
853  * of the allocation granule for the zone with the given creation flags and
854  * element size.
855  */
856 __startup_func
857 extern vm_size_t zone_get_early_alloc_size(
858 	const char          *name __unused,
859 	vm_size_t            elem_size,
860 	zone_create_flags_t  flags,
861 	vm_size_t            min_elems);
862 
863 /*!
864  * @function zone_cram_early
865  *
866  * @brief
867  * Cram memory allocated with @c zone_early_mem_init() into a zone.
868  *
869  * @param zone          The zone to cram memory into.
870  * @param newmem        The base address for the memory to cram.
871  * @param size          The size of the memory to cram into the zone.
872  */
873 __startup_func
874 extern void     zone_cram_early(
875 	zone_t          zone,
876 	vm_offset_t     newmem,
877 	vm_size_t       size);
878 
879 extern bool     zone_maps_owned(
880 	vm_address_t    addr,
881 	vm_size_t       size);
882 
883 #if KASAN_LIGHT
884 extern bool     kasan_zone_maps_owned(
885 	vm_address_t    addr,
886 	vm_size_t       size);
887 #endif /* KASAN_LIGHT */
888 
889 extern void     zone_map_sizes(
890 	vm_map_size_t  *psize,
891 	vm_map_size_t  *pfree,
892 	vm_map_size_t  *plargest_free);
893 
894 extern bool
895 zone_map_nearing_exhaustion(void);
896 
897 static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)898 zalloc_flags_get_tag(zalloc_flags_t flags)
899 {
900 	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
901 }
902 
903 extern struct kalloc_result zalloc_ext(
904 	zone_t          zone,
905 	zone_stats_t    zstats,
906 	zalloc_flags_t  flags);
907 
908 #if KASAN
909 #define ZFREE_PACK_SIZE(esize, usize)   (((uint64_t)(usize) << 32) | (esize))
910 #define ZFREE_ELEM_SIZE(combined)       ((uint32_t)(combined))
911 #define ZFREE_USER_SIZE(combined)       ((combined) >> 32)
912 #else
913 #define ZFREE_PACK_SIZE(esize, usize)   (esize)
914 #define ZFREE_ELEM_SIZE(combined)       (combined)
915 #endif
916 
917 extern void     zfree_ext(
918 	zone_t          zone,
919 	zone_stats_t    zstats,
920 	void           *addr,
921 	uint64_t        combined_size);
922 
923 extern zone_id_t zone_id_for_element(
924 	void           *addr,
925 	vm_size_t       esize);
926 
927 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
928 extern void *zone_element_pgz_oob_adjust(
929 	void           *addr,
930 	vm_size_t       req_size,
931 	vm_size_t       elem_size);
932 #endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
933 
934 extern void zone_element_bounds_check(
935 	vm_address_t    addr,
936 	vm_size_t       len);
937 
938 extern vm_size_t zone_element_size(
939 	void           *addr,
940 	zone_t         *z,
941 	bool            clear_oob,
942 	vm_offset_t    *oob_offs);
943 
944 /*!
945  * @function zone_spans_ro_va
946  *
947  * @abstract
948  * This function is used to check whether the specified address range
949  * spans through the read-only zone range.
950  *
951  * @discussion
952  * This only checks for the range specified within ZONE_ADDR_READONLY.
953  * The parameters addr_start and addr_end are stripped off of PAC bits
954  * before the check is made.
955  */
956 extern bool zone_spans_ro_va(
957 	vm_offset_t     addr_start,
958 	vm_offset_t     addr_end);
959 
960 /*!
961  * @function __zalloc_ro_mut_atomic
962  *
963  * @abstract
964  * This function is called from the pmap to perform the specified atomic
965  * operation on memory from the read-only allocator.
966  *
967  * @discussion
968  * This function is for internal use only and should not be called directly.
969  */
970 static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst,zro_atomic_op_t op,uint64_t value)971 __zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
972 {
973 #define __ZALLOC_RO_MUT_OP(op, op2) \
974 	case ZRO_ATOMIC_##op##_8: \
975 	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
976 	case ZRO_ATOMIC_##op##_16: \
977 	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
978 	case ZRO_ATOMIC_##op##_32: \
979 	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
980 	case ZRO_ATOMIC_##op##_64: \
981 	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)
982 
983 	switch (op) {
984 		__ZALLOC_RO_MUT_OP(OR, or_orig);
985 		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
986 		__ZALLOC_RO_MUT_OP(AND, and_orig);
987 		__ZALLOC_RO_MUT_OP(ADD, add_orig);
988 		__ZALLOC_RO_MUT_OP(XCHG, xchg);
989 	default:
990 		panic("%s: Invalid atomic operation: %d", __func__, op);
991 	}
992 
993 #undef __ZALLOC_RO_MUT_OP
994 }
995 
996 /*!
997  * @function zone_owns
998  *
999  * @abstract
1000  * This function is a soft version of zone_require that checks if a given
1001  * pointer belongs to the specified zone and should not be used outside
1002  * allocator code.
1003  *
1004  * @discussion
1005  * Note that zone_owns() can only work with:
1006  * - zones not allowing foreign memory
1007  * - zones in the general submap.
1008  *
1009  * @param zone          the zone the address needs to belong to.
1010  * @param addr          the element address to check.
1011  */
1012 extern bool     zone_owns(
1013 	zone_t          zone,
1014 	void           *addr);
1015 
1016 /*!
1017  * @function zone_submap
1018  *
1019  * @param zsflags       the security flags of a specified zone.
1020  * @returns             the zone (sub)map this zone allocates from.
1021  */
1022 __pure2
1023 extern vm_map_t zone_submap(
1024 	zone_security_flags_t   zsflags);
1025 
1026 #ifndef VM_TAG_SIZECLASSES
1027 #error MAX_TAG_ZONES
1028 #endif
1029 #if VM_TAG_SIZECLASSES
1030 
1031 extern uint16_t zone_index_from_tag_index(
1032 	uint32_t        tag_zone_index);
1033 
1034 #endif /* VM_TAG_SIZECLASSES */
1035 
1036 extern lck_grp_t zone_locks_grp;
1037 
1038 static inline void
zone_lock(zone_t zone)1039 zone_lock(zone_t zone)
1040 {
1041 #if KASAN_FAKESTACK
1042 	spl_t s = 0;
1043 	if (zone->z_kasan_fakestacks) {
1044 		s = splsched();
1045 	}
1046 #endif /* KASAN_FAKESTACK */
1047 	hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
1048 #if KASAN_FAKESTACK
1049 	zone->z_kasan_spl = s;
1050 #endif /* KASAN_FAKESTACK */
1051 }
1052 
1053 static inline void
zone_unlock(zone_t zone)1054 zone_unlock(zone_t zone)
1055 {
1056 #if KASAN_FAKESTACK
1057 	spl_t s = zone->z_kasan_spl;
1058 	zone->z_kasan_spl = 0;
1059 #endif /* KASAN_FAKESTACK */
1060 	hw_lck_ticket_unlock(&zone->z_lock);
1061 #if KASAN_FAKESTACK
1062 	if (zone->z_kasan_fakestacks) {
1063 		splx(s);
1064 	}
1065 #endif /* KASAN_FAKESTACK */
1066 }
1067 
1068 #define MAX_ZONE_NAME   32      /* max length of a zone name we can take from the boot-args */
1069 
1070 int track_this_zone(const char *zonename, const char *logname);
1071 extern bool panic_include_kalloc_types;
1072 extern zone_t kalloc_type_src_zone;
1073 extern zone_t kalloc_type_dst_zone;
1074 
1075 #if DEBUG || DEVELOPMENT
1076 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
1077 #endif /* DEBUG || DEVELOPMENT */
1078 
1079 #pragma GCC visibility pop
1080 
1081 __END_DECLS
1082 
1083 #endif  /* _KERN_ZALLOC_INTERNAL_H_ */
1084