xref: /xnu-8796.121.2/osfmk/kern/zalloc_internal.h (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61 
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65 
66 #include <os/atomic_private.h>
67 #include <sys/queue.h>
68 #include <vm/vm_map_internal.h>
69 
70 #if KASAN
71 #include <san/kasan.h>
72 #include <kern/spl.h>
#endif /* KASAN */
74 
75 /*
76  * Disable zalloc zero validation under kasan as it is
77  * double-duty with what kasan already does.
78  */
79 #if KASAN
80 #define ZALLOC_ENABLE_ZERO_CHECK        0
81 #else
82 #define ZALLOC_ENABLE_ZERO_CHECK        1
83 #endif
84 
85 #if KASAN
86 #define ZALLOC_ENABLE_LOGGING           0
87 #elif DEBUG || DEVELOPMENT
88 #define ZALLOC_ENABLE_LOGGING           1
89 #else
90 #define ZALLOC_ENABLE_LOGGING           0
91 #endif
92 
93 /*!
94  * @file <kern/zalloc_internal.h>
95  *
96  * @abstract
97  * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
98  * kalloc subsystems.
99  */
100 
101 __BEGIN_DECLS
102 
103 #pragma GCC visibility push(hidden)
104 
105 /*
106  *	A zone is a collection of fixed size blocks for which there
107  *	is fast allocation/deallocation access.  Kernel routines can
108  *	use zones to manage data structures dynamically, creating a zone
109  *	for each type of data structure to be managed.
110  *
111  */
112 
113 /*!
114  * @typedef zone_pva_t
115  *
116  * @brief
117  * Type used to point to a page virtual address in the zone allocator.
118  *
119  * @description
120  * - Valid pages have the top bit set.
121  * - 0 represents the "NULL" page
122  * - non 0 values with the top bit cleared represent queue heads,
123  *   indexed from the beginning of the __DATA section of the kernel.
124  *   (see zone_pageq_base).
125  */
typedef struct zone_packed_virtual_address {
	uint32_t packed_address; /* packed PVA; see the encoding rules documented above */
} zone_pva_t;
129 
130 /*!
131  * @struct zone_stats
132  *
133  * @abstract
134  * Per-cpu structure used for basic zone stats.
135  *
136  * @discussion
137  * The values aren't scaled for per-cpu zones.
138  */
struct zone_stats {
	uint64_t            zs_mem_allocated; /* cumulative allocation total */
	uint64_t            zs_mem_freed;     /* cumulative free total */
	uint64_t            zs_alloc_fail;    /* number of failed allocations */
	uint32_t            zs_alloc_rr;     /* allocation rr bias */
};
145 
146 typedef struct zone_magazine *zone_magazine_t;
147 
148 /*!
149  * @struct zone_depot
150  *
151  * @abstract
152  * Holds a list of full and empty magazines.
153  *
154  * @discussion
155  * The data structure is a "STAILQ" and an "SLIST" combined with counters
156  * to know their lengths in O(1). Here is a graphical example:
157  *
158  *      zd_full = 3
159  *      zd_empty = 1
160  * ╭─── zd_head
161  * │ ╭─ zd_tail
162  * │ ╰────────────────────────────────────╮
163  * │    ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
164  * ╰───>│███████┼──>│███████┼──>│███████┼──>│       ┼─> X
165  *      ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
166  */
struct zone_depot {
	uint32_t            zd_full;    /* number of full magazines on the list */
	uint32_t            zd_empty;   /* number of empty magazines on the list */
	zone_magazine_t     zd_head;    /* first magazine (full ones come first, per diagram above) */
	zone_magazine_t    *zd_tail;    /* insertion point: link where the empty magazines begin */
};
173 
174 /* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
175 #define Z_MAGIC_QUO(s)      (((1ull << 32) - 1) / (uint64_t)(s) + 1)
176 #define Z_MAGIC_ALIGNED(s)  (~0u / (uint32_t)(s) + 1)
177 
178 /*
179  * Returns (offs / size) if offs is small enough
180  * and magic = Z_MAGIC_QUO(size)
181  */
/*
 * Fast division: yields (offs / size) provided offs is small enough
 * and magic was computed as Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t scaled = offs * magic;

	/* the quotient lives in the high 32 bits of the product */
	return (uint32_t)(scaled >> 32);
}
187 
188 /*
189  * Returns (offs % size) if offs is small enough
190  * and magic = Z_MAGIC_QUO(size)
191  */
/*
 * Fast remainder: yields (offs % size) provided offs is small enough
 * and magic was computed as Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	/* fractional part of the division, in 0.32 fixed point */
	uint32_t frac = (uint32_t)(offs * magic);

	/* scale the fraction back up by size to recover the remainder */
	return (uint32_t)(((uint64_t)frac * size) >> 32);
}
199 
200 /*
201  * Returns whether (offs % size) == 0 if offs is small enough
202  * and magic = Z_MAGIC_ALIGNED(size)
203  */
/*
 * Fast alignment check: true iff (offs % size) == 0, provided offs is
 * small enough and magic was computed as Z_MAGIC_ALIGNED(size).
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	/* low 32 bits of offs * magic encode the remainder scaled by magic */
	uint32_t scaled_rem = (uint32_t)(offs * magic);

	return scaled_rem < magic;
}
209 
/*
 * Compact sizing parameters kept for the read-only zones
 * (see zone_ro_size_params[] below).
 */
struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED()  */
	uint32_t            z_elem_size;    /* size of an element                  */
};
214 
/*
 * Record describing a thread currently expanding a zone,
 * chained off zone::z_expander through ze_next.
 */
struct zone_expand {
	struct zone_expand *ze_next;
	thread_t            ze_thread;
	bool                ze_pg_wait;
	bool                ze_vm_priv;
	bool                ze_clear_priv;
};
222 
223 #define Z_WMA_UNIT (1u << 8)
224 #define Z_WMA_MIX(base, e)  ((3 * (base) + (e) * Z_WMA_UNIT) / 4)
225 
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;
	const char         *z_name;
	struct zone_view   *z_views;
	struct zone_expand *z_expander;

	uint64_t            z_quo_magic;
	uint32_t            z_align_magic;
	uint16_t            z_elem_size;
	uint16_t            z_elem_offs;
	uint16_t            z_chunk_pages;
	uint16_t            z_chunk_elems;

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1,  /* zone is (being) destroyed */
	    z_async_refilling  :1,  /* asynchronous allocation pending? */
	    z_depot_cleanup    :1,  /* per cpu depots need cleaning */
	    z_expanding_wait   :1,  /* is thread waiting for expansion? */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1,  /* the zone is percpu */
	    z_smr              :1,  /* the zone uses SMR */
	    z_permanent        :1,  /* the zone allocations are permanent */
	    z_nocaching        :1,  /* disallow zone caching for this zone */
	    collectable        :1,  /* garbage collect empty pages */
	    exhaustible        :1,  /* merely return if empty? */
	    no_callout         :1,
	    z_destructible     :1,  /* zone can be zdestroy()ed  */

	    _reserved          :7,

	/*
	 * Debugging features
	 */
	    z_pgz_tracked      :1,  /* this zone is tracked by pgzalloc */
	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
	    z_kasan_fakestacks :1,
	    z_kasan_quarantine :1,  /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
	                             * sizeclass for a particular kalloc tag */
	    z_uses_tags        :1,
	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
	    z_tbi_tag          :1;  /* Zone supports tbi tagging */

	uint8_t             z_cacheline1[0] __attribute__((aligned(64)));

	/*
	 * Zone caching / recirculation cacheline
	 *
	 * z_recirc* fields are protected by the recirculation lock.
	 *
	 * z_recirc_cont_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_WMA_UNIT units (fixed point decimal).
	 *
	 * z_recirc_cont_cur:
	 *   count of recorded contentions that will be fused
	 *   in z_recirc_cont_wma at the next period.
	 *
	 *   Note: if caching is disabled,
	 *   this field is used under the zone lock.
	 *
	 * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
	 *   tracks the history of the minimum values of z_elems_free over time
	 *   with "min" being the minimum it hit for the current period,
	 *   and "wma" the weighted moving average of those values.
	 *
	 *   This field is used if z_pcpu_cache is NULL,
	 *   otherwise it aliases with z_recirc_empty_{min,wma}
	 *
	 * z_recirc_{full,empty}_{min,wma}:
	 *   tracks the history of the minimum number of full/empty
	 *   magazines in the depot over time, with "min" being the minimum
	 *   it hit for the current period, and "wma" the weighted moving
	 *   average of those values.
	 */
	struct zone_cache  *__zpercpu z_pcpu_cache;
	struct zone_depot   z_recirc;

	hw_lck_ticket_t     z_recirc_lock;
	uint32_t            z_recirc_full_min;
	uint32_t            z_recirc_full_wma;
	union {
		uint32_t    z_recirc_empty_min;
		uint32_t    z_elems_free_min;
	};
	union {
		uint32_t    z_recirc_empty_wma;
		uint32_t    z_elems_free_wma;
	};
	uint32_t            z_recirc_cont_cur;
	uint32_t            z_recirc_cont_wma;

	uint16_t            z_depot_size;
	uint16_t            z_depot_limit;

	uint8_t             z_cacheline2[0] __attribute__((aligned(64)));

	/*
	 * often mutated fields
	 */

	hw_lck_ticket_t     z_lock;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */
	uint32_t            z_elems_rsv;
	uint32_t            z_array_size_class;

	struct zone        *z_kt_next;

	uint8_t             z_cacheline3[0] __attribute__((aligned(64)));

#if KASAN_CLASSIC
	uint16_t            z_kasan_redzone;
	spl_t               z_kasan_spl;
#endif
#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog       *z_btlog;
	struct btlog       *z_btlog_disabled;
#endif
} __attribute__((aligned((64))));
399 
400 /*!
401  * @typedef zone_security_flags_t
402  *
403  * @brief
404  * Type used to store the immutable security properties of a zone.
405  *
406  * @description
407  * These properties influence the security nature of a zone and can't be
408  * modified after lockdown.
409  */
typedef struct zone_security_flags {
	uint16_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
	    z_kheap_id         :2,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_kalloc_type      :1,  /* zones that do type-based segregation */
	    z_lifo             :1,  /* depot and recirculation layer are LIFO */
	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
	    z_unused           :1;
} zone_security_flags_t;
424 
425 
426 /*
427  * Zsecurity config to enable strict free of iokit objects to zone
428  * or heap they were allocated from.
429  *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as to
431  * not break third party kexts that haven't yet been recompiled
432  * to use the new iokit macros.
433  */
434 #if XNU_PLATFORM_MacOSX && __x86_64__
435 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
436 #else
437 #   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
438 #endif
439 
440 /*
441  * Zsecurity config to enable the read-only allocator
442  */
443 #if KASAN_CLASSIC
444 #   define ZSECURITY_CONFIG_READ_ONLY                   OFF
445 #else
446 #   define ZSECURITY_CONFIG_READ_ONLY                   ON
447 #endif
448 
449 /*
450  * Zsecurity config to enable making heap feng-shui
451  * less reliable.
452  */
453 #if KASAN_CLASSIC
454 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
455 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
456 #else
457 #   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
458 #   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
459 #endif
460 
461 /*
462  * Zsecurity config to enable adjusting of elements
463  * with PGZ-OOB to right-align them in their space.
464  */
465 #if KASAN || defined(__x86_64__)
466 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
467 #else
468 #   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
469 #endif
470 
471 /*
472  * Zsecurity config to enable kalloc type segregation
473  */
474 #if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
475 #   define ZSECURITY_CONFIG_KT_BUDGET                   85
476 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               3
477 #else
478 #   define ZSECURITY_CONFIG_KT_BUDGET                   200
479 #   define ZSECURITY_CONFIG_KT_VAR_BUDGET               3
480 #endif
481 
482 
/*
 * Option bits accepted by the kalloc_type subsystem.
 */
__options_decl(kalloc_type_options_t, uint64_t, {
	/*
	 * kalloc type option to switch default accounting to private.
	 */
	KT_OPTIONS_ACCT                         = 0x00000001,
	/*
	 * kalloc type option to print additional stats regarding zone
	 * budget distribution and signatures.
	 */
	KT_OPTIONS_DEBUG                        = 0x00000002,
	/*
	 * kalloc type option to allow loose freeing between heaps
	 */
	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
});
498 
/*
 * Well-known indices of the variable kalloc_type heaps
 * (see kalloc_type_heap_array below).
 */
__enum_decl(kt_var_heap_id_t, uint32_t, {
	/*
	 * Fake "data" heap used to link views of data-only allocation that
	 * have been redirected to KHEAP_DATA_BUFFERS
	 */
	KT_VAR_DATA_HEAP,
	/*
	 * Heap for pointer arrays
	 */
	KT_VAR_PTR_HEAP,
	/*
	 * Indicating first additional heap added
	 */
	KT_VAR__FIRST_FLEXIBLE_HEAP,
});
514 
515 /*
516  * Zone submap indices
517  *
518  * Z_SUBMAP_IDX_VM
519  * this map has the special property that its allocations
520  * can be done without ever locking the submap, and doesn't use
521  * VM entries in the map (which limits certain VM map operations on it).
522  *
523  * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
524  *
525  * On LP64 it is also used to restrict VM allocations on LP64 lower
526  * in the kernel VA space, for pointer packing purposes.
527  *
528  * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
529  * used for unrestricted allocations
530  *
531  * Z_SUBMAP_IDX_DATA
532  * used to sequester bags of bytes from all other allocations and allow VA reuse
533  * within the map
534  *
535  * Z_SUBMAP_IDX_READ_ONLY
536  * used for the read-only allocator
537  */
/* Indices into the zone submaps; see the block comment above for semantics. */
__enum_decl(zone_submap_idx_t, uint32_t, {
	Z_SUBMAP_IDX_VM,
	Z_SUBMAP_IDX_READ_ONLY,
	Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
	Z_SUBMAP_IDX_GENERAL_1,
	Z_SUBMAP_IDX_GENERAL_2,
	Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
	Z_SUBMAP_IDX_DATA,

	Z_SUBMAP_IDX_COUNT,
});
551 
552 #define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
553 
554 /*
555  * Variable kalloc_type heap config
556  */
struct kheap_info {
	zone_id_t             kh_zstart;  /* start of this heap's zone id range */
	union {
		kalloc_heap_t       kh_views;
		kalloc_type_var_view_t kt_views;
	};
};
/* A kalloc_type view is either fixed-size or variable-size. */
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;
568 
569 #define KT_VAR_MAX_HEAPS 8
570 #define MAX_ZONES       650
571 extern struct kheap_info        kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
572 extern zone_id_t _Atomic        num_zones;
573 extern uint32_t                 zone_view_count;
574 extern struct zone              zone_array[MAX_ZONES];
575 extern struct zone_size_params  zone_ro_size_params[ZONE_ID__LAST_RO + 1];
576 extern zone_security_flags_t    zone_security_array[];
577 extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
578 extern mach_memory_info_t      *panic_kext_memory_info;
579 extern vm_size_t                panic_kext_memory_size;
580 extern vm_offset_t              panic_fault_address;
581 extern uint16_t                 _zc_mag_size;
582 
583 #define zone_index_foreach(i) \
584 	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
585 	    i < num_zones_##i; i++)
586 
587 #define zone_foreach(z) \
588 	for (zone_t z = &zone_array[1], \
589 	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
590 	    z < last_zone_##z; z++)
591 
592 __abortlike
593 extern void zone_invalid_panic(zone_t zone);
594 
595 __pure2
596 static inline zone_id_t
zone_index(zone_t z)597 zone_index(zone_t z)
598 {
599 	unsigned long delta;
600 	uint64_t quo;
601 
602 	delta = (unsigned long)z - (unsigned long)zone_array;
603 	if (delta >= MAX_ZONES * sizeof(*z)) {
604 		zone_invalid_panic(z);
605 	}
606 	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
607 	__builtin_assume(quo < MAX_ZONES);
608 	return (zone_id_t)quo;
609 }
610 
611 __pure2
612 static inline bool
zone_is_ro(zone_t zone)613 zone_is_ro(zone_t zone)
614 {
615 	return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
616 	       zone <= &zone_array[ZONE_ID__LAST_RO];
617 }
618 
619 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)620 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
621 {
622 	return atop(addr ^ (addr + size - 1)) != 0;
623 }
624 
/*
 * Per-element redzone size: the KASAN_CLASSIC redzone configured for the
 * zone, or 0 on builds without KASAN_CLASSIC.
 */
__pure2
static inline uint16_t
zone_elem_redzone(zone_t zone)
{
#if KASAN_CLASSIC
	return zone->z_kasan_redzone;
#else
	(void)zone;
	return 0;
#endif
}
636 
637 __pure2
638 static inline uint16_t
zone_elem_inner_offs(zone_t zone)639 zone_elem_inner_offs(zone_t zone)
640 {
641 	return zone->z_elem_offs;
642 }
643 
644 __pure2
645 static inline uint16_t
zone_elem_outer_offs(zone_t zone)646 zone_elem_outer_offs(zone_t zone)
647 {
648 	return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
649 }
650 
651 __pure2
652 static inline vm_offset_t
zone_elem_inner_size(zone_t zone)653 zone_elem_inner_size(zone_t zone)
654 {
655 	return zone->z_elem_size;
656 }
657 
658 __pure2
659 static inline vm_offset_t
zone_elem_outer_size(zone_t zone)660 zone_elem_outer_size(zone_t zone)
661 {
662 	return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
663 }
664 
665 __pure2
666 static inline zone_security_flags_t
zone_security_config(zone_t z)667 zone_security_config(zone_t z)
668 {
669 	zone_id_t zid = zone_index(z);
670 	return zone_security_array[zid];
671 }
672 
673 static inline uint32_t
zone_count_free(zone_t zone)674 zone_count_free(zone_t zone)
675 {
676 	return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
677 }
678 
679 static inline uint32_t
zone_count_allocated(zone_t zone)680 zone_count_allocated(zone_t zone)
681 {
682 	return zone->z_elems_avail - zone_count_free(zone);
683 }
684 
685 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)686 zone_scale_for_percpu(zone_t zone, vm_size_t size)
687 {
688 	if (zone->z_percpu) {
689 		size *= zpercpu_count();
690 	}
691 	return size;
692 }
693 
694 static inline vm_size_t
zone_size_wired(zone_t zone)695 zone_size_wired(zone_t zone)
696 {
697 	/*
698 	 * this either require the zone lock,
699 	 * or to be used for statistics purposes only.
700 	 */
701 	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
702 	return zone_scale_for_percpu(zone, size);
703 }
704 
705 static inline vm_size_t
zone_size_free(zone_t zone)706 zone_size_free(zone_t zone)
707 {
708 	return zone_scale_for_percpu(zone,
709 	           zone_elem_inner_size(zone) * zone_count_free(zone));
710 }
711 
712 /* Under KASAN builds, this also accounts for quarantined elements. */
713 static inline vm_size_t
zone_size_allocated(zone_t zone)714 zone_size_allocated(zone_t zone)
715 {
716 	return zone_scale_for_percpu(zone,
717 	           zone_elem_inner_size(zone) * zone_count_allocated(zone));
718 }
719 
720 static inline vm_size_t
zone_size_wasted(zone_t zone)721 zone_size_wasted(zone_t zone)
722 {
723 	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
724 	           zone_elem_outer_size(zone) * zone->z_elems_avail);
725 }
726 
727 /*
728  * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
729  * userspace reboot is needed. The only other way to query for this information
730  * is via mach_memory_info() which is unavailable on release kernels.
731  */
732 extern uint64_t get_zones_collectable_bytes(void);
733 
734 /*!
735  * @enum zone_gc_level_t
736  *
737  * @const ZONE_GC_TRIM
738  * Request a trimming GC: it will trim allocations in excess
739  * of the working set size estimate only.
740  *
741  * @const ZONE_GC_DRAIN
742  * Request a draining GC: this is an aggressive mode that will
743  * cause all caches to be drained and all free pages returned to the system.
744  *
745  * @const ZONE_GC_JETSAM
746  * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
747  * @c ZONE_GC_DRAIN depending on the state of the zone map.
748  * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
749  * request a @c ZONE_GC_JETSAM level.
750  */
751 __enum_closed_decl(zone_gc_level_t, uint32_t, {
752 	ZONE_GC_TRIM,
753 	ZONE_GC_DRAIN,
754 	ZONE_GC_JETSAM,
755 });
756 
757 /*!
758  * @function zone_gc
759  *
760  * @brief
761  * Reduces memory used by zones by trimming caches and freelists.
762  *
763  * @discussion
764  * @c zone_gc() is called:
765  * - by the pageout daemon when the system needs more free pages.
766  * - by the VM when contiguous page allocation requests get stuck
767  *   (see vm_page_find_contiguous()).
768  *
769  * @param level         The zone GC level requested.
770  */
771 extern void     zone_gc(zone_gc_level_t level);
772 
773 extern void     zone_gc_trim(void);
774 extern void     zone_gc_drain(void);
775 
776 #define ZONE_WSS_UPDATE_PERIOD  15
777 /*!
778  * @function compute_zone_working_set_size
779  *
780  * @brief
781  * Recomputes the working set size for every zone
782  *
783  * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
785  * computing an exponential moving average with a weight of 75%,
786  * so that the history of the last minute is the dominating factor.
787  */
788 extern void     compute_zone_working_set_size(void *);
789 
790 /* Debug logging for zone-map-exhaustion jetsams. */
791 extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
792 extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);
793 
794 /* Bootstrap zone module (create zone zone) */
795 extern void     zone_bootstrap(void);
796 
797 /* Force-enable caching on a zone, generally unsafe to call directly */
798 extern void     zone_enable_caching(zone_t zone);
799 
800 /*!
801  * @function zone_early_mem_init
802  *
803  * @brief
804  * Steal memory from pmap (prior to initialization of zalloc)
805  * for the special vm zones that allow bootstrap memory and store
806  * the range so as to facilitate range checking in zfree.
807  *
808  * @param size              the size to steal (must be a page multiple)
809  */
810 __startup_func
811 extern vm_offset_t zone_early_mem_init(
812 	vm_size_t       size);
813 
814 /*!
815  * @function zone_get_early_alloc_size
816  *
817  * @brief
818  * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
819  * of the allocation granule for the zone with the given creation flags and
820  * element size.
821  */
822 __startup_func
823 extern vm_size_t zone_get_early_alloc_size(
824 	const char          *name __unused,
825 	vm_size_t            elem_size,
826 	zone_create_flags_t  flags,
827 	vm_size_t            min_elems);
828 
829 /*!
830  * @function zone_cram_early
831  *
832  * @brief
833  * Cram memory allocated with @c zone_early_mem_init() into a zone.
834  *
835  * @param zone          The zone to cram memory into.
836  * @param newmem        The base address for the memory to cram.
837  * @param size          The size of the memory to cram into the zone.
838  */
839 __startup_func
840 extern void     zone_cram_early(
841 	zone_t          zone,
842 	vm_offset_t     newmem,
843 	vm_size_t       size);
844 
845 extern bool     zone_maps_owned(
846 	vm_address_t    addr,
847 	vm_size_t       size);
848 
849 #if KASAN_LIGHT
850 extern bool     kasan_zone_maps_owned(
851 	vm_address_t    addr,
852 	vm_size_t       size);
853 #endif /* KASAN_LIGHT */
854 
855 extern void     zone_map_sizes(
856 	vm_map_size_t  *psize,
857 	vm_map_size_t  *pfree,
858 	vm_map_size_t  *plargest_free);
859 
860 extern bool
861 zone_map_nearing_exhaustion(void);
862 
863 static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)864 zalloc_flags_get_tag(zalloc_flags_t flags)
865 {
866 	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
867 }
868 
869 extern struct kalloc_result zalloc_ext(
870 	zone_t          zone,
871 	zone_stats_t    zstats,
872 	zalloc_flags_t  flags);
873 
874 #if KASAN
875 #define ZFREE_PACK_SIZE(esize, usize)   (((uint64_t)(usize) << 32) | (esize))
876 #define ZFREE_ELEM_SIZE(combined)       ((uint32_t)(combined))
877 #define ZFREE_USER_SIZE(combined)       ((combined) >> 32)
878 #else
879 #define ZFREE_PACK_SIZE(esize, usize)   (esize)
880 #define ZFREE_ELEM_SIZE(combined)       (combined)
881 #endif
882 
883 extern void     zfree_ext(
884 	zone_t          zone,
885 	zone_stats_t    zstats,
886 	void           *addr,
887 	uint64_t        combined_size);
888 
889 extern zone_id_t zone_id_for_element(
890 	void           *addr,
891 	vm_size_t       esize);
892 
893 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
894 extern void *zone_element_pgz_oob_adjust(
895 	void           *addr,
896 	vm_size_t       req_size,
897 	vm_size_t       elem_size);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
899 
900 extern void zone_element_bounds_check(
901 	vm_address_t    addr,
902 	vm_size_t       len);
903 
904 extern vm_size_t zone_element_size(
905 	void           *addr,
906 	zone_t         *z,
907 	bool            clear_oob,
908 	vm_offset_t    *oob_offs);
909 
910 /*!
911  * @function zone_spans_ro_va
912  *
913  * @abstract
914  * This function is used to check whether the specified address range
915  * spans through the read-only zone range.
916  *
917  * @discussion
918  * This only checks for the range specified within ZONE_ADDR_READONLY.
919  * The parameters addr_start and addr_end are stripped off of PAC bits
920  * before the check is made.
921  */
922 extern bool zone_spans_ro_va(
923 	vm_offset_t     addr_start,
924 	vm_offset_t     addr_end);
925 
926 /*!
927  * @function __zalloc_ro_mut_atomic
928  *
929  * @abstract
930  * This function is called from the pmap to perform the specified atomic
931  * operation on memory from the read-only allocator.
932  *
933  * @discussion
934  * This function is for internal use only and should not be called directly.
935  */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
	/*
	 * Each ZRO_ATOMIC_* opcode encodes both the operation and the operand
	 * width; the macro expands one switch case per width for an operation.
	 */
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}
961 
962 /*!
963  * @function zone_owns
964  *
965  * @abstract
966  * This function is a soft version of zone_require that checks if a given
967  * pointer belongs to the specified zone and should not be used outside
968  * allocator code.
969  *
970  * @discussion
971  * Note that zone_owns() can only work with:
972  * - zones not allowing foreign memory
973  * - zones in the general submap.
974  *
975  * @param zone          the zone the address needs to belong to.
976  * @param addr          the element address to check.
977  */
978 extern bool     zone_owns(
979 	zone_t          zone,
980 	void           *addr);
981 
982 /**!
983  * @function zone_submap
984  *
985  * @param zsflags       the security flags of a specified zone.
986  * @returns             the zone (sub)map this zone allocates from.
987  */
988 __pure2
989 extern vm_map_t zone_submap(
990 	zone_security_flags_t   zsflags);
991 
992 #ifndef VM_TAG_SIZECLASSES
993 #error MAX_TAG_ZONES
994 #endif
995 #if VM_TAG_SIZECLASSES
996 
997 extern uint16_t zone_index_from_tag_index(
998 	uint32_t        tag_zone_index);
999 
1000 #endif /* VM_TAG_SIZECLASSES */
1001 
1002 extern lck_grp_t zone_locks_grp;
1003 
/* Acquires the zone lock (raising spl first for KASAN fakestack zones). */
static inline void
zone_lock(zone_t zone)
{
#if KASAN_FAKESTACK
	spl_t s = 0;
	if (zone->z_kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_FAKESTACK
	/* stash the spl in the zone so zone_unlock() can restore it */
	zone->z_kasan_spl = s;
#endif /* KASAN_FAKESTACK */
}
1018 
/* Releases the zone lock (restoring the spl saved by zone_lock()). */
static inline void
zone_unlock(zone_t zone)
{
#if KASAN_FAKESTACK
	/* recover the spl recorded by zone_lock() before dropping the lock */
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_FAKESTACK */
	hw_lck_ticket_unlock(&zone->z_lock);
#if KASAN_FAKESTACK
	if (zone->z_kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_FAKESTACK */
}
1033 
1034 #define MAX_ZONE_NAME   32      /* max length of a zone name we can take from the boot-args */
1035 
1036 int track_this_zone(const char *zonename, const char *logname);
1037 extern bool panic_include_kalloc_types;
1038 extern zone_t kalloc_type_src_zone;
1039 extern zone_t kalloc_type_dst_zone;
1040 
1041 #if DEBUG || DEVELOPMENT
1042 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
1043 #endif /* DEBUG || DEVELOPMENT */
1044 
1045 #pragma GCC visibility pop
1046 
1047 __END_DECLS
1048 
1049 #endif  /* _KERN_ZALLOC_INTERNAL_H_ */
1050