xref: /xnu-8792.41.9/osfmk/kern/zalloc_internal.h (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61 
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65 
66 #include <os/atomic_private.h>
67 #include <sys/queue.h>
68 #include <vm/vm_map_internal.h>
69 
70 #if KASAN
71 #include <san/kasan.h>
72 #include <kern/spl.h>
#endif /* KASAN */
74 
#if KASAN_ZALLOC
/*
 * Disable zalloc zero validation under kasan as it is
 * double-duty with what kasan already does.
 */
#define ZALLOC_ENABLE_ZERO_CHECK 0
#define ZONE_ENABLE_LOGGING 0
#elif DEBUG || DEVELOPMENT
/* development kernels: zero validation and zone logging both on */
#define ZALLOC_ENABLE_ZERO_CHECK 1
#define ZONE_ENABLE_LOGGING 1
#else
/* release kernels: keep zero validation, but no zone logging */
#define ZALLOC_ENABLE_ZERO_CHECK 1
#define ZONE_ENABLE_LOGGING 0
#endif
89 
90 /*!
91  * @file <kern/zalloc_internal.h>
92  *
93  * @abstract
94  * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
95  * kalloc subsystems.
96  */
97 
98 __BEGIN_DECLS
99 
100 #pragma GCC visibility push(hidden)
101 
102 /*
103  *	A zone is a collection of fixed size blocks for which there
104  *	is fast allocation/deallocation access.  Kernel routines can
105  *	use zones to manage data structures dynamically, creating a zone
106  *	for each type of data structure to be managed.
107  *
108  */
109 
110 /*!
111  * @typedef zone_pva_t
112  *
113  * @brief
114  * Type used to point to a page virtual address in the zone allocator.
115  *
116  * @description
117  * - Valid pages have the top bit set.
118  * - 0 represents the "NULL" page
119  * - non 0 values with the top bit cleared represent queue heads,
120  *   indexed from the beginning of the __DATA section of the kernel.
121  *   (see zone_pageq_base).
122  */
123 typedef struct zone_packed_virtual_address {
124 	uint32_t packed_address;
125 } zone_pva_t;
126 
/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
	uint64_t            zs_mem_allocated;   /* bytes allocated through this zone */
	uint64_t            zs_mem_freed;       /* bytes freed back to this zone */
	uint32_t            zs_alloc_rr;        /* allocation rr bias */
};

/* Queue-head type for lists of zone magazines (the zone "depot"). */
STAILQ_HEAD(zone_depot, zone_magazine);

/*
 * Magic multipliers for constant-divisor fast division/remainder,
 * consumed by Z_FAST_QUO()/Z_FAST_MOD()/Z_FAST_ALIGNED() below.
 * see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/
 */
#define Z_MAGIC_QUO(s)      (((1ull << 32) - 1) / (uint64_t)(s) + 1)
#define Z_MAGIC_ALIGNED(s)  (~0u / (uint32_t)(s) + 1)
147 
/*
 * Fast unsigned division by a constant.
 *
 * Returns (offs / size) provided offs is small enough
 * and magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t prod = offs * magic;

	/* the quotient materializes in the high 32 bits of the product */
	return (uint32_t)(prod >> 32);
}
157 
/*
 * Fast unsigned remainder by a constant.
 *
 * Returns (offs % size) provided offs is small enough
 * and magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	/* fractional part of offs/size, as a 32-bit fixed point value */
	uint64_t frac = (uint32_t)(offs * magic);

	/* scale the fraction back up by size to recover the remainder */
	return (uint32_t)((frac * size) >> 32);
}
169 
/*
 * Fast divisibility test by a constant.
 *
 * Returns whether (offs % size) == 0 provided offs is small enough
 * and magic = Z_MAGIC_ALIGNED(size).
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	/* fractional part of offs/size; exact multiples leave it below magic */
	uint32_t frac = (uint32_t)(offs * magic);

	return frac < magic;
}
179 
/* Immutable sizing parameters of a zone (see zone_ro_size_params[]). */
struct zone_size_params {
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED()  */
	uint32_t            z_elem_size;    /* size of an element                  */
};

/*
 * Record linked onto zone->z_expander while a zone expansion/refill
 * is in flight.
 * NOTE(review): the flag semantics below are inferred from their names —
 * confirm against their users in zalloc.c before relying on them.
 */
struct zone_expand {
	struct zone_expand *ze_next;        /* next expander record in the chain */
	thread_t            ze_thread;      /* thread performing the expansion */
	bool                ze_pg_wait;     /* waiting for pages? */
	bool                ze_vm_priv;     /* VM-privileged expansion? */
	bool                ze_clear_priv;  /* clear privilege when done? */
};
192 
/*
 * Core zone descriptor. One entry per zone lives in zone_array[];
 * zone_index()/zone_by_id() convert between pointers and indices.
 * Field layout is ABI-sensitive — do not reorder.
 */
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;        /* per-cpu basic stats (struct zone_stats) */
	const char         *z_name;         /* zone name (see MAX_ZONE_NAME) */
	struct zone_view   *z_views;        /* list of views onto this zone */

	struct zone_expand *z_expander;     /* in-flight expansions (struct zone_expand) */
	struct zone_cache  *__zpercpu z_pcpu_cache; /* per-cpu caching state */

	uint64_t            z_quo_magic;    /* magic to use with Z_FAST_QUO()/Z_FAST_MOD() */
	uint32_t            z_align_magic;  /* magic to use with Z_FAST_ALIGNED() */
	uint16_t            z_elem_size;    /* size of an element (see zone_elem_size()) */
	uint16_t            z_elem_offs;    /* offset of elements (see zone_elem_offs()) */
	uint16_t            z_chunk_pages;  /* pages per chunk */
	uint16_t            z_chunk_elems;  /* elements per chunk */

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1,  /* zone is (being) destroyed */
	    z_async_refilling  :1,  /* asynchronous allocation pending? */
	    z_expanding_wait   :1,  /* is thread waiting for expansion? */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1,  /* the zone is percpu */
	    z_permanent        :1,  /* the zone allocations are permanent */
	    z_nocaching        :1,  /* disallow zone caching for this zone */
	    collectable        :1,  /* garbage collect empty pages */
	    exhaustible        :1,  /* merely return if empty? */
	    expandable         :1,  /* expand zone (with message)? */
	    no_callout         :1,
	    z_destructible     :1,  /* zone can be zdestroy()ed  */

	    _reserved          :6,

	/*
	 * Debugging features
	 */
	    alignment_required :1,  /* element alignment needs to be preserved */
	    z_pgz_tracked      :1,  /* this zone is tracked by pgzalloc */
	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
	    kasan_fakestacks   :1,  /* kasan fakestacks zone: zone_lock() raises spl */
	    kasan_noquarantine :1,  /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
	                             * sizeclass for a particular kalloc tag */
	    z_uses_tags        :1,
	    z_tags_inline      :1,
	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
	    z_tbi_tag          :1;  /* Zone supports tbi tagging */

	/*
	 * often mutated fields
	 */

	lck_ticket_t        z_lock;         /* protects the mutable state below */
	struct zone_depot   z_recirc;       /* recirculation depot of magazines */

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_contention_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
	 *
	 * z_contention_cur:
	 *   count of recorded contentions that will be fused in z_contention_wma
	 *   at the next period.
	 *
	 * z_recirc_cur:
	 *   number of magazines in the recirculation depot.
	 *
	 * z_elems_free:
	 *   number of free elements in the zone.
	 *
	 * z_elems_{min,max}:
	 *   tracks the low/high watermark of z_elems_free for the current
	 *   weighted moving average period.
	 *
	 * z_elems_free_wss:
	 *   weighted moving average of the (z_elems_free_max - z_elems_free_min)
	 *   amplitude which is used by the GC for trim operations.
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
#define Z_CONTENTION_WMA_UNIT (1u << 8)
	uint32_t            z_contention_wma;
	uint32_t            z_contention_cur;
	uint32_t            z_recirc_cur;
	uint32_t            z_elems_free_max;
	uint32_t            z_elems_free_wss;
	uint32_t            z_elems_free_min;
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */
	uint32_t            z_elems_rsv;
	uint32_t            z_array_size_class;

#if KASAN_ZALLOC
	uint32_t            z_kasan_redzone;
	spl_t               z_kasan_spl;    /* spl saved by zone_lock() for fakestacks */
#endif
#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog       *z_btlog;
	struct btlog       *z_btlog_disabled;
#endif
#if KASAN_TBI
	struct btlog       *z_btlog_kasan;
#endif /* KASAN_TBI */
	struct zone        *z_kt_next;
};
345 
/*!
 * @typedef zone_security_flags_t
 *
 * @brief
 * Type used to store the immutable security properties of a zone.
 *
 * @description
 * These properties influence the security nature of a zone and can't be
 * modified after lockdown.
 */
typedef struct zone_security_flags {
	uint16_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
	    z_kheap_id         :3,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
	    z_va_sequester     :1,  /* page sequester: no VA reuse with other zones */
	    z_kalloc_type      :1;  /* zone does type-based segregation */
} zone_security_flags_t;
369 
370 
/*
 * Zsecurity config to enable sequestering VA of zones
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_SEQUESTER                   OFF
#else
#   define ZSECURITY_CONFIG_SEQUESTER                   ON
#endif

/*
 * Zsecurity config to enable creating separate kalloc zones for
 * bags of bytes
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_SUBMAP_USER_DATA            OFF
#else
#   define ZSECURITY_CONFIG_SUBMAP_USER_DATA            ON
#endif

/*
 * Zsecurity config to enable strict free of iokit objects to zone
 * or heap they were allocated from.
 *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as not
 * to break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if XNU_PLATFORM_MacOSX && __x86_64__
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           OFF
#else
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE           ON
#endif

/*
 * Zsecurity config to enable the read-only allocator
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_READ_ONLY                   OFF
#else
#   define ZSECURITY_CONFIG_READ_ONLY                   ON
#endif

/*
 * Zsecurity config to enable making heap feng-shui
 * less reliable.
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               OFF
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             1
#else
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI               ON
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS             4
#endif

/*
 * Zsecurity config to enable adjusting of elements
 * with PGZ-OOB to right-align them in their space.
 */
#if KASAN || defined(__x86_64__) || !defined(__LP64__)
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              OFF
#else
#   define ZSECURITY_CONFIG_PGZ_OOB_ADJUST              ON
#endif

/*
 * Zsecurity config to enable kalloc type segregation
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_KALLOC_TYPE                 OFF
#   define ZSECURITY_CONFIG_KT_BUDGET                   0
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET               0
#else
#   define ZSECURITY_CONFIG_KALLOC_TYPE                 ON
#if XNU_TARGET_OS_WATCH
#   define ZSECURITY_CONFIG_KT_BUDGET                   85
#else
#   define ZSECURITY_CONFIG_KT_BUDGET                   200
#endif
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET               3
#endif
451 
452 
/* Option bits controlling kalloc_type behavior. */
__options_decl(kalloc_type_options_t, uint64_t, {
	/*
	 * kalloc type option to switch default accounting to private.
	 */
	KT_OPTIONS_ACCT                         = 0x00000001,
	/*
	 * kalloc type option to print additional stats regarding zone
	 * budget distribution and signatures.
	 */
	KT_OPTIONS_DEBUG                        = 0x00000002,
	/*
	 * kalloc type option to allow loose freeing between heaps
	 */
	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
});
468 
/* Well-known indices into the variable kalloc_type heaps. */
__enum_decl(kt_var_heap_id_t, uint32_t, {
	/*
	 * Fake "data" heap used to link views of data-only allocation that
	 * have been redirected to KHEAP_DATA_BUFFERS
	 */
	KT_VAR_DATA_HEAP,
	/*
	 * Heap for pointer arrays
	 */
	KT_VAR_PTR_HEAP,
	/*
	 * Indicating first additional heap added
	 */
	KT_VAR__FIRST_FLEXIBLE_HEAP,
});
484 
/*
 * Zone submap indices
 *
 * Z_SUBMAP_IDX_VM
 * this map has the special property that its allocations
 * can be done without ever locking the submap, and doesn't use
 * VM entries in the map (which limits certain VM map operations on it).
 *
 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
 *
 * On LP64 it is also used to restrict VM allocations on LP64 lower
 * in the kernel VA space, for pointer packing purposes.
 *
 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
 * used for unrestricted allocations
 *
 * Z_SUBMAP_IDX_DATA
 * used to sequester bags of bytes from all other allocations and allow VA reuse
 * within the map
 *
 * Z_SUBMAP_IDX_READ_ONLY
 * used for the read-only allocator
 */
__enum_decl(zone_submap_idx_t, uint32_t, {
	Z_SUBMAP_IDX_VM,
	Z_SUBMAP_IDX_READ_ONLY,
	Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
	/* extra general submaps only exist when feng-shui hardening is on */
	Z_SUBMAP_IDX_GENERAL_1,
	Z_SUBMAP_IDX_GENERAL_2,
	Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
	Z_SUBMAP_IDX_DATA,

	Z_SUBMAP_IDX_COUNT,
});
521 
#define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
#define KALLOC_DLUT_SIZE    (2048 / KALLOC_MINALIGN)

/* Configuration of a fixed-size kalloc heap and its size-class zones. */
struct kheap_zones {
	struct kalloc_zone_cfg         *cfg;
	struct kalloc_heap             *views;
	zone_kheap_id_t                 heap_id;
	uint16_t                        max_k_zone;
	uint8_t                         dlut[KALLOC_DLUT_SIZE];   /* table of indices into k_zone[] */
	uint8_t                         k_zindex_start;
	/* If there's no hit in the DLUT, then start searching from k_zindex_start. */
	zone_t                         *k_zone;
};
535 
/*
 * Variable kalloc_type heap config
 */
struct kt_heap_zones {
	zone_id_t                       kh_zstart;  /* first zone id of this heap */
	zone_kheap_id_t                 heap_id;
	struct kalloc_type_var_view    *views;
};

/* A kalloc_type view is either fixed-size or variable-size. */
typedef union kalloc_type_views {
	struct kalloc_type_view     *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;
549 
#define KT_VAR_MAX_HEAPS 8
#define MAX_ZONES       650
extern struct kt_heap_zones     kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
extern zone_id_t _Atomic        num_zones;          /* number of initialized zones */
extern uint32_t                 zone_view_count;
extern struct zone              zone_array[MAX_ZONES]; /* all zones, indexed by zone_id_t */
extern struct zone_size_params  zone_ro_size_params[ZONE_ID__LAST_RO + 1];
extern zone_security_flags_t    zone_security_array[]; /* parallel to zone_array */
extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
extern mach_memory_info_t      *panic_kext_memory_info;
extern vm_size_t                panic_kext_memory_size;
extern vm_offset_t              panic_fault_address;
extern vm_map_size_t            zone_map_size;
563 
/* Iterate zone ids 1..num_zones (num_zones snapshotted once, acquire). */
#define zone_index_foreach(i) \
	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
	    i < num_zones_##i; i++)

/* Iterate initialized zones by pointer (skips the unused zone 0 slot). */
#define zone_foreach(z) \
	for (zone_t z = &zone_array[1], \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z; z++)
572 
/* Panic with a "not a zone pointer" message; never returns. */
__abortlike
extern void zone_invalid_panic(zone_t zone);

/*
 * Return the zone for a given zone index — no bounds check is performed.
 *
 * NOTE(review): the cast-based pointer arithmetic (rather than
 * &zone_array[zid]) looks deliberate — confirm before "simplifying".
 */
__pure2
static inline zone_t
zone_by_id(size_t zid)
{
	return (zone_t)((uintptr_t)zone_array + zid * sizeof(struct zone));
}
582 
/*
 * Return the index of @c z within zone_array, panicking if the pointer
 * does not lie within the array.
 */
__pure2
static inline zone_id_t
zone_index(zone_t z)
{
	unsigned long delta;
	uint64_t quo;

	/* byte offset of z from the start of zone_array */
	delta = (unsigned long)z - (unsigned long)zone_array;
	if (delta >= MAX_ZONES * sizeof(*z)) {
		zone_invalid_panic(z);
	}
	/* fast constant division by sizeof(struct zone), see Z_FAST_QUO() */
	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
	__builtin_assume(quo < MAX_ZONES);
	return (zone_id_t)quo;
}
598 
599 __pure2
600 static inline bool
zone_is_ro(zone_t zone)601 zone_is_ro(zone_t zone)
602 {
603 	return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
604 	       zone <= &zone_array[ZONE_ID__LAST_RO];
605 }
606 
/*
 * Returns whether [addr, addr + size) spans a page boundary, i.e. the
 * first and last byte of the range live on different pages.
 */
static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
{
	/* XOR of first/last byte has page-number bits set iff pages differ */
	return atop(addr ^ (addr + size - 1)) != 0;
}
612 
/* Returns the zone's element offset (z_elem_offs). */
__pure2
static inline uint16_t
zone_elem_offs(zone_t zone)
{
	return zone->z_elem_offs;
}

/* Returns the zone's element size in bytes (z_elem_size). */
__pure2
static inline vm_offset_t
zone_elem_size(zone_t zone)
{
	return zone->z_elem_size;
}
626 
/*
 * Returns the immutable security flags for @c z
 * (zone_security_array is parallel to zone_array).
 */
__pure2
static inline zone_security_flags_t
zone_security_config(zone_t z)
{
	zone_id_t zid = zone_index(z);
	return zone_security_array[zid];
}

/* Number of currently allocated (not free) elements in the zone. */
static inline uint32_t
zone_count_allocated(zone_t zone)
{
	return zone->z_elems_avail - zone->z_elems_free;
}
640 
641 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)642 zone_scale_for_percpu(zone_t zone, vm_size_t size)
643 {
644 	if (zone->z_percpu) {
645 		size *= zpercpu_count();
646 	}
647 	return size;
648 }
649 
/* Bytes of wired memory backing the zone (scaled for per-cpu zones). */
static inline vm_size_t
zone_size_wired(zone_t zone)
{
	/*
	 * this either requires the zone lock,
	 * or to be used for statistics purposes only.
	 */
	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
	return zone_scale_for_percpu(zone, size);
}

/* Bytes held by free elements in the zone (scaled for per-cpu zones). */
static inline vm_size_t
zone_size_free(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone->z_elems_free);
}

/* Under KASAN builds, this also accounts for quarantined elements. */
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
	return zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
}

/* Wired bytes not usable as elements (fragmentation/metadata overhead). */
static inline vm_size_t
zone_size_wasted(zone_t zone)
{
	return zone_size_wired(zone) - zone_scale_for_percpu(zone,
	           (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
}
682 
/*
 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
 * userspace reboot is needed. The only other way to query for this information
 * is via mach_memory_info() which is unavailable on release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);

/*!
 * @enum zone_gc_level_t
 *
 * @const ZONE_GC_TRIM
 * Request a trimming GC: it will trim allocations in excess
 * of the working set size estimate only.
 *
 * @const ZONE_GC_DRAIN
 * Request a draining GC: this is an aggressive mode that will
 * cause all caches to be drained and all free pages returned to the system.
 *
 * @const ZONE_GC_JETSAM
 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
 * @c ZONE_GC_DRAIN depending on the state of the zone map.
 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
 * request a @c ZONE_GC_JETSAM level.
 */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
	ZONE_GC_TRIM,
	ZONE_GC_DRAIN,
	ZONE_GC_JETSAM,
});

/*!
 * @function zone_gc
 *
 * @brief
 * Reduces memory used by zones by trimming caches and freelists.
 *
 * @discussion
 * @c zone_gc() is called:
 * - by the pageout daemon when the system needs more free pages.
 * - by the VM when contiguous page allocation requests get stuck
 *   (see vm_page_find_contiguous()).
 *
 * @param level         The zone GC level requested.
 */
extern void     zone_gc(zone_gc_level_t level);

/* Convenience entry points for the corresponding zone_gc() levels. */
extern void     zone_gc_trim(void);
extern void     zone_gc_drain(void);
731 
#define ZONE_WSS_UPDATE_PERIOD  10      /* seconds between WSS recomputations */
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void     compute_zone_working_set_size(void *);

/* Debug logging for zone-map-exhaustion jetsams. */
extern void     get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
extern void     get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/* Bootstrap zone module (create zone zone) */
extern void     zone_bootstrap(void);
752 
/*!
 * @function zone_early_mem_init
 *
 * @brief
 * Steal memory from pmap (prior to initialization of zalloc)
 * for the special vm zones that allow bootstrap memory and store
 * the range so as to facilitate range checking in zfree.
 *
 * @param size              the size to steal (must be a page multiple)
 */
__startup_func
extern vm_offset_t zone_early_mem_init(
	vm_size_t       size);

/*!
 * @function zone_get_early_alloc_size
 *
 * @brief
 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
 * of the allocation granule for the zone with the given creation flags and
 * element size.
 */
__startup_func
extern vm_size_t zone_get_early_alloc_size(
	const char          *name __unused,
	vm_size_t            elem_size,
	zone_create_flags_t  flags,
	vm_size_t            min_elems);

/*!
 * @function zone_cram_early
 *
 * @brief
 * Cram memory allocated with @c zone_early_mem_init() into a zone.
 *
 * @param zone          The zone to cram memory into.
 * @param newmem        The base address for the memory to cram.
 * @param size          The size of the memory to cram into the zone.
 */
__startup_func
extern void     zone_cram_early(
	zone_t          zone,
	vm_offset_t     newmem,
	vm_size_t       size);

/* Returns whether [addr, addr+size) lies within the zone submaps. */
extern bool     zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);

#if KASAN_LIGHT
extern bool     kasan_zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);
#endif /* KASAN_LIGHT */

/* Reports total size/free space/largest free chunk of the zone maps. */
extern void     zone_map_sizes(
	vm_map_size_t  *psize,
	vm_map_size_t  *pfree,
	vm_map_size_t  *plargest_free);

extern bool
zone_map_nearing_exhaustion(void);

/* Pattern written over freed elements for use-after-free detection. */
#if defined(__LP64__)
#define ZONE_POISON       0xdeadbeefdeadbeef
#else
#define ZONE_POISON       0xdeadbeef
#endif
821 
/* Extract the VM tag encoded in zalloc flags (see Z_VM_TAG_MASK/SHIFT). */
static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
	return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
}

/* Core allocation entry point, accounting against @c zstats. */
extern struct kalloc_result zalloc_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	zalloc_flags_t  flags);

/* Core free entry point, accounting against @c zstats. */
extern void     zfree_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	void           *addr,
	vm_size_t       elem_size);

/* Returns the zone id owning @c addr (see callers for sentinel values). */
extern zone_id_t zone_id_for_element(
	void           *addr,
	vm_size_t       esize);

#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
/* Right-align a PGZ element within its slot for OOB detection. */
extern void *zone_element_pgz_oob_adjust(
	void           *addr,
	vm_size_t       req_size,
	vm_size_t       elem_size);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */

/* Panics if [addr, addr+len) is not contained within a single element. */
extern void zone_element_bounds_check(
	vm_address_t    addr,
	vm_size_t       len);

/* Returns the element size for @c addr, also reporting its zone. */
extern vm_size_t zone_element_size(
	void           *addr,
	zone_t         *z,
	bool            clear_oob,
	vm_offset_t    *oob_offs);
859 
/*!
 * @function zone_spans_ro_va
 *
 * @abstract
 * This function is used to check whether the specified address range
 * spans through the read-only zone range.
 *
 * @discussion
 * This only checks for the range specified within ZONE_ADDR_READONLY.
 * The parameters addr_start and addr_end are stripped off of PAC bits
 * before the check is made.
 */
extern bool zone_spans_ro_va(
	vm_offset_t     addr_start,
	vm_offset_t     addr_end);
875 
/*!
 * @function __zalloc_ro_mut_atomic
 *
 * @abstract
 * This function is called from the pmap to perform the specified atomic
 * operation on memory from the read-only allocator.
 *
 * @discussion
 * This function is for internal use only and should not be called directly.
 * For OR/XOR/AND/ADD the "_orig" atomics return the value the memory held
 * before the operation; XCHG returns the previous value as well.
 */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
/* Expands to the 8/16/32/64-bit switch cases for one atomic operation. */
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}
911 
/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require that checks if a given
 * pointer belongs to the specified zone and should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone          the zone the address needs to belong to.
 * @param addr          the element address to check.
 */
extern bool     zone_owns(
	zone_t          zone,
	void           *addr);

/*!
 * @function zone_submap
 *
 * @param zsflags       the security flags of a specified zone.
 * @returns             the zone (sub)map this zone allocates from.
 */
__pure2
extern vm_map_t zone_submap(
	zone_security_flags_t   zsflags);
941 
/*
 *  Structure for keeping track of a backtrace, used for leak detection.
 *  This is in the .h file because it is used during panic, see kern/debug.c
 *  A non-zero size indicates that the trace is in use.
 */
struct ztrace {
	vm_size_t               zt_size;                        /* How much memory are all the allocations referring to this trace taking up? */
	uint32_t                zt_depth;                       /* depth of stack (0 to MAX_ZTRACE_DEPTH) */
	void*                   zt_stack[MAX_ZTRACE_DEPTH];     /* series of return addresses from OSBacktrace */
	uint32_t                zt_collisions;                  /* How many times did a different stack land here while it was occupied? */
	uint32_t                zt_hit_count;                   /* for determining effectiveness of hash function */
};
954 
/* VM_TAG_SIZECLASSES must be defined by the VM headers included above. */
#ifndef VM_TAG_SIZECLASSES
#error MAX_TAG_ZONES
#endif
#if VM_TAG_SIZECLASSES

/* Map a tag sizeclass index back to its zone index. */
extern uint16_t zone_index_from_tag_index(
	uint32_t        tag_zone_index);

#endif /* VM_TAG_SIZECLASSES */

/* Lock group shared by all zone ticket locks. */
extern lck_grp_t zone_locks_grp;
966 
/*
 * Take the zone's ticket lock.
 *
 * Under KASAN, zones backing kasan fakestacks are also locked at splsched
 * and the previous spl is stashed in z_kasan_spl for zone_unlock().
 */
static inline void
zone_lock(zone_t zone)
{
#if KASAN_ZALLOC
	spl_t s = 0;
	if (zone->kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_ZALLOC */
	lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_ZALLOC
	/* saved under the lock; consumed by zone_unlock() */
	zone->z_kasan_spl = s;
#endif /* KASAN_ZALLOC */
}
981 
/*
 * Release the zone's ticket lock, restoring the spl saved by zone_lock()
 * for kasan fakestacks zones (spl must be read before dropping the lock).
 */
static inline void
zone_unlock(zone_t zone)
{
#if KASAN_ZALLOC
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_ZALLOC */
	lck_ticket_unlock(&zone->z_lock);
#if KASAN_ZALLOC
	if (zone->kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_ZALLOC */
}
996 
#define MAX_ZONE_NAME   32      /* max length of a zone name we can take from the boot-args */

/* Returns non-zero when @c zonename matches the zlog @c logname pattern. */
int track_this_zone(const char *zonename, const char *logname);
/* Panic-time state for kalloc_type confusion reporting. */
extern bool panic_include_kalloc_types;
extern zone_t kalloc_type_src_zone;
extern zone_t kalloc_type_dst_zone;

#if DEBUG || DEVELOPMENT
/* Returns the element size for @c addr and reports its VM tag. */
extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
#endif /* DEBUG || DEVELOPMENT */
1007 
1008 #pragma GCC visibility pop
1009 
1010 __END_DECLS
1011 
1012 #endif  /* _KERN_ZALLOC_INTERNAL_H_ */
1013