1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65
66 #include <os/atomic_private.h>
67 #include <sys/queue.h>
68
#if KASAN
#include <san/kasan.h>
#include <kern/spl.h>
#endif /* KASAN */
73
/*
 * Compile-time switches for zalloc debugging features.
 *
 * ZALLOC_ENABLE_ZERO_CHECK:
 *   zalloc zero validation. It is only disabled under KASAN_ZALLOC,
 *   as it is double-duty with what kasan already does.
 *
 * ZONE_ENABLE_LOGGING:
 *   only enabled on DEBUG / DEVELOPMENT kernels that are not
 *   KASAN_ZALLOC kernels.
 */
#if KASAN_ZALLOC
#define ZALLOC_ENABLE_ZERO_CHECK        0
#else
#define ZALLOC_ENABLE_ZERO_CHECK        1
#endif

#if !KASAN_ZALLOC && (DEBUG || DEVELOPMENT)
#define ZONE_ENABLE_LOGGING             1
#else
#define ZONE_ENABLE_LOGGING             0
#endif
88
89 /*!
90 * @file <kern/zalloc_internal.h>
91 *
92 * @abstract
93 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
94 * kalloc subsystems.
95 */
96
97 __BEGIN_DECLS
98
99 #pragma GCC visibility push(hidden)
100
#if CONFIG_GZALLOC
/*
 * Per-zone gzalloc state (only present on CONFIG_GZALLOC kernels).
 *
 * NOTE(review): gzfc is presumably the gzalloc "free cache" array and
 * gzfc_index the current position in it -- confirm against gzalloc.c.
 */
struct gzalloc_data {
	uint32_t        gzfc_index;
	vm_offset_t    *gzfc;
};
typedef struct gzalloc_data gzalloc_data_t;
#endif /* CONFIG_GZALLOC */
107
108 /*
109 * A zone is a collection of fixed size blocks for which there
110 * is fast allocation/deallocation access. Kernel routines can
111 * use zones to manage data structures dynamically, creating a zone
112 * for each type of data structure to be managed.
113 *
114 */
115
116 /*!
117 * @typedef zone_pva_t
118 *
119 * @brief
120 * Type used to point to a page virtual address in the zone allocator.
121 *
122 * @description
123 * - Valid pages have the top bit set.
124 * - 0 represents the "NULL" page
125 * - non 0 values with the top bit cleared represent queue heads,
126 * indexed from the beginning of the __DATA section of the kernel.
127 * (see zone_pageq_base).
128 */
/* Wraps the 32-bit packed page address in a dedicated struct type. */
struct zone_packed_virtual_address {
	uint32_t        packed_address;
};
typedef struct zone_packed_virtual_address zone_pva_t;
132
133 /*!
134 * @struct zone_stats
135 *
136 * @abstract
137 * Per-cpu structure used for basic zone stats.
138 *
139 * @discussion
140 * The values aren't scaled for per-cpu zones.
141 */
struct zone_stats {
	/*
	 * NOTE(review): presumably running totals of memory allocated
	 * and freed -- confirm the unit (bytes) against zalloc.c.
	 */
	uint64_t        zs_mem_allocated;
	uint64_t        zs_mem_freed;

	/* allocation rr bias */
	uint32_t        zs_alloc_rr;
};
147
/*
 * Declares struct zone_depot: a <sys/queue.h> singly-linked tail queue
 * head whose elements are struct zone_magazine.
 */
STAILQ_HEAD(zone_depot, zone_magazine);
149
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL otherwise.
	 */
	struct zone        *z_self;
	zone_stats_t        z_stats;
	const char         *z_name;
	struct zone_view   *z_views;

	struct thread      *z_expander;
	struct zone_cache  *__zpercpu z_pcpu_cache;

	uint16_t            z_chunk_pages;  /* size used for more memory in pages  */
	uint16_t            z_chunk_elems;  /* count of allocations per chunk      */
	uint16_t            z_elems_rsv;    /* maintain a free reserve of elements */
	uint16_t            z_elem_size;    /* size of an element                  */
	uint16_t            z_pgz_oob_offs; /* element initial offset              */

	/*
	 * Flag bits. The widths below add up to 48 bits:
	 * 4 (lifecycle) + 8 (behavior) + 19 (reserved) + 17 (debugging).
	 */
	uint64_t /* 48 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	    z_destroyed        :1,  /* zone is (being) destroyed */
	    z_async_refilling  :1,  /* asynchronous allocation pending? */
	    z_expanding_wait   :1,  /* is thread waiting for expansion? */
	    z_expander_vm_priv :1,  /* a vm privileged thread is expanding */

	/*
	 * Behavior configuration bits
	 */
	    z_percpu           :1,  /* the zone is percpu */
	    z_permanent        :1,  /* the zone allocations are permanent */
	    z_nocaching        :1,  /* disallow zone caching for this zone */
	    collectable        :1,  /* garbage collect empty pages */
	    exhaustible        :1,  /* merely return if empty? */
	    expandable         :1,  /* expand zone (with message)? */
	    no_callout         :1,
	    z_destructible     :1,  /* zone can be zdestroy()ed  */

	    _reserved          :19,

	/*
	 * Debugging features
	 */
	    alignment_required :1,  /* element alignment needs to be preserved */
	    z_pgz_tracked      :1,  /* this zone is tracked by pgzalloc */
	    z_pgz_use_guards   :1,  /* this zone uses guards with PGZ */
	    z_gzalloc_tracked  :1,  /* this zone is tracked by gzalloc */
	    z_nogzalloc        :1,  /* this zone doesn't participate with (p)gzalloc */
	    kasan_fakestacks   :1,
	    kasan_noquarantine :1,  /* whether to use the kasan quarantine */
	    z_tags_sizeclass   :6,  /* idx into zone_tags_sizeclasses to associate
	                             * sizeclass for a particular kalloc tag */
	    z_uses_tags        :1,
	    z_tags_inline      :1,
	    z_log_on           :1,  /* zone logging was enabled by boot-arg */
	    z_tbi_tag          :1;  /* Zone supports tbi tagging */

	/*
	 * often mutated fields
	 */

	lck_spin_t          z_lock;
	struct zone_depot   z_recirc;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t            z_wired_max;    /* how large can this zone grow        */
	uint32_t            z_wired_hwm;    /* z_wired_cur high watermark          */
	uint32_t            z_wired_cur;    /* number of pages used by this zone   */
	uint32_t            z_wired_empty;  /* pages collectable by GC             */
	uint32_t            z_va_cur;       /* amount of VA used by this zone      */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t          z_pageq_empty;  /* populated, completely empty pages   */
	zone_pva_t          z_pageq_partial;/* populated, partially filled pages   */
	zone_pva_t          z_pageq_full;   /* populated, completely full pages    */
	zone_pva_t          z_pageq_va;     /* non-populated VA pages              */

	/*
	 * Zone statistics
	 *
	 * z_contention_wma:
	 *   weighted moving average of the number of contentions per second,
	 *   in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
	 *
	 * z_contention_cur:
	 *   count of recorded contentions that will be fused in z_contention_wma
	 *   at the next period.
	 *
	 * z_recirc_cur:
	 *   number of magazines in the recirculation depot.
	 *
	 * z_elems_free:
	 *   number of free elements in the zone.
	 *
	 * z_elems_free_{min,max}:
	 *   tracks the low/high watermark of z_elems_free for the current
	 *   weighted moving average period.
	 *
	 * z_elems_free_wss:
	 *   weighted moving average of the (z_elems_free_max - z_elems_free_min)
	 *   amplitude which is used by the GC for trim operations.
	 *
	 * z_elems_avail:
	 *   number of elements in the zone (at all).
	 */
#define Z_CONTENTION_WMA_UNIT (1u << 8)
	uint32_t            z_contention_wma;
	uint32_t            z_contention_cur;
	uint32_t            z_recirc_cur;
	uint32_t            z_elems_free_max;
	uint32_t            z_elems_free_wss;
	uint32_t            z_elems_free_min;
	uint32_t            z_elems_free;   /* Number of free elements             */
	uint32_t            z_elems_avail;  /* Number of elements available        */

#if CONFIG_GZALLOC
	gzalloc_data_t      gz;
#endif
#if KASAN_ZALLOC
	uint32_t            z_kasan_redzone;
	spl_t               z_kasan_spl;    /* saved by zone_lock(), consumed by zone_unlock() */
#endif
#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 *   In that case, the log can't ever be freed,
	 *   but it can be enabled/disabled dynamically.
	 */
	struct btlog       *z_btlog;
	struct btlog       *z_btlog_disabled;
#endif
#if DEBUG || DEVELOPMENT
	struct zone        *z_kt_next;
#endif
};
304
305 /*!
306 * @typedef zone_security_flags_t
307 *
308 * @brief
309 * Type used to store the immutable security properties of a zone.
310 *
311 * @description
312 * These properties influence the security nature of a zone and can't be
313 * modified after lockdown.
314 */
typedef struct zone_security_flags {
	/* The bit-field widths below add up to exactly 16 bits. */
	uint16_t
	/*
	 * Security sensitive configuration bits
	 */
	    z_submap_idx       :8,  /* a Z_SUBMAP_IDX_* value */
	    z_submap_from_end  :1,  /* allocate from the left or the right ? */
	    z_kheap_id         :3,  /* zone_kheap_id_t when part of a kalloc heap */
	    z_allows_foreign   :1,  /* allow non-zalloc space  */
	    z_noencrypt        :1,  /* do not encrypt pages when hibernating */
	    z_va_sequester     :1,  /* page sequester: no VA reuse with other zones */
	    z_kalloc_type      :1;  /* zone does type-based segregation */
} zone_security_flags_t;
328
329
/*
 * Zsecurity config to enable sequestering VA of zones
 *
 * ON for LP64 kernels that are not KASAN_ZALLOC kernels, OFF otherwise.
 */
#if !KASAN_ZALLOC && defined(__LP64__)
#   define ZSECURITY_CONFIG_SEQUESTER ON
#else
#   define ZSECURITY_CONFIG_SEQUESTER OFF
#endif

/*
 * Zsecurity config to enable creating separate kalloc zones for
 * bags of bytes
 *
 * ON for LP64 kernels that are not KASAN_ZALLOC kernels, OFF otherwise.
 */
#if !KASAN_ZALLOC && defined(__LP64__)
#   define ZSECURITY_CONFIG_SUBMAP_USER_DATA ON
#else
#   define ZSECURITY_CONFIG_SUBMAP_USER_DATA OFF
#endif
348
/*
 * Leave kext heap on macOS for kalloc/kalloc_type callsites that aren't
 * in the BootKC.
 *
 * - KASAN_ZALLOC or non-LP64 kernels: OFF
 * - macOS (PLATFORM_MacOSX):          ON  (the kext heap is kept)
 * - every other platform:             OFF
 */
#if KASAN_ZALLOC || !defined(__LP64__)
#   define ZSECURITY_CONFIG_SEQUESTER_KEXT_KALLOC OFF
#elif PLATFORM_MacOSX
#   define ZSECURITY_CONFIG_SEQUESTER_KEXT_KALLOC ON
#else
#   define ZSECURITY_CONFIG_SEQUESTER_KEXT_KALLOC OFF
#endif
360
/*
 * Zsecurity config to enable strict free of iokit objects to zone
 * or heap they were allocated from.
 *
 * Turn ZSECURITY_CONFIG_STRICT_IOKIT_FREE off on x86 so as not to
 * break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if PLATFORM_MacOSX && __x86_64__
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE OFF
#else
#   define ZSECURITY_CONFIG_STRICT_IOKIT_FREE ON
#endif
374
/*
 * Zsecurity config to enable the read-only allocator
 *
 * ON for LP64 kernels that are not KASAN_ZALLOC kernels, OFF otherwise.
 */
#if !KASAN_ZALLOC && defined(__LP64__)
#   define ZSECURITY_CONFIG_READ_ONLY ON
#else
#   define ZSECURITY_CONFIG_READ_ONLY OFF
#endif

/*
 * Zsecurity config to enable making heap feng-shui
 * less reliable.
 *
 * When enabled, 4 general submaps are configured instead of 1.
 */
#if !KASAN_ZALLOC && defined(__LP64__)
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI   ON
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS 4
#else
#   define ZSECURITY_CONFIG_SAD_FENG_SHUI   OFF
#   define ZSECURITY_CONFIG_GENERAL_SUBMAPS 1
#endif

/*
 * Zsecurity config to enable kalloc type segregation
 *
 * When enabled, the zone budget is 85 on watch targets and 200
 * everywhere else, and the variable budget is 3.
 * When disabled, both budgets are 0.
 */
#if !KASAN_ZALLOC && defined(__LP64__)
#   define ZSECURITY_CONFIG_KALLOC_TYPE     ON
#if !XNU_TARGET_OS_WATCH
#   define ZSECURITY_CONFIG_KT_BUDGET       200
#else
#   define ZSECURITY_CONFIG_KT_BUDGET       85
#endif
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET   3
#else
#   define ZSECURITY_CONFIG_KALLOC_TYPE     OFF
#   define ZSECURITY_CONFIG_KT_BUDGET       0
#   define ZSECURITY_CONFIG_KT_VAR_BUDGET   0
#endif
412
413
/*
 * Zsecurity options that can be toggled, as opposed to configs
 *
 * Each value is a bit tested against the zsecurity_options global
 * by ZSECURITY_ENABLED().
 */
__options_decl(zone_security_options_t, uint64_t, {
	/*
	 * Zsecurity option to enable the kernel and kalloc data maps.
	 */
	ZSECURITY_OPTIONS_KERNEL_DATA_MAP       = 0x00000020,
});
423
/*
 * ZSECURITY_CONFIG(name)
 *   expands to 1 when ZSECURITY_CONFIG_<name> is defined as ON,
 *   and to 0 when it is defined as OFF.
 *
 *   The expansion goes through two helper levels so that
 *   ZSECURITY_CONFIG_<name> is first replaced by ON / OFF, and only then
 *   pasted onto ZSECURITY_NOT_A_COMPILE_TIME_CONFIG__. Any other value
 *   leaves an undefined ZSECURITY_NOT_A_COMPILE_TIME_CONFIG__<value>()
 *   call behind.
 *
 * ZSECURITY_ENABLED(name)
 *   tests the ZSECURITY_OPTIONS_<name> bit(s) in zsecurity_options.
 */
#define ZSECURITY_NOT_A_COMPILE_TIME_CONFIG__ON()   1
#define ZSECURITY_NOT_A_COMPILE_TIME_CONFIG__OFF()  0
#define ZSECURITY_CONFIG2(state) ZSECURITY_NOT_A_COMPILE_TIME_CONFIG__##state()
#define ZSECURITY_CONFIG1(state) ZSECURITY_CONFIG2(state)
#define ZSECURITY_CONFIG(name)   ZSECURITY_CONFIG1(ZSECURITY_CONFIG_##name)

#define ZSECURITY_ENABLED(name)  (zsecurity_options & ZSECURITY_OPTIONS_##name)
430
/*
 * Option bits for kalloc_type. Each value is a distinct single bit.
 */
__options_decl(kalloc_type_options_t, uint64_t, {
	/*
	 * kalloc type option to switch default accounting to private.
	 */
	KT_OPTIONS_ACCT                         = 0x00000001,
	/*
	 * kalloc type option to print additional stats regarding zone
	 * budget distribution and signatures.
	 */
	KT_OPTIONS_DEBUG                        = 0x00000002,
	/*
	 * kalloc type option to allow loose freeing between heaps
	 */
	KT_OPTIONS_LOOSE_FREE                   = 0x00000004,
});
446
/*
 * Identifiers of the variable kalloc_type heaps.
 *
 * The two first values are fixed; KT_VAR__FIRST_FLEXIBLE_HEAP marks
 * where additional heaps start.
 */
__enum_decl(kt_var_heap_id_t, uint32_t, {
	/*
	 * Fake "data" heap used to link views of data-only allocation that
	 * have been redirected to KHEAP_DATA_BUFFERS
	 */
	KT_VAR_DATA_HEAP,
	/*
	 * Heap for pointer arrays
	 */
	KT_VAR_PTR_HEAP,
	/*
	 * Indicating first additional heap added
	 */
	KT_VAR__FIRST_FLEXIBLE_HEAP,
});
462
/*
 * Zone submap indices
 *
 * Z_SUBMAP_IDX_VM
 * this map has the special property that its allocations
 * can be done without ever locking the submap, and doesn't use
 * VM entries in the map (which limits certain VM map operations on it).
 *
 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
 *
 * On LP64 it is also used to restrict VM allocations lower
 * in the kernel VA space, for pointer packing purposes.
 *
 * Z_SUBMAP_IDX_READ_ONLY
 * used for the read-only allocator
 *
 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
 * used for unrestricted allocations
 * (indices 1 to 3 only exist when ZSECURITY_CONFIG(SAD_FENG_SHUI) is on).
 *
 * Z_SUBMAP_IDX_DATA
 * used to sequester bags of bytes from all other allocations and allow VA reuse
 * within the map
 *
 * Z_SUBMAP_IDX_COUNT
 * number of submap indices, must stay last.
 */
__enum_decl(zone_submap_idx_t, uint32_t, {
	Z_SUBMAP_IDX_VM,
	Z_SUBMAP_IDX_READ_ONLY,
	Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
	Z_SUBMAP_IDX_GENERAL_1,
	Z_SUBMAP_IDX_GENERAL_2,
	Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
	Z_SUBMAP_IDX_DATA,

	Z_SUBMAP_IDX_COUNT,
});
499
/*
 * KALLOC_MINALIGN:  minimum kalloc alignment, 2^KALLOC_LOG2_MINALIGN.
 * KALLOC_DLUT_SIZE: number of entries in the direct lookup table (dlut):
 *                   one per KALLOC_MINALIGN step up to 2048.
 */
#define KALLOC_MINALIGN     (1 << KALLOC_LOG2_MINALIGN)
#define KALLOC_DLUT_SIZE    (2048 / KALLOC_MINALIGN)

/*
 * Per-heap kalloc zone configuration.
 *
 * NOTE(review): cfg / views / max_k_zone / kalloc_max are not described in
 * this header; see the kalloc implementation for their exact semantics.
 */
struct kheap_zones {
	struct kalloc_zone_cfg         *cfg;
	struct kalloc_heap             *views;
	zone_kheap_id_t                 heap_id;
	uint16_t                        max_k_zone;
	uint8_t                         dlut[KALLOC_DLUT_SIZE];   /* table of indices into k_zone[] */
	/* If there's no hit in the DLUT, then start searching from k_zindex_start. */
	uint8_t                         k_zindex_start;
	zone_t                         *k_zone;
	vm_size_t                       kalloc_max;
};
514
/*
 * Variable kalloc_type heap config
 *
 * One entry of kalloc_type_heap_array[].
 * NOTE(review): kh_zstart is presumably the id of the first zone of the
 * heap -- confirm against the kalloc implementation.
 */
struct kt_heap_zones {
	zone_id_t                       kh_zstart;
	zone_kheap_id_t                 heap_id;
	struct kalloc_type_var_view    *views;
};
523
/* Number of entries in kalloc_type_heap_array[]. */
#define KT_VAR_MAX_HEAPS        8
/* Upper bound on zone indices, enforced by zone_index(). */
#define MAX_ZONES               650

/*
 * Allocator globals defined by the zalloc / kalloc implementation.
 *
 * zone_array[], zone_security_array[] and zone_ro_elem_size[] are all
 * indexed by zone index (see zone_index() / zone_for_index()).
 */
extern struct kt_heap_zones     kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
extern zone_security_options_t  zsecurity_options;
extern zone_id_t _Atomic        num_zones;
extern uint32_t                 zone_view_count;
extern struct zone              zone_array[];
extern zone_security_flags_t    zone_security_array[];
extern uint16_t                 zone_ro_elem_size[];
extern const char * const       kalloc_heap_names[KHEAP_ID_COUNT];
extern mach_memory_info_t      *panic_kext_memory_info;
extern vm_size_t                panic_kext_memory_size;
extern vm_offset_t              panic_fault_address;
537
/*
 * zone_index_foreach(i)
 *   loop header declaring a zone_id_t named `i` that goes from 1 to
 *   num_zones - 1. Index 0 is skipped.
 *
 * zone_foreach(z)
 *   loop header declaring a zone_t named `z` that goes from &zone_array[1]
 *   to the last zone below &zone_array[num_zones].
 *
 * In both cases num_zones is loaded once (acquire) when the loop starts,
 * into a hidden variable whose name is derived from the iterator name;
 * zones created while the loop runs are not visited.
 */
#define zone_index_foreach(i) \
	for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
	    i < num_zones_##i; i++)

#define zone_foreach(z) \
	for (zone_t z = &zone_array[1], \
	    last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
	    z < last_zone_##z; z++)
546
/*
 * An address range described by its two bounds.
 *
 * The structure is aligned to its own size (two vm_offset_t).
 * NOTE(review): max_address is presumably exclusive -- confirm against
 * zone_range_contains().
 */
struct zone_map_range {
	vm_offset_t min_address;
	vm_offset_t max_address;
} __attribute__((aligned(2 * sizeof(vm_offset_t))));
551
/*
 * Called by zone_index() when a zone pointer maps to an index that is not
 * below MAX_ZONES. Declared __abortlike: it is not expected to return.
 */
__abortlike
extern void zone_invalid_panic(zone_t zone);
554
555 __pure2
556 static inline zone_id_t
zone_index(zone_t z)557 zone_index(zone_t z)
558 {
559 zone_id_t zid = (zone_id_t)(z - zone_array);
560 if (__improbable(zid >= MAX_ZONES)) {
561 zone_invalid_panic(z);
562 }
563 return zid;
564 }
565
566 __pure2
567 static inline zone_t
zone_for_index(zone_id_t zid)568 zone_for_index(zone_id_t zid)
569 {
570 return &zone_array[zid];
571 }
572
573 __pure2
574 static inline bool
zone_is_ro(zone_t zone)575 zone_is_ro(zone_t zone)
576 {
577 return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
578 zone <= &zone_array[ZONE_ID__LAST_RO];
579 }
580
581 __pure2
582 static inline vm_offset_t
zone_elem_size_ro(zone_id_t zid)583 zone_elem_size_ro(zone_id_t zid)
584 {
585 return zone_ro_elem_size[zid];
586 }
587
588 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)589 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
590 {
591 return atop(addr ^ (addr + size - 1)) != 0;
592 }
593
594 __pure2
595 static inline uint16_t
zone_oob_offs(zone_t zone)596 zone_oob_offs(zone_t zone)
597 {
598 uint16_t offs = 0;
599 #if CONFIG_PROB_GZALLOC
600 offs = zone->z_pgz_oob_offs;
601 #else
602 (void)zone;
603 #endif
604 return offs;
605 }
606
607 __pure2
608 static inline vm_offset_t
zone_elem_size(zone_t zone)609 zone_elem_size(zone_t zone)
610 {
611 return zone->z_elem_size;
612 }
613
614 __pure2
615 static inline vm_offset_t
zone_elem_size_safe(zone_t zone)616 zone_elem_size_safe(zone_t zone)
617 {
618 if (zone_is_ro(zone)) {
619 zone_id_t zid = zone_index(zone);
620 return zone_elem_size_ro(zid);
621 }
622 return zone_elem_size(zone);
623 }
624
625 __pure2
626 static inline zone_security_flags_t
zone_security_config(zone_t z)627 zone_security_config(zone_t z)
628 {
629 zone_id_t zid = zone_index(z);
630 return zone_security_array[zid];
631 }
632
633 static inline uint32_t
zone_count_allocated(zone_t zone)634 zone_count_allocated(zone_t zone)
635 {
636 return zone->z_elems_avail - zone->z_elems_free;
637 }
638
639 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)640 zone_scale_for_percpu(zone_t zone, vm_size_t size)
641 {
642 if (zone->z_percpu) {
643 size *= zpercpu_count();
644 }
645 return size;
646 }
647
648 static inline vm_size_t
zone_size_wired(zone_t zone)649 zone_size_wired(zone_t zone)
650 {
651 /*
652 * this either require the zone lock,
653 * or to be used for statistics purposes only.
654 */
655 vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
656 return zone_scale_for_percpu(zone, size);
657 }
658
659 static inline vm_size_t
zone_size_free(zone_t zone)660 zone_size_free(zone_t zone)
661 {
662 return zone_scale_for_percpu(zone,
663 (vm_size_t)zone->z_elem_size * zone->z_elems_free);
664 }
665
666 /* Under KASAN builds, this also accounts for quarantined elements. */
667 static inline vm_size_t
zone_size_allocated(zone_t zone)668 zone_size_allocated(zone_t zone)
669 {
670 return zone_scale_for_percpu(zone,
671 (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
672 }
673
674 static inline vm_size_t
zone_size_wasted(zone_t zone)675 zone_size_wasted(zone_t zone)
676 {
677 return zone_size_wired(zone) - zone_scale_for_percpu(zone,
678 (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
679 }
680
/*
 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
 * userspace reboot is needed. The only other way to query for this information
 * is via mach_memory_info() which is unavailable on release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);

/*!
 * @enum zone_gc_level_t
 *
 * @const ZONE_GC_TRIM
 * Request a trimming GC: it will trim allocations in excess
 * of the working set size estimate only.
 *
 * @const ZONE_GC_DRAIN
 * Request a draining GC: this is an aggressive mode that will
 * cause all caches to be drained and all free pages returned to the system.
 *
 * @const ZONE_GC_JETSAM
 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
 * @c ZONE_GC_DRAIN depending on the state of the zone map.
 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
 * request a @c ZONE_GC_JETSAM level.
 */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
	ZONE_GC_TRIM,
	ZONE_GC_DRAIN,
	ZONE_GC_JETSAM,
});

/*!
 * @function zone_gc
 *
 * @brief
 * Reduces memory used by zones by trimming caches and freelists.
 *
 * @discussion
 * @c zone_gc() is called:
 * - by the pageout daemon when the system needs more free pages.
 * - by the VM when contiguous page allocation requests get stuck
 *   (see vm_page_find_contiguous()).
 *
 * @param level         The zone GC level requested.
 */
extern void zone_gc(zone_gc_level_t level);

/*
 * NOTE(review): presumably convenience entry points for the
 * ZONE_GC_TRIM and ZONE_GC_DRAIN levels -- confirm in zalloc.c.
 */
extern void zone_gc_trim(void);
extern void zone_gc_drain(void);

/* Period, in seconds, of the working set size updates (see below). */
#define ZONE_WSS_UPDATE_PERIOD  10
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void compute_zone_working_set_size(void *);

/* Debug logging for zone-map-exhaustion jetsams. */
extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/* Bootstrap zone module (create zone zone) */
extern void zone_bootstrap(void);
750
/*!
 * @function zone_foreign_mem_init
 *
 * @brief
 * Steal memory from pmap (prior to initialization of zalloc)
 * for the special vm zones that allow foreign memory and store
 * the range so as to facilitate range checking in zfree.
 *
 * @param size              the size to steal (must be a page multiple)
 * @param allow_meta_steal  whether allocator metadata should be stolen too
 *                          due to a non natural config.
 *
 * @returns                 NOTE(review): presumably the base address of the
 *                          stolen memory -- confirm in zalloc.c.
 */
__startup_func
extern vm_offset_t zone_foreign_mem_init(
	vm_size_t       size,
	bool            allow_meta_steal);

/*!
 * @function zone_get_foreign_alloc_size
 *
 * @brief
 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
 * of the allocation granule for the zone with the given creation flags and
 * element size.
 *
 * @param name          the zone name (marked @c __unused in this declaration).
 * @param elem_size     the element size of the zone.
 * @param flags         the zone creation flags.
 * @param min_pages     the minimum number of pages the result must cover.
 */
__startup_func
extern vm_size_t zone_get_foreign_alloc_size(
	const char          *name __unused,
	vm_size_t            elem_size,
	zone_create_flags_t  flags,
	uint16_t             min_pages);

/*!
 * @function zone_cram_foreign
 *
 * @brief
 * Cram memory allocated with @c zone_foreign_mem_init() into a zone.
 *
 * @param zone          The zone to cram memory into.
 * @param newmem        The base address for the memory to cram.
 * @param size          The size of the memory to cram into the zone.
 */
__startup_func
extern void zone_cram_foreign(
	zone_t          zone,
	vm_offset_t     newmem,
	vm_size_t       size);

/*
 * NOTE(review): presumably returns whether [addr, addr + size) belongs to
 * the zone maps -- confirm in zalloc.c.
 */
extern bool zone_maps_owned(
	vm_address_t    addr,
	vm_size_t       size);

/*
 * Reports sizes about the zone map through its three out parameters
 * (total size, free size, largest free size, going by their names).
 */
extern void zone_map_sizes(
	vm_map_size_t  *psize,
	vm_map_size_t  *pfree,
	vm_map_size_t  *plargest_free);

/* Returns whether the zone map is close to being exhausted. */
extern bool
zone_map_nearing_exhaustion(void);
810
/*
 * Poison pattern: 0xdeadbeef repeated to fill a pointer-sized word
 * (once on 32-bit kernels, twice on LP64 kernels).
 */
#if !defined(__LP64__)
#define ZONE_POISON       0xdeadbeef
#else
#define ZONE_POISON       0xdeadbeefdeadbeef
#endif
816
817 static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)818 zalloc_flags_get_tag(zalloc_flags_t flags)
819 {
820 return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
821 }
822
/*
 * Extended allocation / free entry points taking the stats structure and
 * the element size explicitly.
 * NOTE(review): presumably the common backend of the zalloc()/zfree()
 * variants -- confirm in zalloc.c.
 */
extern void *zalloc_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	zalloc_flags_t  flags,
	vm_size_t       elem_size);

extern void zfree_ext(
	zone_t          zone,
	zone_stats_t    zstats,
	void           *addr,
	vm_size_t       elem_size);

/*
 * NOTE(review): presumably returns the id of the zone owning the native
 * (non foreign) element at `addr` of size `esize` -- confirm in zalloc.c.
 */
extern zone_id_t zone_id_for_native_element(
	void           *addr,
	vm_size_t       esize);

#if CONFIG_PROB_GZALLOC
extern void *zone_element_pgz_oob_adjust(
	void           *addr,
	vm_size_t       esize,
	vm_size_t       req_size);
#endif /* CONFIG_PROB_GZALLOC */

/*
 * Returns the size of the element at `addr`; `z`, `clear_oob` and
 * `oob_offs` are forwarded to the implementation.
 * NOTE(review): `z` and `oob_offs` look like out parameters -- confirm.
 */
extern vm_size_t zone_element_size(
	void           *addr,
	zone_t         *z,
	bool            clear_oob,
	vm_offset_t    *oob_offs);

/* Overload checking a single address against a range. */
__attribute__((overloadable))
extern bool zone_range_contains(
	const struct zone_map_range *r,
	vm_offset_t     addr);

/* Overload checking an (address, size) pair against a range. */
__attribute__((overloadable))
extern bool zone_range_contains(
	const struct zone_map_range *r,
	vm_offset_t     addr,
	vm_offset_t     size);

/* Returns the size of a range. */
extern vm_size_t zone_range_size(
	const struct zone_map_range *r);

/*!
 * @function zone_spans_ro_va
 *
 * @abstract
 * This function is used to check whether the specified address range
 * spans through the read-only zone range.
 *
 * @discussion
 * This only checks for the range specified within ZONE_ADDR_READONLY.
 * The parameters addr_start and addr_end are stripped off of PAC bits
 * before the check is made.
 *
 * @param addr_start    the start of the address range to check.
 * @param addr_end      the end of the address range to check.
 */
extern bool zone_spans_ro_va(
	vm_offset_t     addr_start,
	vm_offset_t     addr_end);
881
882 /*!
883 * @function __zalloc_ro_mut_atomic
884 *
885 * @abstract
886 * This function is called from the pmap to perform the specified atomic
887 * operation on memory from the read-only allocator.
888 *
889 * @discussion
890 * This function is for internal use only and should not be called directly.
891 */
892 static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst,zro_atomic_op_t op,uint64_t value)893 __zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
894 {
895 #define __ZALLOC_RO_MUT_OP(op, op2) \
896 case ZRO_ATOMIC_##op##_8: \
897 return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
898 case ZRO_ATOMIC_##op##_16: \
899 return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
900 case ZRO_ATOMIC_##op##_32: \
901 return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
902 case ZRO_ATOMIC_##op##_64: \
903 return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)
904
905 switch (op) {
906 __ZALLOC_RO_MUT_OP(OR, or_orig);
907 __ZALLOC_RO_MUT_OP(XOR, xor_orig);
908 __ZALLOC_RO_MUT_OP(AND, and_orig);
909 __ZALLOC_RO_MUT_OP(ADD, add_orig);
910 __ZALLOC_RO_MUT_OP(XCHG, xchg);
911 default:
912 panic("%s: Invalid atomic operation: %d", __func__, op);
913 }
914
915 #undef __ZALLOC_RO_MUT_OP
916 }
917
/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require that checks if a given
 * pointer belongs to the specified zone and should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone          the zone the address needs to belong to.
 * @param addr          the element address to check.
 */
extern bool zone_owns(
	zone_t          zone,
	void           *addr);

/*!
 * @function zone_submap
 *
 * @param zsflags       the security flags of a specified zone.
 * @returns             the zone (sub)map this zone allocates from.
 */
__pure2
extern vm_map_t zone_submap(
	zone_security_flags_t zsflags);
947
/*
 * Structure for keeping track of a backtrace, used for leak detection.
 * This is in the .h file because it is used during panic, see kern/debug.c
 * A non-zero size indicates that the trace is in use.
 */
struct ztrace {
	/* How much memory are all the allocations referring to this trace taking up? */
	vm_size_t               zt_size;
	/* depth of stack (0 to MAX_ZTRACE_DEPTH) */
	uint32_t                zt_depth;
	/* series of return addresses from OSBacktrace */
	void*                   zt_stack[MAX_ZTRACE_DEPTH];
	/* How many times did a different stack land here while it was occupied? */
	uint32_t                zt_collisions;
	/* for determining effectiveness of hash function */
	uint32_t                zt_hit_count;
};
960
961 #ifndef VM_TAG_SIZECLASSES
962 #error MAX_TAG_ZONES
963 #endif
964 #if VM_TAG_SIZECLASSES
965
966 extern uint16_t zone_index_from_tag_index(
967 uint32_t tag_zone_index);
968
969 #endif /* VM_TAG_SIZECLASSES */
970
/*
 * NOTE(review): presumably sets up the kalloc maps starting at
 * `min_address` -- confirm in the kalloc implementation.
 */
extern void kalloc_init_maps(
	vm_address_t min_address);
973
/*
 * Takes the zone spinlock.
 *
 * On KASAN_ZALLOC kernels, zones marked kasan_fakestacks call splsched()
 * before taking the lock; its return value (0 for other zones) is stored
 * in z_kasan_spl once the lock is held, for zone_unlock() to consume.
 */
static inline void
zone_lock(zone_t zone)
{
#if KASAN_ZALLOC
	spl_t s = 0;
	if (zone->kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_ZALLOC */
	lck_spin_lock(&zone->z_lock);
#if KASAN_ZALLOC
	/* only written while holding the lock */
	zone->z_kasan_spl = s;
#endif /* KASAN_ZALLOC */
}
988
/*
 * Releases the zone spinlock.
 *
 * On KASAN_ZALLOC kernels, the value saved by zone_lock() in z_kasan_spl
 * is read and cleared while the lock is still held, and passed to splx()
 * after the lock is dropped for zones marked kasan_fakestacks.
 */
static inline void
zone_unlock(zone_t zone)
{
#if KASAN_ZALLOC
	/* must be fetched before unlocking: the next owner overwrites it */
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_ZALLOC */
	lck_spin_unlock(&zone->z_lock);
#if KASAN_ZALLOC
	if (zone->kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_ZALLOC */
}
1003
#if CONFIG_GZALLOC
/*
 * gzalloc entry points, only declared on CONFIG_GZALLOC kernels.
 * NOTE(review): semantics are not described in this header; the names
 * suggest init / per-zone init / cache flush / alloc / free / size query.
 * Confirm in gzalloc.c.
 */
void gzalloc_init(vm_size_t);
void gzalloc_zone_init(zone_t);
void gzalloc_empty_free_cache(zone_t);
boolean_t gzalloc_enabled(void);

vm_offset_t gzalloc_alloc(zone_t, zone_stats_t zstats, zalloc_flags_t flags);
void gzalloc_free(zone_t, zone_stats_t zstats, void *);
boolean_t gzalloc_element_size(void *, zone_t *, vm_size_t *);
#endif /* CONFIG_GZALLOC */
1014
#define MAX_ZONE_NAME   32      /* max length of a zone name we can take from the boot-args */

/*
 * NOTE(review): presumably returns non-zero when `zonename` matches the
 * zone name requested for logging in `logname` -- confirm in zalloc.c.
 */
int track_this_zone(const char *zonename, const char *logname);
extern bool panic_include_kalloc_types;
extern zone_t kalloc_type_src_zone;
extern zone_t kalloc_type_dst_zone;

/*
 * zalloc_disable_copyio_check is a real variable on DEBUG / DEVELOPMENT
 * kernels only; on other kernels it is the constant `false`.
 */
#if DEBUG || DEVELOPMENT
extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
extern bool zalloc_disable_copyio_check;
#else
#define zalloc_disable_copyio_check false
#endif /* DEBUG || DEVELOPMENT */
1028
1029 #pragma GCC visibility pop
1030
1031 __END_DECLS
1032
1033 #endif /* _KERN_ZALLOC_INTERNAL_H_ */
1034