1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 #ifndef _KERN_ZALLOC_INTERNAL_H_
60 #define _KERN_ZALLOC_INTERNAL_H_
61
62 #include <kern/zalloc.h>
63 #include <kern/locks.h>
64 #include <kern/simple_lock.h>
65
66 #include <os/atomic_private.h>
67 #include <sys/queue.h>
68 #include <vm/vm_map_internal.h>
69
70 #if KASAN
71 #include <san/kasan.h>
72 #include <kern/spl.h>
#endif /* KASAN */
74
75 #if KASAN_ZALLOC
76 /*
77 * Disable zalloc zero validation under kasan as it is
78 * double-duty with what kasan already does.
79 */
80 #define ZALLOC_ENABLE_ZERO_CHECK 0
81 #define ZONE_ENABLE_LOGGING 0
82 #elif DEBUG || DEVELOPMENT
83 #define ZALLOC_ENABLE_ZERO_CHECK 1
84 #define ZONE_ENABLE_LOGGING 1
85 #else
86 #define ZALLOC_ENABLE_ZERO_CHECK 1
87 #define ZONE_ENABLE_LOGGING 0
88 #endif
89
90 /*!
91 * @file <kern/zalloc_internal.h>
92 *
93 * @abstract
94 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
95 * kalloc subsystems.
96 */
97
98 __BEGIN_DECLS
99
100 #pragma GCC visibility push(hidden)
101
102 /*
103 * A zone is a collection of fixed size blocks for which there
104 * is fast allocation/deallocation access. Kernel routines can
105 * use zones to manage data structures dynamically, creating a zone
106 * for each type of data structure to be managed.
107 *
108 */
109
110 /*!
111 * @typedef zone_pva_t
112 *
113 * @brief
114 * Type used to point to a page virtual address in the zone allocator.
115 *
116 * @description
117 * - Valid pages have the top bit set.
118 * - 0 represents the "NULL" page
119 * - non 0 values with the top bit cleared represent queue heads,
120 * indexed from the beginning of the __DATA section of the kernel.
121 * (see zone_pageq_base).
122 */
123 typedef struct zone_packed_virtual_address {
124 uint32_t packed_address;
125 } zone_pva_t;
126
127 /*!
128 * @struct zone_stats
129 *
130 * @abstract
131 * Per-cpu structure used for basic zone stats.
132 *
133 * @discussion
134 * The values aren't scaled for per-cpu zones.
135 */
struct zone_stats {
	uint64_t zs_mem_allocated; /* cumulative bytes allocated (unscaled for per-cpu zones) */
	uint64_t zs_mem_freed;     /* cumulative bytes freed (unscaled for per-cpu zones) */
	uint32_t zs_alloc_rr;      /* allocation rr bias */
};
141
142 STAILQ_HEAD(zone_depot, zone_magazine);
143
144 /* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
145 #define Z_MAGIC_QUO(s) (((1ull << 32) - 1) / (uint64_t)(s) + 1)
146 #define Z_MAGIC_ALIGNED(s) (~0u / (uint32_t)(s) + 1)
147
148 /*
149 * Returns (offs / size) if offs is small enough
150 * and magic = Z_MAGIC_QUO(size)
151 */
/*
 * Fast fixed-point quotient: returns (offs / size) provided offs is
 * small enough and magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
	uint64_t prod = offs * magic;

	return (uint32_t)(prod >> 32);
}
157
158 /*
159 * Returns (offs % size) if offs is small enough
160 * and magic = Z_MAGIC_QUO(size)
161 */
/*
 * Fast fixed-point remainder: returns (offs % size) provided offs is
 * small enough and magic = Z_MAGIC_QUO(size).
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
	/* fractional part of (offs / size) in 0.32 fixed point */
	uint64_t frac = (uint32_t)(offs * magic);

	/* scale the fraction back up by size to recover the remainder */
	return (uint32_t)((frac * size) >> 32);
}
169
170 /*
171 * Returns whether (offs % size) == 0 if offs is small enough
172 * and magic = Z_MAGIC_ALIGNED(size)
173 */
/*
 * Fast alignment test: returns whether (offs % size) == 0 provided offs
 * is small enough and magic = Z_MAGIC_ALIGNED(size).
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
	uint32_t low = (uint32_t)(offs * magic);

	/* the low 32 bits are < magic exactly when offs is a multiple of size */
	return low < magic;
}
179
struct zone_size_params {
	uint32_t z_align_magic; /* magic to use with Z_FAST_ALIGNED() */
	uint32_t z_elem_size; /* size of an element */
};

/*
 * Tracks an in-flight zone expansion (refill) request.
 *
 * NOTE(review): field meanings below are inferred from names —
 * confirm against their use in zalloc.c.
 */
struct zone_expand {
	struct zone_expand *ze_next;   /* next expander in the chain */
	thread_t ze_thread;            /* thread performing/waiting on the expansion */
	bool ze_pg_wait;               /* waiting for pages? */
	bool ze_vm_priv;               /* thread has VM privilege? */
	bool ze_clear_priv;            /* clear the privilege when done? */
};
192
struct zone {
	/*
	 * Readonly / rarely written fields
	 */

	/*
	 * The first 4 fields match a zone_view.
	 *
	 * z_self points back to the zone when the zone is initialized,
	 * or is NULL else.
	 */
	struct zone *z_self;
	zone_stats_t z_stats;
	const char *z_name;
	struct zone_view *z_views;

	struct zone_expand *z_expander;
	struct zone_cache *__zpercpu z_pcpu_cache;

	uint64_t z_quo_magic;   /* magic to use with Z_FAST_QUO()/Z_FAST_MOD() */
	uint32_t z_align_magic; /* magic to use with Z_FAST_ALIGNED() */
	uint16_t z_elem_size;   /* size of an element (see zone_elem_size()) */
	uint16_t z_elem_offs;   /* offset of elements (see zone_elem_offs()) */
	uint16_t z_chunk_pages; /* pages per allocation chunk */
	uint16_t z_chunk_elems; /* elements per allocation chunk */

	uint32_t /* 32 bits */
	/*
	 * Lifecycle state (Mutable after creation)
	 */
	z_destroyed :1, /* zone is (being) destroyed */
	z_async_refilling :1, /* asynchronous allocation pending? */
	z_expanding_wait :1, /* is thread waiting for expansion? */

	/*
	 * Behavior configuration bits
	 */
	z_percpu :1, /* the zone is percpu */
	z_permanent :1, /* the zone allocations are permanent */
	z_nocaching :1, /* disallow zone caching for this zone */
	collectable :1, /* garbage collect empty pages */
	exhaustible :1, /* merely return if empty? */
	expandable :1, /* expand zone (with message)? */
	no_callout :1,
	z_destructible :1, /* zone can be zdestroy()ed */

	_reserved :6,

	/*
	 * Debugging features
	 */
	alignment_required :1, /* element alignment needs to be preserved */
	z_pgz_tracked :1, /* this zone is tracked by pgzalloc */
	z_pgz_use_guards :1, /* this zone uses guards with PGZ */
	kasan_fakestacks :1,
	kasan_noquarantine :1, /* whether to use the kasan quarantine */
	z_tags_sizeclass :6, /* idx into zone_tags_sizeclasses to associate
	                      * sizeclass for a particular kalloc tag */
	z_uses_tags :1,
	z_tags_inline :1,
	z_log_on :1, /* zone logging was enabled by boot-arg */
	z_tbi_tag :1; /* Zone supports tbi tagging */

	/*
	 * often mutated fields
	 */

	lck_ticket_t z_lock;
	struct zone_depot z_recirc;

	/*
	 * Page accounting (wired / VA)
	 *
	 * Those numbers are unscaled for z_percpu zones
	 * (zone_scale_for_percpu() needs to be used to find the true value).
	 */
	uint32_t z_wired_max; /* how large can this zone grow */
	uint32_t z_wired_hwm; /* z_wired_cur high watermark */
	uint32_t z_wired_cur; /* number of pages used by this zone */
	uint32_t z_wired_empty; /* pages collectable by GC */
	uint32_t z_va_cur; /* amount of VA used by this zone */

	/*
	 * list of metadata structs, which maintain per-page free element lists
	 */
	zone_pva_t z_pageq_empty; /* populated, completely empty pages */
	zone_pva_t z_pageq_partial;/* populated, partially filled pages */
	zone_pva_t z_pageq_full; /* populated, completely full pages */
	zone_pva_t z_pageq_va; /* non-populated VA pages */

	/*
	 * Zone statistics
	 *
	 * z_contention_wma:
	 * weighted moving average of the number of contentions per second,
	 * in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
	 *
	 * z_contention_cur:
	 * count of recorded contentions that will be fused in z_contention_wma
	 * at the next period.
	 *
	 * z_recirc_cur:
	 * number of magazines in the recirculation depot.
	 *
	 * z_elems_free:
	 * number of free elements in the zone.
	 *
	 * z_elems_{min,max}:
	 * tracks the low/high watermark of z_elems_free for the current
	 * weighted moving average period.
	 *
	 * z_elems_free_wss:
	 * weighted moving average of the (z_elems_free_max - z_elems_free_min)
	 * amplitude, which is used by the GC for trim operations.
	 *
	 * z_elems_avail:
	 * number of elements in the zone (at all).
	 */
#define Z_CONTENTION_WMA_UNIT (1u << 8)
	uint32_t z_contention_wma;
	uint32_t z_contention_cur;
	uint32_t z_recirc_cur;
	uint32_t z_elems_free_max;
	uint32_t z_elems_free_wss;
	uint32_t z_elems_free; /* Number of free elements */
	uint32_t z_elems_free_min;
	uint32_t z_elems_avail; /* Number of elements available */
	uint32_t z_elems_rsv; /* reserve of free elements — NOTE(review): inferred from name */
	uint32_t z_array_size_class;

#if KASAN_ZALLOC
	uint32_t z_kasan_redzone;
	spl_t z_kasan_spl; /* spl saved by zone_lock() for fakestack zones */
#endif
#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
	/*
	 * the allocation logs are used when:
	 *
	 * - zlog<n>= boot-args are used (and then z_log_on is set)
	 *
	 * - the leak detection was triggered for the zone.
	 * In that case, the log can't ever be freed,
	 * but it can be enabled/disabled dynamically.
	 */
	struct btlog *z_btlog;
	struct btlog *z_btlog_disabled;
#endif
#if KASAN_TBI
	struct btlog *z_btlog_kasan;
#endif /* KASAN_TBI */
	struct zone *z_kt_next;
};
345
346 /*!
347 * @typedef zone_security_flags_t
348 *
349 * @brief
350 * Type used to store the immutable security properties of a zone.
351 *
352 * @description
353 * These properties influence the security nature of a zone and can't be
354 * modified after lockdown.
355 */
typedef struct zone_security_flags {
	uint16_t
	/*
	 * Security sensitive configuration bits
	 */
	z_submap_idx :8, /* a Z_SUBMAP_IDX_* value */
	z_pgz_use_guards :1, /* this zone uses guards with PGZ */
	z_submap_from_end :1, /* allocate from the left or the right ? */
	z_kheap_id :3, /* zone_kheap_id_t when part of a kalloc heap */
	z_noencrypt :1, /* do not encrypt pages when hibernating */
	z_va_sequester :1, /* page sequester: no VA reuse with other zones */
	z_kalloc_type :1; /* zone does type-based segregation (kalloc_type) */
} zone_security_flags_t;
369
370
371 /*
372 * Zsecurity config to enable sequestering VA of zones
373 */
374 #if KASAN_ZALLOC || !defined(__LP64__)
375 # define ZSECURITY_CONFIG_SEQUESTER OFF
376 #else
377 # define ZSECURITY_CONFIG_SEQUESTER ON
378 #endif
379
380 /*
381 * Zsecurity config to enable creating separate kalloc zones for
382 * bags of bytes
383 */
384 #if KASAN_ZALLOC || !defined(__LP64__)
385 # define ZSECURITY_CONFIG_SUBMAP_USER_DATA OFF
386 #else
387 # define ZSECURITY_CONFIG_SUBMAP_USER_DATA ON
388 #endif
389
390 /*
391 * Zsecurity config to enable strict free of iokit objects to zone
392 * or heap they were allocated from.
393 *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as not
 * to break third party kexts that haven't yet been recompiled
396 * to use the new iokit macros.
397 */
398 #if XNU_PLATFORM_MacOSX && __x86_64__
399 # define ZSECURITY_CONFIG_STRICT_IOKIT_FREE OFF
400 #else
401 # define ZSECURITY_CONFIG_STRICT_IOKIT_FREE ON
402 #endif
403
404 /*
405 * Zsecurity config to enable the read-only allocator
406 */
407 #if KASAN_ZALLOC || !defined(__LP64__)
408 # define ZSECURITY_CONFIG_READ_ONLY OFF
409 #else
410 # define ZSECURITY_CONFIG_READ_ONLY ON
411 #endif
412
413 /*
414 * Zsecurity config to enable making heap feng-shui
415 * less reliable.
416 */
417 #if KASAN_ZALLOC || !defined(__LP64__)
418 # define ZSECURITY_CONFIG_SAD_FENG_SHUI OFF
419 # define ZSECURITY_CONFIG_GENERAL_SUBMAPS 1
420 #else
421 # define ZSECURITY_CONFIG_SAD_FENG_SHUI ON
422 # define ZSECURITY_CONFIG_GENERAL_SUBMAPS 4
423 #endif
424
425 /*
426 * Zsecurity config to enable adjusting of elements
427 * with PGZ-OOB to right-align them in their space.
428 */
429 #if KASAN || defined(__x86_64__) || !defined(__LP64__)
430 # define ZSECURITY_CONFIG_PGZ_OOB_ADJUST OFF
431 #else
432 # define ZSECURITY_CONFIG_PGZ_OOB_ADJUST ON
433 #endif
434
435 /*
436 * Zsecurity config to enable kalloc type segregation
437 */
438 #if KASAN_ZALLOC || !defined(__LP64__)
439 # define ZSECURITY_CONFIG_KALLOC_TYPE OFF
440 # define ZSECURITY_CONFIG_KT_BUDGET 0
441 # define ZSECURITY_CONFIG_KT_VAR_BUDGET 0
442 #else
443 # define ZSECURITY_CONFIG_KALLOC_TYPE ON
444 #if XNU_TARGET_OS_WATCH
445 # define ZSECURITY_CONFIG_KT_BUDGET 85
446 #else
447 # define ZSECURITY_CONFIG_KT_BUDGET 200
448 #endif
449 # define ZSECURITY_CONFIG_KT_VAR_BUDGET 3
450 #endif
451
452
453 __options_decl(kalloc_type_options_t, uint64_t, {
454 /*
455 * kalloc type option to switch default accounting to private.
456 */
457 KT_OPTIONS_ACCT = 0x00000001,
458 /*
459 * kalloc type option to print additional stats regarding zone
460 * budget distribution and signatures.
461 */
462 KT_OPTIONS_DEBUG = 0x00000002,
463 /*
464 * kalloc type option to allow loose freeing between heaps
465 */
466 KT_OPTIONS_LOOSE_FREE = 0x00000004,
467 });
468
469 __enum_decl(kt_var_heap_id_t, uint32_t, {
470 /*
471 * Fake "data" heap used to link views of data-only allocation that
472 * have been redirected to KHEAP_DATA_BUFFERS
473 */
474 KT_VAR_DATA_HEAP,
475 /*
476 * Heap for pointer arrays
477 */
478 KT_VAR_PTR_HEAP,
479 /*
480 * Indicating first additional heap added
481 */
482 KT_VAR__FIRST_FLEXIBLE_HEAP,
483 });
484
485 /*
486 * Zone submap indices
487 *
488 * Z_SUBMAP_IDX_VM
489 * this map has the special property that its allocations
490 * can be done without ever locking the submap, and doesn't use
491 * VM entries in the map (which limits certain VM map operations on it).
492 *
493 * On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
494 *
495 * On LP64 it is also used to restrict VM allocations on LP64 lower
496 * in the kernel VA space, for pointer packing purposes.
497 *
498 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
499 * used for unrestricted allocations
500 *
501 * Z_SUBMAP_IDX_DATA
502 * used to sequester bags of bytes from all other allocations and allow VA reuse
503 * within the map
504 *
505 * Z_SUBMAP_IDX_READ_ONLY
506 * used for the read-only allocator
507 */
508 __enum_decl(zone_submap_idx_t, uint32_t, {
509 Z_SUBMAP_IDX_VM,
510 Z_SUBMAP_IDX_READ_ONLY,
511 Z_SUBMAP_IDX_GENERAL_0,
512 #if ZSECURITY_CONFIG(SAD_FENG_SHUI)
513 Z_SUBMAP_IDX_GENERAL_1,
514 Z_SUBMAP_IDX_GENERAL_2,
515 Z_SUBMAP_IDX_GENERAL_3,
516 #endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
517 Z_SUBMAP_IDX_DATA,
518
519 Z_SUBMAP_IDX_COUNT,
520 });
521
522 #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
523
/*
 * Variable kalloc_type heap config
 */
struct kheap_info {
	zone_id_t kh_zstart; /* start of this heap's zone id range — NOTE(review): confirm */
	union {
		kalloc_heap_t kh_views;
		kalloc_type_var_view_t kt_views;
	};
};

/* A kalloc_type view is either fixed-size or variable-size. */
typedef union kalloc_type_views {
	struct kalloc_type_view *ktv_fixed;
	struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;
538
539 #define KT_VAR_MAX_HEAPS 8
540 #define MAX_ZONES 650
541 extern struct kheap_info kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
542 extern zone_id_t _Atomic num_zones;
543 extern uint32_t zone_view_count;
544 extern struct zone zone_array[MAX_ZONES];
545 extern struct zone_size_params zone_ro_size_params[ZONE_ID__LAST_RO + 1];
546 extern zone_security_flags_t zone_security_array[];
547 extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
548 extern mach_memory_info_t *panic_kext_memory_info;
549 extern vm_size_t panic_kext_memory_size;
550 extern vm_offset_t panic_fault_address;
551 extern vm_map_size_t zone_map_size;
552
553 #define zone_index_foreach(i) \
554 for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
555 i < num_zones_##i; i++)
556
557 #define zone_foreach(z) \
558 for (zone_t z = &zone_array[1], \
559 last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
560 z < last_zone_##z; z++)
561
562 __abortlike
563 extern void zone_invalid_panic(zone_t zone);
564
__pure2
static inline zone_t
zone_by_id(size_t zid)
{
	/*
	 * Returns the zone for a given zone id, without bounds checking.
	 *
	 * NOTE(review): the address is computed with integer arithmetic
	 * rather than &zone_array[zid], presumably so the compiler cannot
	 * apply array-bounds assumptions to the result — confirm before
	 * "simplifying" this into an array subscript.
	 */
	return (zone_t)((uintptr_t)zone_array + zid * sizeof(struct zone));
}
571
__pure2
static inline zone_id_t
zone_index(zone_t z)
{
	/*
	 * Converts a zone pointer back into its index within zone_array,
	 * panicking if the pointer does not fall inside the array.
	 */
	unsigned long delta;
	uint64_t quo;

	/* byte offset of z from the base of zone_array */
	delta = (unsigned long)z - (unsigned long)zone_array;
	if (delta >= MAX_ZONES * sizeof(*z)) {
		zone_invalid_panic(z);
	}
	/* divide by sizeof(struct zone) via the fast fixed-point quotient */
	quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
	__builtin_assume(quo < MAX_ZONES);
	return (zone_id_t)quo;
}
587
588 __pure2
589 static inline bool
zone_is_ro(zone_t zone)590 zone_is_ro(zone_t zone)
591 {
592 return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
593 zone <= &zone_array[ZONE_ID__LAST_RO];
594 }
595
596 static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr,mach_vm_size_t size)597 zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
598 {
599 return atop(addr ^ (addr + size - 1)) != 0;
600 }
601
__pure2
static inline uint16_t
zone_elem_offs(zone_t zone)
{
	/* Offset of elements for this zone — NOTE(review): presumably the
	 * offset of the first element within a chunk; confirm in zalloc.c. */
	return zone->z_elem_offs;
}

__pure2
static inline vm_offset_t
zone_elem_size(zone_t zone)
{
	/* Size in bytes of a single element of this zone. */
	return zone->z_elem_size;
}
615
616 __pure2
617 static inline zone_security_flags_t
zone_security_config(zone_t z)618 zone_security_config(zone_t z)
619 {
620 zone_id_t zid = zone_index(z);
621 return zone_security_array[zid];
622 }
623
static inline uint32_t
zone_count_allocated(zone_t zone)
{
	/* Number of elements currently handed out (available minus free). */
	return zone->z_elems_avail - zone->z_elems_free;
}
629
630 static inline vm_size_t
zone_scale_for_percpu(zone_t zone,vm_size_t size)631 zone_scale_for_percpu(zone_t zone, vm_size_t size)
632 {
633 if (zone->z_percpu) {
634 size *= zpercpu_count();
635 }
636 return size;
637 }
638
static inline vm_size_t
zone_size_wired(zone_t zone)
{
	/*
	 * Number of wired bytes backing this zone.
	 *
	 * This either requires the zone lock to be held,
	 * or must be used for statistics purposes only.
	 */
	vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
	return zone_scale_for_percpu(zone, size);
}
649
650 static inline vm_size_t
zone_size_free(zone_t zone)651 zone_size_free(zone_t zone)
652 {
653 return zone_scale_for_percpu(zone,
654 (vm_size_t)zone->z_elem_size * zone->z_elems_free);
655 }
656
657 /* Under KASAN builds, this also accounts for quarantined elements. */
658 static inline vm_size_t
zone_size_allocated(zone_t zone)659 zone_size_allocated(zone_t zone)
660 {
661 return zone_scale_for_percpu(zone,
662 (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
663 }
664
665 static inline vm_size_t
zone_size_wasted(zone_t zone)666 zone_size_wasted(zone_t zone)
667 {
668 return zone_size_wired(zone) - zone_scale_for_percpu(zone,
669 (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
670 }
671
672 /*
673 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
674 * userspace reboot is needed. The only other way to query for this information
675 * is via mach_memory_info() which is unavailable on release kernels.
676 */
677 extern uint64_t get_zones_collectable_bytes(void);
678
679 /*!
680 * @enum zone_gc_level_t
681 *
682 * @const ZONE_GC_TRIM
683 * Request a trimming GC: it will trim allocations in excess
684 * of the working set size estimate only.
685 *
686 * @const ZONE_GC_DRAIN
687 * Request a draining GC: this is an aggressive mode that will
688 * cause all caches to be drained and all free pages returned to the system.
689 *
690 * @const ZONE_GC_JETSAM
691 * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
692 * @c ZONE_GC_DRAIN depending on the state of the zone map.
693 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
694 * request a @c ZONE_GC_JETSAM level.
695 */
696 __enum_closed_decl(zone_gc_level_t, uint32_t, {
697 ZONE_GC_TRIM,
698 ZONE_GC_DRAIN,
699 ZONE_GC_JETSAM,
700 });
701
702 /*!
703 * @function zone_gc
704 *
705 * @brief
706 * Reduces memory used by zones by trimming caches and freelists.
707 *
708 * @discussion
709 * @c zone_gc() is called:
710 * - by the pageout daemon when the system needs more free pages.
711 * - by the VM when contiguous page allocation requests get stuck
712 * (see vm_page_find_contiguous()).
713 *
714 * @param level The zone GC level requested.
715 */
716 extern void zone_gc(zone_gc_level_t level);
717
718 extern void zone_gc_trim(void);
719 extern void zone_gc_drain(void);
720
721 #define ZONE_WSS_UPDATE_PERIOD 10
722 /*!
723 * @function compute_zone_working_set_size
724 *
725 * @brief
726 * Recomputes the working set size for every zone
727 *
728 * @discussion
729 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
730 * computing an exponential moving average with a weight of 75%,
731 * so that the history of the last minute is the dominating factor.
732 */
733 extern void compute_zone_working_set_size(void *);
734
735 /* Debug logging for zone-map-exhaustion jetsams. */
736 extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
737 extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);
738
739 /* Bootstrap zone module (create zone zone) */
740 extern void zone_bootstrap(void);
741
742 /*!
743 * @function zone_early_mem_init
744 *
745 * @brief
746 * Steal memory from pmap (prior to initialization of zalloc)
747 * for the special vm zones that allow bootstrap memory and store
748 * the range so as to facilitate range checking in zfree.
749 *
750 * @param size the size to steal (must be a page multiple)
751 */
752 __startup_func
753 extern vm_offset_t zone_early_mem_init(
754 vm_size_t size);
755
756 /*!
757 * @function zone_get_early_alloc_size
758 *
759 * @brief
760 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
761 * of the allocation granule for the zone with the given creation flags and
762 * element size.
763 */
764 __startup_func
765 extern vm_size_t zone_get_early_alloc_size(
766 const char *name __unused,
767 vm_size_t elem_size,
768 zone_create_flags_t flags,
769 vm_size_t min_elems);
770
771 /*!
772 * @function zone_cram_early
773 *
774 * @brief
775 * Cram memory allocated with @c zone_early_mem_init() into a zone.
776 *
777 * @param zone The zone to cram memory into.
778 * @param newmem The base address for the memory to cram.
779 * @param size The size of the memory to cram into the zone.
780 */
781 __startup_func
782 extern void zone_cram_early(
783 zone_t zone,
784 vm_offset_t newmem,
785 vm_size_t size);
786
787 extern bool zone_maps_owned(
788 vm_address_t addr,
789 vm_size_t size);
790
791 #if KASAN_LIGHT
792 extern bool kasan_zone_maps_owned(
793 vm_address_t addr,
794 vm_size_t size);
795 #endif /* KASAN_LIGHT */
796
797 extern void zone_map_sizes(
798 vm_map_size_t *psize,
799 vm_map_size_t *pfree,
800 vm_map_size_t *plargest_free);
801
802 extern bool
803 zone_map_nearing_exhaustion(void);
804
805 #if defined(__LP64__)
806 #define ZONE_POISON 0xdeadbeefdeadbeef
807 #else
808 #define ZONE_POISON 0xdeadbeef
809 #endif
810
811 static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)812 zalloc_flags_get_tag(zalloc_flags_t flags)
813 {
814 return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
815 }
816
817 extern struct kalloc_result zalloc_ext(
818 zone_t zone,
819 zone_stats_t zstats,
820 zalloc_flags_t flags);
821
822 extern void zfree_ext(
823 zone_t zone,
824 zone_stats_t zstats,
825 void *addr,
826 vm_size_t elem_size);
827
828 extern zone_id_t zone_id_for_element(
829 void *addr,
830 vm_size_t esize);
831
832 #if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
833 extern void *zone_element_pgz_oob_adjust(
834 void *addr,
835 vm_size_t req_size,
836 vm_size_t elem_size);
837 #endif /* !ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
838
839 extern void zone_element_bounds_check(
840 vm_address_t addr,
841 vm_size_t len);
842
843 extern vm_size_t zone_element_size(
844 void *addr,
845 zone_t *z,
846 bool clear_oob,
847 vm_offset_t *oob_offs);
848
849 /*!
850 * @function zone_spans_ro_va
851 *
852 * @abstract
853 * This function is used to check whether the specified address range
854 * spans through the read-only zone range.
855 *
856 * @discussion
857 * This only checks for the range specified within ZONE_ADDR_READONLY.
858 * The parameters addr_start and addr_end are stripped off of PAC bits
859 * before the check is made.
860 */
861 extern bool zone_spans_ro_va(
862 vm_offset_t addr_start,
863 vm_offset_t addr_end);
864
865 /*!
866 * @function __zalloc_ro_mut_atomic
867 *
868 * @abstract
869 * This function is called from the pmap to perform the specified atomic
870 * operation on memory from the read-only allocator.
871 *
872 * @discussion
873 * This function is for internal use only and should not be called directly.
874 */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
	/*
	 * Expands to one switch case per operand width (8/16/32/64 bits),
	 * forwarding to the matching os_atomic_* primitive with seq_cst
	 * ordering and returning the pre-operation value (the *_orig and
	 * xchg variants), widened to 64 bits.
	 */
#define __ZALLOC_RO_MUT_OP(op, op2) \
	case ZRO_ATOMIC_##op##_8: \
	        return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_16: \
	        return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_32: \
	        return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
	case ZRO_ATOMIC_##op##_64: \
	        return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

	switch (op) {
		__ZALLOC_RO_MUT_OP(OR, or_orig);
		__ZALLOC_RO_MUT_OP(XOR, xor_orig);
		__ZALLOC_RO_MUT_OP(AND, and_orig);
		__ZALLOC_RO_MUT_OP(ADD, add_orig);
		__ZALLOC_RO_MUT_OP(XCHG, xchg);
	default:
		panic("%s: Invalid atomic operation: %d", __func__, op);
	}

#undef __ZALLOC_RO_MUT_OP
}
900
901 /*!
902 * @function zone_owns
903 *
904 * @abstract
905 * This function is a soft version of zone_require that checks if a given
906 * pointer belongs to the specified zone and should not be used outside
907 * allocator code.
908 *
909 * @discussion
910 * Note that zone_owns() can only work with:
911 * - zones not allowing foreign memory
912 * - zones in the general submap.
913 *
914 * @param zone the zone the address needs to belong to.
915 * @param addr the element address to check.
916 */
917 extern bool zone_owns(
918 zone_t zone,
919 void *addr);
920
921 /**!
922 * @function zone_submap
923 *
924 * @param zsflags the security flags of a specified zone.
925 * @returns the zone (sub)map this zone allocates from.
926 */
927 __pure2
928 extern vm_map_t zone_submap(
929 zone_security_flags_t zsflags);
930
931 /*
932 * Structure for keeping track of a backtrace, used for leak detection.
933 * This is in the .h file because it is used during panic, see kern/debug.c
934 * A non-zero size indicates that the trace is in use.
935 */
/* One hash-table slot of leak-detection backtrace state (dumped at panic). */
struct ztrace {
	vm_size_t zt_size; /* How much memory are all the allocations referring to this trace taking up? */
	uint32_t zt_depth; /* depth of stack (0 to MAX_ZTRACE_DEPTH) */
	void* zt_stack[MAX_ZTRACE_DEPTH]; /* series of return addresses from OSBacktrace */
	uint32_t zt_collisions; /* How many times did a different stack land here while it was occupied? */
	uint32_t zt_hit_count; /* for determining effectiveness of hash function */
};
943
944 #ifndef VM_TAG_SIZECLASSES
945 #error MAX_TAG_ZONES
946 #endif
947 #if VM_TAG_SIZECLASSES
948
949 extern uint16_t zone_index_from_tag_index(
950 uint32_t tag_zone_index);
951
952 #endif /* VM_TAG_SIZECLASSES */
953
954 extern lck_grp_t zone_locks_grp;
955
static inline void
zone_lock(zone_t zone)
{
	/*
	 * Acquires the zone's ticket lock.
	 *
	 * Under KASAN, zones used for fakestacks raise spl to splsched
	 * before taking the lock and stash the previous level in
	 * z_kasan_spl so zone_unlock() can restore it.
	 * NOTE(review): presumably because fakestack alloc/free can run in
	 * contexts where interrupts must stay disabled — confirm.
	 */
#if KASAN_ZALLOC
	spl_t s = 0;
	if (zone->kasan_fakestacks) {
		s = splsched();
	}
#endif /* KASAN_ZALLOC */
	lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_ZALLOC
	zone->z_kasan_spl = s;
#endif /* KASAN_ZALLOC */
}

static inline void
zone_unlock(zone_t zone)
{
	/*
	 * Releases the zone's ticket lock, restoring the spl that
	 * zone_lock() saved for KASAN fakestack zones.
	 */
#if KASAN_ZALLOC
	spl_t s = zone->z_kasan_spl;
	zone->z_kasan_spl = 0;
#endif /* KASAN_ZALLOC */
	lck_ticket_unlock(&zone->z_lock);
#if KASAN_ZALLOC
	if (zone->kasan_fakestacks) {
		splx(s);
	}
#endif /* KASAN_ZALLOC */
}
985
986 #define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
987
988 int track_this_zone(const char *zonename, const char *logname);
989 extern bool panic_include_kalloc_types;
990 extern zone_t kalloc_type_src_zone;
991 extern zone_t kalloc_type_dst_zone;
992
993 #if DEBUG || DEVELOPMENT
994 extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
995 #endif /* DEBUG || DEVELOPMENT */
996
997 #pragma GCC visibility pop
998
999 __END_DECLS
1000
1001 #endif /* _KERN_ZALLOC_INTERNAL_H_ */
1002