xref: /xnu-11417.140.69/osfmk/vm/vm_page.h (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4) !
1 /*
2  * Copyright (c) 2000-2020 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_page.h
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Resident memory system definitions.
64  */
65 
66 #ifndef _VM_VM_PAGE_H_
67 #define _VM_VM_PAGE_H_
68 
69 #include <debug.h>
70 #include <stdbool.h>
71 #include <vm/vm_options.h>
72 #include <vm/vm_protos.h>
73 #include <vm/vm_far.h>
74 #include <mach/boolean.h>
75 #include <mach/vm_prot.h>
76 #include <mach/vm_param.h>
77 #include <mach/memory_object_types.h> /* for VMP_CS_BITS... */
78 #include <kern/thread.h>
79 #include <kern/queue.h>
80 #include <kern/locks.h>
81 #include <sys/kern_memorystatus_xnu.h>
82 
/*
 * Per-architecture VM feature switches.
 *
 * XNU_VM_HAS_LOPAGE
 *	Gates the "lopage" memory class and free-queue state declared
 *	later in this header.
 *
 * XNU_VM_HAS_LINEAR_PAGES_ARRAY
 *	When 1, struct vm_page carries no vmp_phys_page field and
 *	VM_PAGE_GET_PHYS_PAGE() derives the physical page number of
 *	vm_pages array entries from their index in the array.
 *
 * XNU_VM_HAS_DELAYED_PAGES
 *	Not consumed in the visible part of this header.
 *	NOTE(review): presumably controls delayed initialization of
 *	pages -- confirm against the users of this macro.
 */
83 #if __x86_64__
84 #define XNU_VM_HAS_DELAYED_PAGES        1
85 #define XNU_VM_HAS_LOPAGE               1
86 #define XNU_VM_HAS_LINEAR_PAGES_ARRAY   0
87 #else
88 #define XNU_VM_HAS_DELAYED_PAGES        0
89 #define XNU_VM_HAS_LOPAGE               0
90 #define XNU_VM_HAS_LINEAR_PAGES_ARRAY   1
91 #endif
92 
93 
94 
95 /*
 * In order to make the size of a vm_page_t 64 bytes (cache line size for both
 * arm64 and x86_64) we keep the next_m pointer packed into 32 bits.  As long
 * as the kernel virtual space where we allocate vm_page_t's from doesn't span
 * more than the reach of the packing scheme (128 Gbytes according to the
 * "Parameters for pointer packing" comment later in this header), we're safe.
 * There are live tests in the vm_page_t array allocation and the zone init
 * code to determine if we can safely pack and unpack pointers from the 2 ends
 * of these spaces.
 */
102 typedef uint32_t        vm_page_packed_t;
103
/* Doubly-linked queue linkage whose two links are stored in packed form. */
104 struct vm_page_packed_queue_entry {
105 	vm_page_packed_t        next;          /* next element */
106 	vm_page_packed_t        prev;          /* previous element */
107 };
108
/*
 * Aliases of the packed linkage.  Note that vm_page_queue_t and
 * vm_page_queue_entry_t are POINTER types, while vm_page_queue_head_t and
 * vm_page_queue_chain_t are the structure itself.
 */
109 typedef struct vm_page_packed_queue_entry       *vm_page_queue_t;
110 typedef struct vm_page_packed_queue_entry       vm_page_queue_head_t;
111 typedef struct vm_page_packed_queue_entry       vm_page_queue_chain_t;
112 typedef struct vm_page_packed_queue_entry       *vm_page_queue_entry_t;
113
/* Packed reference to the VM object owning a page (see VM_OBJECT_PACK). */
114 typedef vm_page_packed_t                        vm_page_object_t;
115 
116 
117 /*
118  * vm_relocate_reason_t:
119  * A type to describe why a page relocation is being attempted.  Depending on
120  * the reason, certain pages may or may not be relocatable.
121  *
122  * VM_RELOCATE_REASON_CONTIGUOUS:
123  * The relocation is on behalf of the contiguous allocator; it is likely to be
124  * wired, so do not consider pages that cannot be wired for any reason.
125  */
126 __enum_closed_decl(vm_relocate_reason_t, unsigned int, {
127 	VM_RELOCATE_REASON_CONTIGUOUS,  /* relocation on behalf of the contiguous allocator */
128
	/* last enumerator: evaluates to the number of reasons declared above it */
129 	VM_RELOCATE_REASON_COUNT,
130 });
131 
132 /*!
133  * @typedef vm_memory_class_t
134  *
135  * @abstract
136  * A type to describe what kind of memory a page represents.
137  *
138  * @const VM_MEMORY_CLASS_REGULAR
139  * Normal memory, which should participate in the normal page lifecycle.
140  *
141  * @const VM_MEMORY_CLASS_LOPAGE
142  * this exists to support hardware controllers
143  * incapable of generating DMAs with more than 32 bits
144  * of address on platforms with physical memory > 4G...
145  *
146  * @const VM_MEMORY_CLASS_SECLUDED
147  * Denotes memory must be put on the secluded queue,
148  * this is not returned by @c vm_page_get_memory_class().
149  */
/*
 * No explicit values are assigned, so the numeric value of each class
 * depends on which of the optional enumerators are compiled in.
 */
150 __enum_closed_decl(vm_memory_class_t, uint8_t, {
151 	VM_MEMORY_CLASS_REGULAR,        /* normal memory */
152 #if XNU_VM_HAS_LOPAGE
153 	VM_MEMORY_CLASS_LOPAGE,         /* only declared when XNU_VM_HAS_LOPAGE is set */
154 #endif /* XNU_VM_HAS_LOPAGE */
155 #if CONFIG_SECLUDED_MEMORY
156 	VM_MEMORY_CLASS_SECLUDED,       /* only declared when CONFIG_SECLUDED_MEMORY is set */
157 #endif /* CONFIG_SECLUDED_MEMORY */
158 });
159 
160 /*
 * Pages of compressed data: the resident page count of compressor_object,
 * sampled with a relaxed atomic load (no ordering is implied with respect to
 * other memory accesses).
 */
161 #define VM_PAGE_COMPRESSOR_COUNT os_atomic_load(&compressor_object->resident_page_count, relaxed)
162 
163 /*
164  *	Management of resident (logical) pages.
165  *
166  *	A small structure is kept for each resident
167  *	page, indexed by page number.  Each structure
168  *	is an element of several lists:
169  *
170  *		A hash table bucket used to quickly
171  *		perform object/offset lookups
172  *
173  *		A list of all pages for a given object,
174  *		so they can be quickly deactivated at
175  *		time of deallocation.
176  *
177  *		An ordered list of pages due for pageout.
178  *
179  *	In addition, the structure contains the object
180  *	and offset to which this page belongs (for pageout),
181  *	and sundry status bits.
182  *
183  *	Fields in this structure are locked either by the lock on the
184  *	object that the page belongs to (O) or by the lock on the page
185  *	queues (P).  [Some fields require that both locks be held to
186  *	change that field; holding either lock is sufficient to read.]
187  */
188 
/* The null vm_page_t value. */
189 #define VM_PAGE_NULL            ((vm_page_t) 0)
190 
/*
 * vm_page_q_state_t: the value stored in the 4-bit vmp_q_state field of
 * struct vm_page.
 *
 * NOTE(review): VM_PAGE_ON_FREE_LOPAGE_Q and VM_PAGE_ON_SECLUDED_Q are both
 * assigned the value 5.  This is only unambiguous if XNU_VM_HAS_LOPAGE and
 * CONFIG_SECLUDED_MEMORY are never enabled in the same build -- confirm.
 */
191 __enum_closed_decl(vm_page_q_state_t, uint8_t, {
192 	VM_PAGE_NOT_ON_Q                = 0,    /* page is not present on any queue, nor is it wired... mainly a transient state */
193 	VM_PAGE_IS_WIRED                = 1,    /* page is currently wired */
194 	VM_PAGE_USED_BY_COMPRESSOR      = 2,    /* page is in use by the compressor to hold compressed data */
195 	VM_PAGE_ON_FREE_Q               = 3,    /* page is on the main free queue */
196 	VM_PAGE_ON_FREE_LOCAL_Q         = 4,    /* page is on one of the per-CPU free queues */
197 #if XNU_VM_HAS_LOPAGE
198 	VM_PAGE_ON_FREE_LOPAGE_Q        = 5,    /* page is on the lopage pool free list */
199 #endif /* XNU_VM_HAS_LOPAGE */
200 #if CONFIG_SECLUDED_MEMORY
201 	VM_PAGE_ON_SECLUDED_Q           = 5,    /* page is on secluded queue */
202 #endif /* CONFIG_SECLUDED_MEMORY */
203 	VM_PAGE_ON_THROTTLED_Q          = 6,    /* page is on the throttled queue... we stash anonymous pages here when not paging */
204 	VM_PAGE_ON_PAGEOUT_Q            = 7,    /* page is on one of the pageout queues (internal/external) awaiting processing */
205 	VM_PAGE_ON_SPECULATIVE_Q        = 8,    /* page is on one of the speculative queues */
206 	VM_PAGE_ON_ACTIVE_LOCAL_Q       = 9,    /* page has recently been created and is being held in one of the per-CPU local queues */
207 	VM_PAGE_ON_ACTIVE_Q             = 10,   /* page is in global active queue */
208 	VM_PAGE_ON_INACTIVE_INTERNAL_Q  = 11,   /* page is on the inactive internal queue a.k.a.  anonymous queue */
209 	VM_PAGE_ON_INACTIVE_EXTERNAL_Q  = 12,   /* page is on the inactive external queue a.k.a.  file backed queue */
210 	VM_PAGE_ON_INACTIVE_CLEANED_Q   = 13,   /* page has been cleaned to a backing file and is ready to be stolen */
211 });
/* Must track the largest enumerator above. */
212 #define VM_PAGE_Q_STATE_LAST_VALID_VALUE  13    /* we currently use 4 bits for the state... don't let this go beyond 15 */
213 
/*
 * vm_page_specialq_t: the value stored in the 2-bit vmp_on_specialq field of
 * struct vm_page (all four encodings of that field are used).
 *
 * NOTE(review): BG / DONATE / FG presumably name the background, donated and
 * foreground special queues a page is linked on through vmp_specialq, with
 * EMPTY meaning "on none of them" -- confirm against the queue code.
 */
214 __enum_closed_decl(vm_page_specialq_t, uint8_t, {
215 	VM_PAGE_SPECIAL_Q_EMPTY         = 0,
216 	VM_PAGE_SPECIAL_Q_BG            = 1,
217 	VM_PAGE_SPECIAL_Q_DONATE        = 2,
218 	VM_PAGE_SPECIAL_Q_FG            = 3,
219 });
220 
/*
 * Queue-state classification predicates.
 *
 * Each predicate hands one of the vm_page_*_states constants below to
 * bit_test(), together with the page's vmp_q_state value.
 * NOTE(review): presumably bit N of a mask is set when queue state N belongs
 * to the class; the masks are defined outside this header -- confirm.
 */
221 #define VM_PAGE_INACTIVE(m)                     bit_test(vm_page_inactive_states, (m)->vmp_q_state)
222 #define VM_PAGE_ACTIVE_OR_INACTIVE(m)           bit_test(vm_page_active_or_inactive_states, (m)->vmp_q_state)
223 #define VM_PAGE_NON_SPECULATIVE_PAGEABLE(m)     bit_test(vm_page_non_speculative_pageable_states, (m)->vmp_q_state)
224 #define VM_PAGE_PAGEABLE(m)                     bit_test(vm_page_pageable_states, (m)->vmp_q_state)
225
/* 16-bit masks: wide enough for every state up to VM_PAGE_Q_STATE_LAST_VALID_VALUE. */
226 extern const uint16_t vm_page_inactive_states;
227 extern const uint16_t vm_page_active_or_inactive_states;
228 extern const uint16_t vm_page_non_speculative_pageable_states;
229 extern const uint16_t vm_page_pageable_states;
230 
231 
232 /*
 * The structure itself. See the block comment above for what (O) and (P) mean.
 *
 * Every link in this structure is stored in packed 32-bit form
 * (vm_page_packed_t), except vmp_snext which is a full-width pointer
 * overlaying vmp_pageq.
 */
235 struct vm_page {
236 	union {
237 		vm_page_queue_chain_t   vmp_pageq;      /* queue info for FIFO queue or free list (P) */
238 		struct vm_page         *vmp_snext;      /* unpacked "next" pointer sharing storage with vmp_pageq */
239 	};
240 	vm_page_queue_chain_t           vmp_specialq;   /* anonymous pages in the special queues (P) */
241
242 	vm_page_queue_chain_t           vmp_listq;      /* all pages in same object (O) */
243 	vm_page_packed_t                vmp_next_m;     /* VP bucket link (O) */
244
245 	vm_page_object_t                vmp_object;     /* which object am I in (O&P) */
246 	vm_object_offset_t              vmp_offset;     /* offset into that object (O,P) */
247
248
249 	/*
	 * Either the current page wire count,
	 * or the local queue id (if local queues are enabled).
	 *
	 * See the comments at 'vm_page_queues_remove'
	 * as to why this is safe to do.
	 */
256 	union {
257 		uint16_t                vmp_wire_count;
258 		uint16_t                vmp_local_id;
259 	};
260
261 	/*
	 * The following word of flags used to be protected by the "page queues" lock.
	 * That's no longer true and what lock, if any, is needed may depend on the
	 * value of vmp_q_state.
	 *
	 * This bitfield is kept in its own struct to prevent coalescing
	 * with the next one (which C allows the compiler to do) as they
	 * are under different locking domains
	 */
270 	struct {
271 		vm_page_q_state_t       vmp_q_state:4;      /* which q is the page on (P) */
272 		vm_page_specialq_t      vmp_on_specialq:2;  /* holds a vm_page_specialq_t value */
273 		uint8_t                 vmp_lopage:1;       /* NOTE(review): presumably set for pages of the lopage pool -- confirm */
274 		uint8_t                 vmp_canonical:1;    /* this page is a canonical kernel page (immutable) */
275 	};
276 	struct {
277 		uint8_t                 vmp_gobbled:1;      /* page used internally (P) */
278 		uint8_t                 vmp_laundry:1;      /* page is being cleaned now (P)*/
279 		uint8_t                 vmp_no_cache:1;     /* page is not to be cached and should */
280 		                                            /* be reused ahead of other pages (P) */
281 		uint8_t                 vmp_reference:1;    /* page has been used (P) */
282 		uint8_t                 vmp_realtime:1;     /* page used by realtime thread (P) */
283 #if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
284 		uint8_t                 vmp_unmodified_ro:1;/* Tracks if an anonymous page is modified after a decompression (O&P).*/
285 #else
286 		uint8_t                 __vmp_reserved1:1;  /* placeholder keeping the bit layout identical in both configurations */
287 #endif
288 		uint8_t                 __vmp_reserved2:1;
289 		uint8_t                 __vmp_reserved3:1;
290 	};
291
292 	/*
	 * The following word of flags is protected by the "VM object" lock.
	 *
	 * IMPORTANT: the "vmp_pmapped", "vmp_xpmapped" and "vmp_clustered" bits can be modified while holding the
	 * VM object "shared" lock + the page lock provided through the pmap_lock_phys_page function.
	 * This is done in vm_fault_enter() and the CONSUME_CLUSTERED macro.
	 * It's also ok to modify them behind just the VM object "exclusive" lock.
	 */
300 	unsigned int    vmp_busy:1,           /* page is in transit (O) */
301 	    vmp_wanted:1,                     /* someone is waiting for page (O) */
302 	    vmp_tabled:1,                     /* page is in VP table (O) */
303 	    vmp_hashed:1,                     /* page is in vm_page_buckets[] (O) + the bucket lock */
304 	__vmp_unused : 1,
305 	vmp_clustered:1,                      /* page is not the faulted page (O) or (O-shared AND pmap_page) */
306 	    vmp_pmapped:1,                    /* page has at some time been entered into a pmap (O) or */
307 	                                      /* (O-shared AND pmap_page) */
308 	    vmp_xpmapped:1,                   /* page has been entered with execute permission (O) or */
309 	                                      /* (O-shared AND pmap_page) */
310 	    vmp_wpmapped:1,                   /* page has been entered at some point into a pmap for write (O) */
311 	    vmp_free_when_done:1,             /* page is to be freed once cleaning is completed (O) */
312 	    vmp_absent:1,                     /* Data has been requested, but is not yet available (O) */
313 	    vmp_error:1,                      /* Data manager was unable to provide data due to error (O) */
314 	    vmp_dirty:1,                      /* Page must be cleaned (O) */
315 	    vmp_cleaning:1,                   /* Page clean has begun (O) */
316 	    vmp_precious:1,                   /* Page is precious; data must be returned even if clean (O) */
317 	    vmp_overwriting:1,                /* Request to unlock has been made without having data. (O) */
318 	                                      /* [See vm_fault_page_overwrite] */
319 	    vmp_restart:1,                    /* Page was pushed higher in shadow chain by copy_call-related pagers */
320 	                                      /* start again at top of chain */
321 	    vmp_unusual:1,                    /* Page is absent, error, restart or page locked */
322 	    vmp_cs_validated:VMP_CS_BITS,     /* code-signing: page was checked */
323 	    vmp_cs_tainted:VMP_CS_BITS,       /* code-signing: page is tainted */
324 	    vmp_cs_nx:VMP_CS_BITS,            /* code-signing: page is nx */
325 	    vmp_reusable:1,
326 	    vmp_written_by_kernel:1;          /* page was written by kernel (i.e. decompressed) */
327
328 #if !XNU_VM_HAS_LINEAR_PAGES_ARRAY
329 	/*
	 * Physical number of the page
	 *
	 * Setting this value to or away from vm_page_fictitious_addr
	 * must be done with (P) held
	 *
	 * With a linear pages array this field does not exist here; it is
	 * provided by struct vm_page_with_ppnum instead.
	 */
335 	ppnum_t                         vmp_phys_page;
336 #endif /* !XNU_VM_HAS_LINEAR_PAGES_ARRAY */
337 };
338 
339 /*!
 * @var vm_pages
 * The so called VM pages array.  It is not declared at file scope by this
 * header: go through vm_pages_array_internal() / vm_page_get() instead.
 *
 * @var vm_pages_end
 * The pointer past the last valid page in the VM pages array.
 *
 * @var vm_pages_count
 * The number of elements in the VM pages array.
 * (vm_pages + vm_pages_count == vm_pages_end).
 *
 * @var vm_pages_first_pnum
 * For linear page arrays, the pnum of the first page in the array.
 * In other words VM_PAGE_GET_PHYS_PAGE(vm_page_get(0)).
 */
354 extern vm_page_t        vm_pages_end;
355 extern uint32_t         vm_pages_count;
356 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
357 extern ppnum_t          vm_pages_first_pnum;
358 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
359 
360 /**
361  * Internal accessor which returns the raw vm_pages pointer.
362  *
363  * This pointer must not be indexed directly. Use vm_page_get instead when
364  * indexing into the array.
365  *
366  * __pure2 helps explain to the compiler that the value vm_pages is a constant.
367  */
368 __pure2
369 static inline struct vm_page *
vm_pages_array_internal(void)370 vm_pages_array_internal(void)
371 {
372 	extern vm_page_t vm_pages;
373 	return vm_pages;
374 }
375 
376 /**
377  * Get a pointer to page at index i.
378  *
379  * This getter is the only legal way to index into the vm_pages array.
380  */
381 __pure2
382 static inline vm_page_t
vm_page_get(uint32_t i)383 vm_page_get(uint32_t i)
384 {
385 	return VM_FAR_ADD_PTR_UNBOUNDED(vm_pages_array_internal(), i);
386 }
387 
388 
389 __pure2
390 static inline bool
vm_page_in_array(const struct vm_page * m)391 vm_page_in_array(const struct vm_page *m)
392 {
393 	return vm_pages_array_internal() <= m && m < vm_pages_end;
394 }
395 
#if XNU_VM_HAS_LINEAR_PAGES_ARRAY
/*
 * With a linear pages array, struct vm_page has no vmp_phys_page field.
 * Pages that live outside of the vm_pages array use this extended layout
 * instead: VM_PAGE_GET_PHYS_PAGE() and friends cast such pages to
 * struct vm_page_with_ppnum in order to reach vmp_phys_page.
 */
struct vm_page_with_ppnum {
	struct vm_page          vmp_page;
	ppnum_t                 vmp_phys_page;
};

/*!
 * @abstract
 * Looks up the canonical kernel page for a given physical page number.
 *
 * @discussion
 * This function may return VM_PAGE_NULL for kernel pages that aren't managed
 * by the VM.
 *
 * @param pnum          The page number to lookup.  It must be within
 *                      [pmap_first_pnum, vm_pages_first_pnum + vm_pages_count)
 */
extern vm_page_t vm_page_find_canonical(ppnum_t pnum) __pure2;

/*
 * Iteration primitive behind the vm_pages_radix_for_each*() macros below.
 *
 * The macros start with *cursor == 0, pass either NULL or the address of a
 * ppnum_t as pnum, and stop iterating as soon as a null page is returned.
 * NOTE(review): the precise cursor / pnum update rules are implemented
 * outside of this header -- confirm before relying on more than the above.
 */
extern vm_page_t vm_pages_radix_next(uint32_t *cursor, ppnum_t *pnum);

/*
 * Iterate with (mem) assigned each page returned by vm_pages_radix_next().
 * (mem) must be an assignable vm_page_t lvalue.
 */
#define vm_pages_radix_for_each(mem) \
	for (uint32_t __index = 0; ((mem) = vm_pages_radix_next(&__index, NULL)); )

/*
 * Same iteration, but handing the address of (pnum) to vm_pages_radix_next()
 * instead of capturing the returned page.  (pnum) must be a ppnum_t lvalue;
 * it is parenthesized so that any lvalue expression can be passed safely.
 */
#define vm_pages_radix_for_each_pnum(pnum) \
	for (uint32_t __index = 0; vm_pages_radix_next(&__index, &(pnum)); )

#else
/* Without a linear pages array, struct vm_page itself holds vmp_phys_page. */
#define vm_page_with_ppnum vm_page
#endif /* !XNU_VM_HAS_LINEAR_PAGES_ARRAY */
typedef struct vm_page_with_ppnum *vm_page_with_ppnum_t;
427 
428 static inline ppnum_t
VM_PAGE_GET_PHYS_PAGE(const struct vm_page * m)429 VM_PAGE_GET_PHYS_PAGE(const struct vm_page *m)
430 {
431 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
432 	if (vm_page_in_array(m)) {
433 		uintptr_t index = (uintptr_t)(m - vm_pages_array_internal());
434 
435 		return (ppnum_t)(vm_pages_first_pnum + index);
436 	}
437 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
438 	return ((const struct vm_page_with_ppnum *)m)->vmp_phys_page;
439 }
440 
441 static inline void
VM_PAGE_INIT_PHYS_PAGE(struct vm_page * m,ppnum_t pnum)442 VM_PAGE_INIT_PHYS_PAGE(struct vm_page *m, ppnum_t pnum)
443 {
444 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
445 	if (vm_page_in_array(m)) {
446 		assert(pnum == VM_PAGE_GET_PHYS_PAGE(m));
447 		return;
448 	}
449 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
450 	((vm_page_with_ppnum_t)(m))->vmp_phys_page = pnum;
451 }
452 
453 static inline void
VM_PAGE_SET_PHYS_PAGE(struct vm_page * m,ppnum_t pnum)454 VM_PAGE_SET_PHYS_PAGE(struct vm_page *m, ppnum_t pnum)
455 {
456 	assert(!vm_page_in_array(m) && !m->vmp_canonical);
457 	((vm_page_with_ppnum_t)(m))->vmp_phys_page = pnum;
458 }
459 
/*
 * Page "color" helpers.
 *
 * The color of a physical page number is that number masked with
 * vm_color_mask (declared outside the visible part of this header).  On
 * x86_64 the page number is first shifted right by vm_clump_shift, so that
 * the color is derived from the clump number rather than the page number.
 */
460 #if defined(__x86_64__)
461 extern unsigned int     vm_clump_mask, vm_clump_shift;
462 #define VM_PAGE_GET_CLUMP_PNUM(pn)      ((pn) >> vm_clump_shift)
463 #define VM_PAGE_GET_CLUMP(m)            VM_PAGE_GET_CLUMP_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
464 #define VM_PAGE_GET_COLOR_PNUM(pn)      (VM_PAGE_GET_CLUMP_PNUM(pn) & vm_color_mask)
465 #define VM_PAGE_GET_COLOR(m)            VM_PAGE_GET_COLOR_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
466 #else
467 #define VM_PAGE_GET_COLOR_PNUM(pn)      ((pn) & vm_color_mask)
468 #define VM_PAGE_GET_COLOR(m)            VM_PAGE_GET_COLOR_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
469 #endif
470 
471 /*
472  * Parameters for pointer packing
473  *
474  *
475  * VM Pages pointers might point to:
476  *
477  * 1. VM_PAGE_PACKED_ALIGNED aligned kernel globals,
478  *
479  * 2. VM_PAGE_PACKED_ALIGNED aligned heap allocated vm pages
480  *
481  * 3. entries in the vm_pages array (whose entries aren't VM_PAGE_PACKED_ALIGNED
482  *    aligned).
483  *
484  *
485  * The current scheme uses 31 bits of storage and 6 bits of shift using the
486  * VM_PACK_POINTER() scheme for (1-2), and packs (3) as an index within the
487  * vm_pages array, setting the top bit (VM_PAGE_PACKED_FROM_ARRAY).
488  *
489  * This scheme gives us a reach of 128G from VM_MIN_KERNEL_AND_KEXT_ADDRESS.
490  */
/*
 * NOTE(review): VM_VPLQ_ALIGNMENT is not used in the visible part of this
 * header; presumably the alignment of the per-CPU local page queues -- confirm.
 */
491 #define VM_VPLQ_ALIGNMENT               128
/* The alignment (64) equals 1 << VM_PAGE_PACKED_PTR_SHIFT. */
492 #define VM_PAGE_PACKED_PTR_ALIGNMENT    64              /* must be a power of 2 */
493 #define VM_PAGE_PACKED_ALIGNED          __attribute__((aligned(VM_PAGE_PACKED_PTR_ALIGNMENT)))
/* 31 bits of storage shifted by 6 bits: 2^37 bytes, i.e. 128G of reach from the base. */
494 #define VM_PAGE_PACKED_PTR_BITS         31
495 #define VM_PAGE_PACKED_PTR_SHIFT        6
496 #define VM_PAGE_PACKED_PTR_BASE         ((uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS)
497
/* Bit 31, the one bit left free by the 31-bit scheme: tags "index into vm_pages" values. */
498 #define VM_PAGE_PACKED_FROM_ARRAY       0x80000000
499 
500 static inline vm_page_packed_t
vm_page_pack_ptr(uintptr_t p)501 vm_page_pack_ptr(uintptr_t p)
502 {
503 	if (vm_page_in_array(__unsafe_forge_single(vm_page_t, p))) {
504 		ptrdiff_t diff = (vm_page_t)p - vm_pages_array_internal();
505 		assert((vm_page_t)p == vm_page_get((uint32_t)diff));
506 		return (vm_page_packed_t)(diff | VM_PAGE_PACKED_FROM_ARRAY);
507 	}
508 
509 	VM_ASSERT_POINTER_PACKABLE(p, VM_PAGE_PACKED_PTR);
510 	vm_offset_t packed = VM_PACK_POINTER(p, VM_PAGE_PACKED_PTR);
511 	return CAST_DOWN_EXPLICIT(vm_page_packed_t, packed);
512 }
513 
514 
515 static inline uintptr_t
vm_page_unpack_ptr(uintptr_t p)516 vm_page_unpack_ptr(uintptr_t p)
517 {
518 	if (p >= VM_PAGE_PACKED_FROM_ARRAY) {
519 		p &= ~VM_PAGE_PACKED_FROM_ARRAY;
520 		assert(p < (uintptr_t)vm_pages_count);
521 		return (uintptr_t)vm_page_get((uint32_t)p);
522 	}
523 
524 	return VM_UNPACK_POINTER(p, VM_PAGE_PACKED_PTR);
525 }
526 
527 
/*
 * Pack / unpack a page or queue pointer (vm_pages array entries are encoded
 * as tagged indices, see vm_page_pack_ptr()).
 */
528 #define VM_PAGE_PACK_PTR(p)     vm_page_pack_ptr((uintptr_t)(p))
529 #define VM_PAGE_UNPACK_PTR(p)   vm_page_unpack_ptr((uintptr_t)(p))
530
/*
 * Pack / unpack a VM object pointer.  These use VM_PACK_POINTER() directly,
 * without the vm_pages array special case of the page variants above.
 */
531 #define VM_OBJECT_PACK(o)       ((vm_page_object_t)VM_PACK_POINTER((uintptr_t)(o), VM_PAGE_PACKED_PTR))
532 #define VM_OBJECT_UNPACK(p)     ((vm_object_t)VM_UNPACK_POINTER(p, VM_PAGE_PACKED_PTR))
533
/* VM_PAGE_OBJECT(p): the (unpacked) VM object that page p belongs to. */
534 #define VM_PAGE_OBJECT(p)       VM_OBJECT_UNPACK((p)->vmp_object)
535 #define VM_PAGE_PACK_OBJECT(o)  VM_OBJECT_PACK(o)
536
537
/*
 * Clears the page's vmp_snext pointer.  vmp_snext shares its storage with
 * vmp_pageq (anonymous union in struct vm_page), so this resets that linkage.
 */
538 #define VM_PAGE_ZERO_PAGEQ_ENTRY(p)     \
539 MACRO_BEGIN                             \
540 	(p)->vmp_snext = 0;             \
541 MACRO_END
542
543
/* The packed value under which page p is linked on a vm_page queue. */
544 #define VM_PAGE_CONVERT_TO_QUEUE_ENTRY(p)       VM_PAGE_PACK_PTR(p)
545 
546 
547 /*!
548  * @abstract
549  * The type for free queue heads that live in the kernel __DATA segment.
550  *
551  * @discussion
552  * This type must be used so that the queue is properly aligned
553  * for the VM Page packing to be able to represent pointers to this queue.
554  */
/*
 * struct vm_page_queue_free_head wraps a queue head; note that the typedef
 * name vm_page_queue_free_head_t is a POINTER to that structure, declared
 * with the VM_PAGE_PACKED_ALIGNED attribute.
 */
555 typedef struct vm_page_queue_free_head {
556 	vm_page_queue_head_t    qhead;
557 } VM_PAGE_PACKED_ALIGNED *vm_page_queue_free_head_t;
558 
/*
 *	Macro:	vm_page_queue_init
 *	Function:
 *		Initialize the given queue: both of its links are made to
 *		hold the packed address of the queue head itself.
 *	Header:
 *	void vm_page_queue_init(q)
 *		vm_page_queue_t	q;	\* MODIFIED *\
 */
#define vm_page_queue_init(q)                                             \
MACRO_BEGIN                                                               \
	vm_page_packed_t __pck_self;                                      \
                                                                          \
	VM_ASSERT_POINTER_PACKABLE((vm_offset_t)(q), VM_PAGE_PACKED_PTR); \
	__pck_self = VM_PAGE_PACK_PTR(q);                                 \
	(q)->next = __pck_self;                                           \
	(q)->prev = __pck_self;                                           \
MACRO_END
573 
574 
/*
 * Macro: vm_page_queue_enter
 * Function:
 *     Insert a new element at the tail of the vm_page queue.
 * Header:
 *     void vm_page_queue_enter(head, elt, field)
 *         vm_page_queue_t head;
 *         vm_page_t elt;
 *         <field> is the list field in vm_page_t
 *
 * The queue is empty when the head's prev link is the packed head itself.
 *
 * This macro's arguments have to match the generic "queue_enter()" macro which is
 * what is used for this on 32 bit kernels.
 */
#define vm_page_queue_enter(head, elt, field)                                 \
MACRO_BEGIN                                                                   \
	vm_page_packed_t __pck_new  = VM_PAGE_PACK_PTR(elt);                  \
	vm_page_packed_t __pck_self = VM_PAGE_PACK_PTR(head);                 \
	vm_page_packed_t __pck_tail = (head)->prev;                           \
                                                                              \
	if (__pck_tail != __pck_self) {                                       \
		/* non-empty queue: chain the old tail to the new element */  \
		vm_page_t __tail = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_tail); \
                                                                              \
		__tail->field.next = __pck_new;                               \
	} else {                                                              \
		/* empty queue: the new element is also the first one */      \
		(head)->next = __pck_new;                                     \
	}                                                                     \
	(elt)->field.prev = __pck_tail;                                       \
	(elt)->field.next = __pck_self;                                       \
	(head)->prev = __pck_new;                                             \
MACRO_END
605 
606 
#if defined(__x86_64__)
/*
 * These are helper macros for vm_page_queue_enter_clump to assist
 * with conditional compilation (release / debug / development).
 *
 * On DEVELOPMENT || DEBUG kernels they perform consistency checks and bump
 * the vm_clump_* counters; otherwise they expand to nothing.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed without precedence surprises.
 */
#if DEVELOPMENT || DEBUG

/*
 * When __prev is non-NULL, assert that __prev and __p are neighbours:
 * __prev's next link unpacks to __p, and __p's <field>.prev link unpacks
 * to __prev.
 */
#define __DEBUG_CHECK_BUDDIES(__prev, __p, field)                                                 \
MACRO_BEGIN                                                                                       \
	if ((__prev) != NULL) {                                                                   \
	        assert((__p) == (vm_page_t)VM_PAGE_UNPACK_PTR((__prev)->next));                   \
	        assert((__prev) == (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR((__p)->field.prev)); \
	}                                                                                         \
MACRO_END

/*
 * Follow __n_free next links starting from __first and assert that the
 * entry reached is __last_next.
 */
#define __DEBUG_VERIFY_LINKS(__first, __n_free, __last_next)                    \
MACRO_BEGIN                                                                     \
	unsigned int __i;                                                       \
	vm_page_queue_entry_t __tmp;                                            \
	for (__i = 0, __tmp = (__first); __i < (__n_free); __i++) {             \
	        __tmp = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(__tmp->next); \
	}                                                                       \
	assert(__tmp == (__last_next));                                         \
MACRO_END

/* Statistics counters (vm_clump_* are defined outside of this header). */
#define __DEBUG_STAT_INCREMENT_INRANGE              vm_clump_inrange++
#define __DEBUG_STAT_INCREMENT_INSERTS              vm_clump_inserts++
#define __DEBUG_STAT_INCREMENT_PROMOTES(__n_free)   vm_clump_promotes += (__n_free)

#else

#define __DEBUG_CHECK_BUDDIES(__prev, __p, field)
#define __DEBUG_VERIFY_LINKS(__first, __n_free, __last_next)
#define __DEBUG_STAT_INCREMENT_INRANGE
#define __DEBUG_STAT_INCREMENT_INSERTS
#define __DEBUG_STAT_INCREMENT_PROMOTES(__n_free)

#endif  /* if DEVELOPMENT || DEBUG */

#endif /* defined(__x86_64__) */
647 
/*
 * Macro: vm_page_queue_enter_first
 * Function:
 *     Insert a new element at the head of the vm_page queue.
 * Header:
 *     void vm_page_queue_enter_first(head, elt, field)
 *         vm_page_queue_t head;
 *         vm_page_t elt;
 *         <field> is the linkage field in vm_page
 *
 * The queue is empty when the head's next link is the packed head itself.
 *
 * This macro's arguments have to match the generic "queue_enter_first()" macro which is
 * what is used for this on 32 bit kernels.
 */
#define vm_page_queue_enter_first(head, elt, field)                             \
MACRO_BEGIN                                                                     \
	vm_page_packed_t __pck_first = (head)->next;                            \
	vm_page_packed_t __pck_self  = VM_PAGE_PACK_PTR(head);                  \
	vm_page_packed_t __pck_new   = VM_PAGE_PACK_PTR(elt);                   \
                                                                                \
	if (__pck_first != __pck_self) {                                        \
		/* non-empty queue: the old first element now follows elt */    \
		vm_page_t __first = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_first); \
                                                                                \
		__first->field.prev = __pck_new;                                \
	} else {                                                                \
		/* empty queue: the new element is also the last one */         \
		(head)->prev = __pck_new;                                       \
	}                                                                       \
                                                                                \
	(elt)->field.next = __pck_first;                                        \
	(elt)->field.prev = __pck_self;                                         \
	(head)->next = __pck_new;                                               \
MACRO_END
679 
680 
/*
 * Macro:	vm_page_queue_remove
 * Function:
 *     Remove an arbitrary page from a vm_page queue.
 * Header:
 *     void vm_page_queue_remove(head, elt, field)
 *         arguments as in vm_page_queue_enter
 *
 * Both links of the removed element are zeroed on the way out.
 *
 * This macro's arguments have to match the generic "queue_remove()" macro which is
 * what is used for this on 32 bit kernels.
 */
#define vm_page_queue_remove(head, elt, field)                               \
MACRO_BEGIN                                                                  \
	vm_page_packed_t __pck_succ = (elt)->field.next;                     \
	vm_page_packed_t __pck_pred = (elt)->field.prev;                     \
	vm_page_t        __succ = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_succ); \
	vm_page_t        __pred = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_pred); \
                                                                             \
	/* fix the back link of whatever follows the element */              \
	if ((void *)__succ != (void *)(head)) {                              \
		__succ->field.prev = __pck_pred;                             \
	} else {                                                             \
		(head)->prev = __pck_pred;                                   \
	}                                                                    \
                                                                             \
	/* fix the forward link of whatever precedes the element */          \
	if ((void *)__pred != (void *)(head)) {                              \
		__pred->field.next = __pck_succ;                             \
	} else {                                                             \
		(head)->next = __pck_succ;                                   \
	}                                                                    \
                                                                             \
	(elt)->field.next = 0;                                               \
	(elt)->field.prev = 0;                                               \
MACRO_END
714 
715 
/*
 * Macro: vm_page_queue_remove_first
 *
 * Function:
 *     Remove and return the entry at the head of a vm_page queue.
 *     No emptiness check is made here: the head's next link is unpacked
 *     and dereferenced unconditionally.
 *
 * Header:
 *     vm_page_queue_remove_first(head, entry, field)
 *     N.B. entry is returned by reference
 *
 * This macro's arguments have to match the generic "queue_remove_first()" macro which is
 * what is used for this on 32 bit kernels.
 */
#define vm_page_queue_remove_first(head, entry, field)               \
MACRO_BEGIN                                                          \
	vm_page_packed_t __pck_self = VM_PAGE_PACK_PTR(head);        \
	vm_page_packed_t __pck_second;                               \
	vm_page_t        __second;                                   \
                                                                     \
	(entry) = (vm_page_t)VM_PAGE_UNPACK_PTR((head)->next);       \
	__pck_second = (entry)->field.next;                          \
	__second = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_second);      \
                                                                     \
	if (__pck_second != __pck_self) {                            \
		/* the element after entry becomes the first one */  \
		__second->field.prev = __pck_self;                   \
	} else {                                                     \
		/* entry was the only element: queue is now empty */ \
		(head)->prev = __pck_self;                           \
	}                                                            \
                                                                     \
	(head)->next = __pck_second;                                 \
	(entry)->field.next = 0;                                     \
	(entry)->field.prev = 0;                                     \
MACRO_END
749 
750 
751 #if defined(__x86_64__)
/*
 * Macro:  vm_page_queue_remove_first_with_clump
 *
 * Function:
 *     Remove and return the entry at the head of the free queue, and
 *     report whether that entry was the last page of its clump.
 *
 * Header:
 *     vm_page_queue_remove_first_with_clump(head, entry, end)
 *         entry is returned by reference
 *         end   is returned by reference: 1 when the removed entry was
 *               the only element left on the queue, or when the page
 *               that followed it has a different VM_PAGE_GET_CLUMP()
 *               value; 0 otherwise
 *
 * Pages are chained through their vmp_pageq field.  The links of the
 * removed entry are zeroed.  No emptiness check is made here.
 */
#define vm_page_queue_remove_first_with_clump(head, entry, end)              \
MACRO_BEGIN                                                                  \
	vm_page_packed_t __pck_q = VM_PAGE_PACK_PTR(head);                   \
	vm_page_packed_t __pck_succ;                                         \
	vm_page_t        __succ;                                             \
                                                                             \
	(entry) = (vm_page_t)VM_PAGE_UNPACK_PTR((head)->next);               \
	__pck_succ = (entry)->vmp_pageq.next;                                \
	__succ = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_succ);                  \
                                                                             \
	(end) = 0;                                                           \
	if (__pck_succ != __pck_q) {                                         \
	        __succ->vmp_pageq.prev = __pck_q;                            \
	        if (VM_PAGE_GET_CLUMP(entry) != VM_PAGE_GET_CLUMP(__succ)) { \
	                (end) = 1;                                           \
	        }                                                            \
	} else {                                                             \
	        (head)->prev = __pck_q;                                      \
	        (end) = 1;                                                   \
	}                                                                    \
	(head)->next = __pck_succ;                                           \
                                                                             \
	(entry)->vmp_pageq.prev = 0;                                         \
	(entry)->vmp_pageq.next = 0;                                         \
MACRO_END
788 #endif
789 
/*
 *	Macro:	vm_page_queue_end
 *	Function:
 *		Tests whether an entry is really the end of the queue,
 *		i.e. whether it is the queue head itself.
 *	Header:
 *		boolean_t vm_page_queue_end(q, qe)
 *			vm_page_queue_t q;
 *			vm_page_queue_entry_t qe;
 */
#define vm_page_queue_end(q, qe) \
	((q) == (qe))
801 
802 
/*
 *	Macro:	vm_page_queue_empty
 *	Function:
 *		Tests whether a queue is empty, i.e. whether its first
 *		entry is already the end of the queue.
 *	Header:
 *		boolean_t vm_page_queue_empty(q)
 *			vm_page_queue_t q;
 */
#define vm_page_queue_empty(q) \
	vm_page_queue_end((q), ((vm_page_queue_entry_t)vm_page_queue_first(q)))
812 
813 
814 
/*
 *	Macro:	vm_page_queue_first
 *	Function:
 *		Returns the first entry in the queue, obtained by
 *		unpacking the head's next link.
 *	Header:
 *		uintptr_t vm_page_queue_first(q)
 *			vm_page_queue_t q;	\* IN *\
 */
#define vm_page_queue_first(q) \
	(VM_PAGE_UNPACK_PTR((q)->next))
824 
825 
826 
/*
 *	Macro:		vm_page_queue_last
 *	Function:
 *		Returns the last entry in the queue, obtained by
 *		unpacking the head's prev link.
 *	Header:
 *		uintptr_t vm_page_queue_last(q)
 *			vm_page_queue_t	q;		\* IN *\
 */
#define vm_page_queue_last(q) \
	(VM_PAGE_UNPACK_PTR((q)->prev))
836 
837 
838 
/*
 *	Macro:	vm_page_queue_next
 *	Function:
 *		Returns the entry after an item in the queue.
 *	Header:
 *		uintptr_t vm_page_queue_next(qc)
 *			vm_page_queue_t qc;
 *
 *	qc points at the item's chain links; vm_page_queue_iterate
 *	passes &(elt)->field.
 */
#define vm_page_queue_next(qc) \
	(VM_PAGE_UNPACK_PTR((qc)->next))
848 
849 
850 
/*
 *	Macro:	vm_page_queue_prev
 *	Function:
 *		Returns the entry before an item in the queue.
 *	Header:
 *		uintptr_t vm_page_queue_prev(qc)
 *			vm_page_queue_t qc;
 *
 *	qc points at the item's chain links.
 */
#define vm_page_queue_prev(qc) \
	(VM_PAGE_UNPACK_PTR((qc)->prev))
860 
861 
862 
/*
 *	Macro:	vm_page_queue_iterate
 *	Function:
 *		Iterate over each item in a vm_page queue.
 *		Generates a 'for' loop header, setting elt to
 *		each item in turn (by reference).  The loop stops
 *		when elt compares equal to the queue head.
 *	Header:
 *		vm_page_queue_iterate(head, elt, field)
 *			vm_page_queue_t head;
 *			vm_page_t elt;
 *			<field> is the chain field in vm_page_t
 *
 *	The successor is read from elt at the end of every pass, so the
 *	loop body must leave elt's chain field intact.
 *
 *	The definition deliberately ends without a line continuation:
 *	a trailing backslash would splice whatever line follows into
 *	the macro body.
 */
#define vm_page_queue_iterate(head, elt, field)                       \
	for ((elt) = (vm_page_t)vm_page_queue_first(head);            \
	    !vm_page_queue_end((head), (vm_page_queue_entry_t)(elt)); \
	    (elt) = (vm_page_t)vm_page_queue_next(&(elt)->field))
879 
880 
881 /*
882  * VM_PAGE_MIN_SPECULATIVE_AGE_Q through vm_page_max_speculative_age_q
883  * represents a set of aging bins that are 'protected'...
884  *
885  * VM_PAGE_SPECULATIVE_AGED_Q is a list of the speculative pages that have
886  * not yet been 'claimed' but have been aged out of the protective bins
887  * this occurs in vm_page_speculate when it advances to the next bin
888  * and discovers that it is still occupied... at that point, all of the
889  * pages in that bin are moved to the VM_PAGE_SPECULATIVE_AGED_Q.  the pages
890  * in that bin are all guaranteed to have reached at least the maximum age
891  * we allow for a protected page... they can be older if there is no
892  * memory pressure to pull them from the bin, or there are no new speculative pages
893  * being generated to push them out.
894  * this list is the one that vm_pageout_scan will prefer when looking
895  * for pages to move to the underweight free list
896  *
897  * vm_page_max_speculative_age_q * VM_PAGE_SPECULATIVE_Q_AGE_MS
898  * defines the amount of time a speculative page is normally
899  * allowed to live in the 'protected' state (i.e. not available
900  * to be stolen if vm_pageout_scan is running and looking for
901  * pages)...  however, if the total number of speculative pages
902  * in the protected state exceeds our limit (defined in vm_pageout.c)
903  * and there are none available in VM_PAGE_SPECULATIVE_AGED_Q, then
904  * vm_pageout_scan is allowed to steal pages from the protected
905  * bucket even if they are underage.
906  *
907  * vm_pageout_scan is also allowed to pull pages from a protected
908  * bin if the bin has reached the "age of consent" we've set
909  */
/*
 * Speculative queue numbering, in ascending order.
 * NOTE(review): these look like indices into vm_page_queue_speculative[];
 * confirm against the users in vm_resident.c / vm_pageout.c.
 */
#define VM_PAGE_SPECULATIVE_AGED_Q              0
#define VM_PAGE_MIN_SPECULATIVE_AGE_Q           1
#define VM_PAGE_DEFAULT_MAX_SPECULATIVE_AGE_Q   10
#define VM_PAGE_RESERVED_SPECULATIVE_AGE_Q      40

/* Age granted to one protected bin, in milliseconds. */
#define VM_PAGE_SPECULATIVE_Q_AGE_MS            500
916 
917 struct vm_speculative_age_q {
918 	/*
919 	 * memory queue for speculative pages via clustered pageins
920 	 */
921 	vm_page_queue_head_t    age_q;
922 	mach_timespec_t age_ts;
923 } VM_PAGE_PACKED_ALIGNED;
924 
925 
926 
927 extern
928 struct vm_speculative_age_q     vm_page_queue_speculative[];
929 
930 extern int                      speculative_steal_index;
931 extern int                      speculative_age_index;
932 extern unsigned int             vm_page_speculative_q_age_ms;
933 extern unsigned int             vm_page_max_speculative_age_q;
934 
935 
936 typedef struct vm_locks_array {
937 	char    pad  __attribute__ ((aligned(64)));
938 	lck_mtx_t       vm_page_queue_lock2 __attribute__ ((aligned(64)));
939 	lck_mtx_t       vm_page_queue_free_lock2 __attribute__ ((aligned(64)));
940 	char    pad2  __attribute__ ((aligned(64)));
941 } vm_locks_array_t;
942 
943 
/* True when the page's queue state is VM_PAGE_IS_WIRED. */
#define VM_PAGE_WIRED(m) \
	((m)->vmp_q_state == VM_PAGE_IS_WIRED)

/* Singly linked list link of a page, as an lvalue. */
#define NEXT_PAGE(m) \
	((m)->vmp_snext)

/* Address of the singly linked list link of a page. */
#define NEXT_PAGE_PTR(m) \
	(&(m)->vmp_snext)
947 
948 /*!
949  * @abstract
950  * Represents a singly linked list of pages with a count.
951  *
952  * @discussion
953  * This type is used as a way to exchange transient collections of VM pages
954  * by various subsystems.
955  *
956  * This type is designed to be less than sizeof(_Complex) which means
957  * it that can be passed by value efficiently (either as a function argument
958  * or its result).
959  *
960  *
961  * @field vmpl_head
962  * The head of the list, or VM_PAGE_NULL.
963  *
964  * @field vmpl_count
965  * How many pages are on that list.
966  *
967  * @field vmpl_has_realtime
968  * At least one page on the list has vmp_realtime set.
969  */
970 typedef struct {
971 	vm_page_t vmpl_head;
972 	uint32_t  vmpl_count;
973 	bool      vmpl_has_realtime;
974 } vm_page_list_t;
975 
976 
977 /*!
978  * @abstract
979  * Low level function that pushes a page on a naked singly linked list of VM
980  * pages.
981  *
982  * @param head          The list head.
983  * @param mem           The page to push on the list.
984  */
985 static inline void
_vm_page_list_push(vm_page_t * head,vm_page_t mem)986 _vm_page_list_push(vm_page_t *head, vm_page_t mem)
987 {
988 	NEXT_PAGE(mem) = *head;
989 	*head = mem;
990 }
991 
992 /*!
993  * @abstract
994  * Pushes a page onto a VM page list, adjusting its properties.
995  *
996  * @param list          The VM page list to push onto
997  * @param mem           The page to push on the list.
998  */
999 static inline void
vm_page_list_push(vm_page_list_t * list,vm_page_t mem)1000 vm_page_list_push(vm_page_list_t *list, vm_page_t mem)
1001 {
1002 	_vm_page_list_push(&list->vmpl_head, mem);
1003 	list->vmpl_count++;
1004 	if (mem->vmp_realtime) {
1005 		list->vmpl_has_realtime = true;
1006 	}
1007 }
1008 
1009 /*!
1010  * @abstract
1011  * Conveniency function that creates a VM page list from a single page.
1012  *
1013  * @param mem           The VM page to put on the list.
1014  */
1015 static inline vm_page_list_t
vm_page_list_for_page(vm_page_t mem)1016 vm_page_list_for_page(vm_page_t mem)
1017 {
1018 	assert(NEXT_PAGE(mem) == VM_PAGE_NULL);
1019 	return (vm_page_list_t){
1020 		       .vmpl_head  = mem,
1021 		       .vmpl_count = 1,
1022 		       .vmpl_has_realtime = mem->vmp_realtime,
1023 	};
1024 }
1025 
1026 /*!
1027  * @abstract
1028  * Low level function that pops a page from a naked singly linked list of VM
1029  * pages.
1030  *
1031  * @param head          The list head.
1032  *
1033  * @returns             The first page that was on the list
1034  *                      or VM_PAGE_NULL if it was empty.
1035  */
1036 static inline vm_page_t
_vm_page_list_pop(vm_page_t * head)1037 _vm_page_list_pop(vm_page_t *head)
1038 {
1039 	vm_page_t mem = *head;
1040 
1041 	if (mem) {
1042 		*head = NEXT_PAGE(mem);
1043 		VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
1044 	}
1045 
1046 	return mem;
1047 }
1048 
1049 /*!
1050  * @abstract
1051  * Pops a page from a VM page list, adjusting its properties.
1052  *
1053  * @param list          The VM page list to pop from.
1054  *
1055  * @returns             The first page that was on the list
1056  *                      or VM_PAGE_NULL if it was empty.
1057  */
1058 static inline vm_page_t
vm_page_list_pop(vm_page_list_t * list)1059 vm_page_list_pop(vm_page_list_t *list)
1060 {
1061 	if (list->vmpl_head) {
1062 		list->vmpl_count--;
1063 		return _vm_page_list_pop(&list->vmpl_head);
1064 	}
1065 	*list = (vm_page_list_t){ };
1066 	return VM_PAGE_NULL;
1067 }
1068 
1069 
1070 /*!
1071  * @abstract
1072  * Reverses a list of VM pages in place.
1073  *
1074  * @param list          The VM page list to reverse.
1075  */
1076 static inline void
vm_page_list_reverse(vm_page_list_t * list)1077 vm_page_list_reverse(vm_page_list_t *list)
1078 {
1079 	vm_page_t cur, next;
1080 
1081 	cur = list->vmpl_head;
1082 	list->vmpl_head = NULL;
1083 
1084 	while (cur) {
1085 		next = NEXT_PAGE(cur);
1086 		_vm_page_list_push(&list->vmpl_head, cur);
1087 		cur = next;
1088 	}
1089 }
1090 
1091 
/*!
 * @abstract
 * Low level iterator over all pages on a naked singly linked list
 * of VM pages.
 *
 * @discussion
 * Expands to a 'for' loop header that follows NEXT_PAGE() links until
 * a null page is reached.
 * Mutating the list during enumeration is undefined.
 *
 * @param mem           The variable to use for iteration.
 * @param list          The first page of the list (a vm_page_t).
 */
#define _vm_page_list_foreach(mem, list)                \
	for ((mem) = (list);                            \
	    (mem);                                      \
	    (mem) = NEXT_PAGE(mem))
1105 
1106 
/*!
 * @abstract
 * Iterator over a VM page list.
 *
 * @discussion
 * Walks the pages starting at the list's vmpl_head.
 * Mutating the list during enumeration is undefined.
 *
 * @param mem           The variable to use for iteration.
 * @param list          The VM page list (a vm_page_list_t, not a pointer).
 */
#define vm_page_list_foreach(mem, list)                 \
	_vm_page_list_foreach(mem, (list).vmpl_head)
1119 
1120 
/*!
 * @abstract
 * Low level iterator over all pages on a naked singly linked list
 * of VM pages, that also consumes the list as it iterates.
 *
 * @discussion
 * Each element is removed from the list, with _vm_page_list_pop(),
 * as it is being iterated.
 *
 * @param mem           The variable to use for iteration.
 * @param list          Pointer to the list head (a vm_page_t *).
 */
#define _vm_page_list_foreach_consume(mem, list)        \
	while (((mem) = _vm_page_list_pop((list))))
1134 
/*!
 * @abstract
 * Iterator over a VM page list, that consumes the list.
 *
 * @discussion
 * Each element is removed from the list, with vm_page_list_pop(),
 * as it is being iterated.
 *
 * @param mem           The variable to use for iteration.
 * @param list          Pointer to the VM page list (a vm_page_list_t *).
 */
#define vm_page_list_foreach_consume(mem, list)         \
	while (((mem) = vm_page_list_pop((list))))
1147 
1148 
1149 /*
1150  * XXX	The unusual bit should not be necessary.  Most of the bit
1151  * XXX	fields above really want to be masks.
1152  */
1153 
/*
 *	For debugging, this macro can be defined to perform
 *	some useful check on a page structure.
 *	It is INTENTIONALLY left as a no-op (its argument is not
 *	evaluated) so that the current call-sites can be left
 *	intact for future uses.
 */

#define VM_PAGE_CHECK(mem)      MACRO_BEGIN MACRO_END
1164 
/*
 *     Page coloring:
 *
 *     The free page list is actually n lists, one per color,
 *     where the number of colors is a function of the machine's
 *     cache geometry set at system initialization.  To disable
 *     coloring, set vm_colors to 1 and vm_color_mask to 0.
 *     The boot-arg "colors" may be used to override vm_colors.
 *     Note that there is little harm in having more colors than needed.
 */

#define DEFAULT_COLORS  32
#define MAX_COLORS      128
1177 
1178 /*
1179  * Page free queue type.  Abstracts the notion of a free queue of pages, that
1180  * contains free pages of a particular memory class, and maintains a count of
1181  * the number of pages in the free queue.
1182  *
1183  * Pages in the queue will be marked VM_PAGE_ON_FREE_Q when they are added to
1184  * the free queue, and VM_PAGE_NOT_ON_Q when they are removed.
1185  *
1186  * These free queues will color pages, consistent with MachVMs color mask.
1187  */
1188 typedef struct vm_page_free_queue {
1189 	struct vm_page_queue_free_head vmpfq_queues[MAX_COLORS];
1190 	uint32_t                       vmpfq_count;
1191 } *vm_page_free_queue_t;
1192 
/* must be in range 1..MAX_COLORS */
extern unsigned int     vm_colors;
/* must be (vm_colors-1) */
extern unsigned int     vm_color_mask;
/* optimal #colors based on cache geometry */
extern unsigned int     vm_cache_geometry_colors;
extern unsigned int     vm_free_magazine_refill_limit;
1197 
/*
 * Wired memory is a very limited resource and we can't let users exhaust it
 * and deadlock the entire system.  We enforce the following limits:
 *
 * vm_per_task_user_wire_limit
 *      how much memory can be user-wired in one user task
 *
 * vm_global_user_wire_limit (default: same as vm_per_task_user_wire_limit)
 *      how much memory can be user-wired in all user tasks
 *
 * These values are set to defaults based on the number of pages managed
 * by the VM system.  They can be overridden via sysctls.
 * See kmem_set_user_wire_limits for details on the default values.
 *
 * Regardless of the amount of memory in the system, we never reserve
 * more than VM_NOT_USER_WIREABLE_MAX bytes as unlockable.
 */
#define VM_NOT_USER_WIREABLE_MAX \
	(32ULL*1024*1024*1024)  /* 32GB */
1216 
1217 extern vm_map_size_t   vm_per_task_user_wire_limit;
1218 extern vm_map_size_t   vm_global_user_wire_limit;
1219 extern uint64_t        vm_add_wire_count_over_global_limit;
1220 extern uint64_t        vm_add_wire_count_over_user_limit;
1221 
1222 /*
1223  *	Each pageable resident page falls into one of three lists:
1224  *
1225  *	free
1226  *		Available for allocation now.  The free list is
1227  *		actually an array of lists, one per color.
1228  *	inactive
1229  *		Not referenced in any map, but still has an
1230  *		object/offset-page mapping, and may be dirty.
1231  *		This is the list of pages that should be
1232  *		paged out next.  There are actually two
1233  *		inactive lists, one for pages brought in from
1234  *		disk or other backing store, and another
1235  *		for "zero-filled" pages.  See vm_pageout_scan()
1236  *		for the distinction and usage.
1237  *	active
1238  *		A list of pages which have been placed in
1239  *		at least one physical map.  This list is
1240  *		ordered, in LRU-like fashion.
1241  */
1242 
1243 
1244 #define VPL_LOCK_SPIN 1
1245 
1246 struct vpl {
1247 	vm_page_queue_head_t    vpl_queue;
1248 	unsigned int    vpl_count;
1249 	unsigned int    vpl_internal_count;
1250 	unsigned int    vpl_external_count;
1251 	lck_spin_t      vpl_lock;
1252 };
1253 
1254 extern
1255 struct vpl     * /* __zpercpu */ vm_page_local_q;
1256 extern
1257 unsigned int    vm_page_local_q_soft_limit;
1258 extern
1259 unsigned int    vm_page_local_q_hard_limit;
1260 extern
1261 vm_locks_array_t vm_page_locks;
1262 
1263 extern
1264 vm_page_queue_head_t    vm_page_queue_active;   /* active memory queue */
1265 extern
1266 vm_page_queue_head_t    vm_page_queue_inactive; /* inactive memory queue for normal pages */
1267 #if CONFIG_SECLUDED_MEMORY
1268 extern
1269 vm_page_queue_head_t    vm_page_queue_secluded; /* reclaimable pages secluded for Camera */
1270 #endif /* CONFIG_SECLUDED_MEMORY */
1271 extern
1272 vm_page_queue_head_t    vm_page_queue_cleaned; /* clean-queue inactive memory */
1273 extern
1274 vm_page_queue_head_t    vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
1275 extern
1276 vm_page_queue_head_t    vm_page_queue_throttled;        /* memory queue for throttled pageout pages */
1277 
1278 extern
1279 queue_head_t    vm_objects_wired;
1280 extern
1281 lck_spin_t      vm_objects_wired_lock;
1282 
1283 #define VM_PAGE_DONATE_DISABLED     0
1284 #define VM_PAGE_DONATE_ENABLED      1
1285 extern
1286 uint32_t        vm_page_donate_mode;
1287 extern
1288 bool        vm_page_donate_queue_ripe;
1289 
1290 #define VM_PAGE_BACKGROUND_TARGET_MAX   50000
1291 #define VM_PAGE_BG_DISABLED     0
1292 #define VM_PAGE_BG_ENABLED     1
1293 
1294 extern
1295 vm_page_queue_head_t    vm_page_queue_background;
1296 extern
1297 uint64_t        vm_page_background_promoted_count;
1298 extern
1299 uint32_t        vm_page_background_count;
1300 extern
1301 uint32_t        vm_page_background_target;
1302 extern
1303 uint32_t        vm_page_background_internal_count;
1304 extern
1305 uint32_t        vm_page_background_external_count;
1306 extern
1307 uint32_t        vm_page_background_mode;
1308 extern
1309 uint32_t        vm_page_background_exclude_external;
1310 
1311 extern
1312 vm_page_queue_head_t    vm_page_queue_donate;
1313 extern
1314 uint32_t        vm_page_donate_count;
1315 extern
1316 uint32_t        vm_page_donate_target_low;
1317 extern
1318 uint32_t        vm_page_donate_target_high;
1319 #define VM_PAGE_DONATE_TARGET_LOWWATER  (100)
1320 #define VM_PAGE_DONATE_TARGET_HIGHWATER ((unsigned int)(atop_64(max_mem) / 8))
1321 
1322 extern
1323 vm_offset_t     first_phys_addr;        /* physical address for first_page */
1324 extern
1325 vm_offset_t     last_phys_addr;         /* physical address for last_page */
1326 
1327 extern
1328 unsigned int    vm_page_free_count;     /* How many pages are free? (sum of all colors) */
1329 extern
1330 unsigned int    vm_page_active_count;   /* How many pages are active? */
1331 extern
1332 unsigned int    vm_page_inactive_count; /* How many pages are inactive? */
1333 extern
1334 unsigned int vm_page_kernelcache_count; /* How many pages are used for the kernelcache? */
1335 extern
1336 unsigned int vm_page_realtime_count;    /* How many pages are used by realtime threads? */
#if CONFIG_SECLUDED_MEMORY
/* How many pages are secluded? */
extern unsigned int     vm_page_secluded_count;
/* how many of them are free? */
extern unsigned int     vm_page_secluded_count_free;
/* how many of them are in use? */
extern unsigned int     vm_page_secluded_count_inuse;
/*
 * We keep filling the secluded pool with new eligible pages and
 * we can overshoot our target by a lot.
 * When there's memory pressure, vm_pageout_scan() will re-balance the queues,
 * pushing the extra secluded pages to the active or free queue.
 * Since these "over target" secluded pages are actually "available", jetsam
 * should consider them as such, so make them visible to jetsam via the
 * "vm_page_secluded_count_over_target" counter and update it whenever we
 * update vm_page_secluded_count or vm_page_secluded_target.
 */
extern unsigned int     vm_page_secluded_count_over_target;

/*
 * Recompute vm_page_secluded_count_over_target: the excess of
 * vm_page_secluded_count over vm_page_secluded_target, clamped at 0.
 */
#define VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE()                     \
	MACRO_BEGIN                                                     \
	vm_page_secluded_count_over_target =                            \
	    (vm_page_secluded_count > vm_page_secluded_target)          \
	    ? (vm_page_secluded_count - vm_page_secluded_target)        \
	    : 0;                                                        \
	MACRO_END

/* Read the last value computed by the update macro. */
#define VM_PAGE_SECLUDED_COUNT_OVER_TARGET() \
	vm_page_secluded_count_over_target

#else /* CONFIG_SECLUDED_MEMORY */

/* Without secluded memory the update is a no-op and the count is 0. */
#define VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE() \
	MACRO_BEGIN                                 \
	MACRO_END
#define VM_PAGE_SECLUDED_COUNT_OVER_TARGET() 0

#endif /* CONFIG_SECLUDED_MEMORY */
/* How many pages are in the clean queue? */
extern unsigned int     vm_page_cleaned_count;
/* How many inactives are throttled */
extern unsigned int     vm_page_throttled_count;
/* How many speculative pages are unclaimed? */
extern unsigned int     vm_page_speculative_count;
extern unsigned int     vm_page_pageable_internal_count;
extern unsigned int     vm_page_pageable_external_count;
/* How many pages are mapped executable? */
extern unsigned int     vm_page_xpmapped_external_count;
/* How many pages are file-backed? */
extern unsigned int     vm_page_external_count;
/* How many pages are anonymous? */
extern unsigned int     vm_page_internal_count;
/* How many pages are wired? */
extern unsigned int     vm_page_wire_count;
/* How many pages wired at startup */
extern unsigned int     vm_page_wire_count_initial;
/* even earlier than _initial */
extern unsigned int     vm_page_wire_count_on_boot;
/* How many do we want free? */
extern unsigned int     vm_page_free_target;
/* When to wakeup pageout */
extern unsigned int     vm_page_free_min;
/* When to throttle new page creation */
extern unsigned int     vm_page_throttle_limit;
/* How many do we want inactive? */
extern unsigned int     vm_page_inactive_target;
#if CONFIG_SECLUDED_MEMORY
/* How many do we want secluded? */
extern unsigned int     vm_page_secluded_target;
#endif /* CONFIG_SECLUDED_MEMORY */
/* When it's ok to pre-clean */
extern unsigned int     vm_page_anonymous_min;
/* How many pages reserved to do pageout */
extern unsigned int     vm_page_free_reserved;
extern unsigned int     vm_page_gobble_count;
/* Count of stolen pages not accounted in zones */
extern unsigned int     vm_page_stolen_count;
/* Count of large pages used in early boot */
extern unsigned int     vm_page_kern_lpage_count;
1414 
1415 
1416 #if DEVELOPMENT || DEBUG
1417 extern
1418 unsigned int    vm_page_speculative_used;
1419 #endif
1420 
1421 extern
1422 unsigned int    vm_page_purgeable_count;/* How many pages are purgeable now ? */
1423 extern
1424 unsigned int    vm_page_purgeable_wired_count;/* How many purgeable pages are wired now ? */
1425 extern
1426 uint64_t        vm_page_purged_count;   /* How many pages got purged so far ? */
1427 
1428 extern unsigned int     vm_page_free_wanted;
1429 /* how many threads are waiting for memory */
1430 
1431 extern unsigned int     vm_page_free_wanted_privileged;
1432 /* how many VM privileged threads are waiting for memory */
1433 #if CONFIG_SECLUDED_MEMORY
1434 extern unsigned int     vm_page_free_wanted_secluded;
1435 /* how many threads are waiting for secluded memory */
1436 #endif /* CONFIG_SECLUDED_MEMORY */
1437 
1438 extern const ppnum_t    vm_page_fictitious_addr;
1439 /* (fake) phys_addr of fictitious pages */
1440 
1441 extern const ppnum_t    vm_page_guard_addr;
1442 /* (fake) phys_addr of guard pages */
1443 
1444 
1445 extern boolean_t        vm_page_deactivate_hint;
1446 
1447 extern int              vm_compressor_mode;
1448 
1449 #if __x86_64__
1450 /*
1451  * Defaults to true, so highest memory is used first.
1452  */
1453 extern boolean_t        vm_himemory_mode;
1454 #else
1455 #define vm_himemory_mode TRUE
1456 #endif
1457 
1458 #if XNU_VM_HAS_LOPAGE
1459 extern bool             vm_lopage_needed;
1460 extern bool             vm_lopage_refill;
1461 extern uint32_t         vm_lopage_free_count;
1462 extern uint32_t         vm_lopage_free_limit;
1463 extern uint32_t         vm_lopage_lowater;
1464 #else
1465 #define vm_lopage_needed        0
1466 #define vm_lopage_free_count    0
1467 #endif
1468 extern uint64_t         max_valid_dma_address;
1469 extern ppnum_t          max_valid_low_ppnum;
1470 
1471 /*!
1472  * @abstract
1473  * Options that alter the behavior of vm_page_grab_options().
1474  *
1475  * @const VM_PAGE_GRAB_OPTIONS_NONE
1476  * The default value when no other specific options are required.
1477  *
1478  * @const VM_PAGE_GRAB_Q_LOCK_HELD
1479  * Denotes the caller is holding the vm page queues lock held.
1480  *
1481  * @const VM_PAGE_GRAB_NOPAGEWAIT
1482  * Denotes that the caller never wants @c vm_page_grab_options() to call
1483  * @c VM_PAGE_WAIT(), even if the thread is privileged.
1484  *
1485  * @const VM_PAGE_GRAB_SECLUDED
1486  * The caller is eligible to the secluded pool.
1487  */
1488 __enum_decl(vm_grab_options_t, uint32_t, {
1489 	VM_PAGE_GRAB_OPTIONS_NONE               = 0x00000000,
1490 	VM_PAGE_GRAB_Q_LOCK_HELD                = 0x00000001,
1491 	VM_PAGE_GRAB_NOPAGEWAIT                 = 0x00000002,
1492 
1493 	/* architecture/platform-specific flags */
1494 #if CONFIG_SECLUDED_MEMORY
1495 	VM_PAGE_GRAB_SECLUDED                   = 0x00010000,
1496 #endif /* CONFIG_SECLUDED_MEMORY */
1497 });
1498 
1499 /*
1500  * Prototypes for functions exported by this module.
1501  */
1502 
1503 extern void             vm_page_init_local_q(unsigned int num_cpus);
1504 
1505 extern void             vm_page_create_canonical(ppnum_t pnum);
1506 
1507 extern void             vm_page_create_retired(ppnum_t pn);
1508 
1509 #if XNU_VM_HAS_DELAYED_PAGES
1510 extern void             vm_free_delayed_pages(void);
1511 #endif /* XNU_VM_HAS_DELAYED_PAGES */
1512 
1513 extern void             vm_pages_array_finalize(void);
1514 
1515 extern void             vm_page_reactivate_all_throttled(void);
1516 
1517 extern void vm_pressure_response(void);
1518 
/* Sum of the active, inactive, free and speculative page counters. */
#define AVAILABLE_NON_COMPRESSED_MEMORY \
	(vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count)

/* The above plus VM_PAGE_COMPRESSOR_COUNT. */
#define AVAILABLE_MEMORY \
	(AVAILABLE_NON_COMPRESSED_MEMORY + VM_PAGE_COMPRESSOR_COUNT)
1521 
/*
 * VM_CHECK_MEMORYSTATUS reports the current "available page count" to
 * memorystatus_update_available_page_count().  What is reported depends
 * on the configuration:
 *   - CONFIG_JETSAM:       pageable external + free + secluded-over-target
 *                          pages, plus the purgeable pages when
 *                          VM_DYNAMIC_PAGING_ENABLED() is false;
 *   - otherwise, not OSX:  nothing, the macro is an empty statement;
 *   - otherwise, on OSX:   AVAILABLE_NON_COMPRESSED_MEMORY.
 */
#if CONFIG_JETSAM

#define VM_CHECK_MEMORYSTATUS \
	memorystatus_update_available_page_count( \
	        vm_page_pageable_external_count + \
	        vm_page_free_count +              \
	        VM_PAGE_SECLUDED_COUNT_OVER_TARGET() + \
	        (VM_DYNAMIC_PAGING_ENABLED() ? 0 : vm_page_purgeable_count) \
	        )

#elif !XNU_TARGET_OS_OSX

#define VM_CHECK_MEMORYSTATUS do {} while(0)

#else /* !CONFIG_JETSAM && XNU_TARGET_OS_OSX */

#define VM_CHECK_MEMORYSTATUS memorystatus_update_available_page_count(AVAILABLE_NON_COMPRESSED_MEMORY)

#endif /* CONFIG_JETSAM */
1545 
/* Short names for the two mutexes stored in vm_page_locks. */
#define vm_page_queue_lock \
	(vm_page_locks.vm_page_queue_lock2)
#define vm_page_queue_free_lock \
	(vm_page_locks.vm_page_queue_free_lock2)
1548 
1549 #ifdef MACH_KERNEL_PRIVATE
1550 static inline void
vm_page_lock_queues(void)1551 vm_page_lock_queues(void)
1552 {
1553 	lck_mtx_lock(&vm_page_queue_lock);
1554 }
1555 
1556 static inline boolean_t
vm_page_trylock_queues(void)1557 vm_page_trylock_queues(void)
1558 {
1559 	boolean_t ret;
1560 	ret = lck_mtx_try_lock(&vm_page_queue_lock);
1561 	return ret;
1562 }
1563 
1564 static inline void
vm_page_unlock_queues(void)1565 vm_page_unlock_queues(void)
1566 {
1567 	lck_mtx_unlock(&vm_page_queue_lock);
1568 }
1569 
1570 static inline void
vm_page_lockspin_queues(void)1571 vm_page_lockspin_queues(void)
1572 {
1573 	lck_mtx_lock_spin(&vm_page_queue_lock);
1574 }
1575 
1576 static inline boolean_t
vm_page_trylockspin_queues(void)1577 vm_page_trylockspin_queues(void)
1578 {
1579 	boolean_t ret;
1580 	ret = lck_mtx_try_lock_spin(&vm_page_queue_lock);
1581 	return ret;
1582 }
1583 
1584 extern void kdp_vm_page_sleep_find_owner(
1585 	event64_t          wait_event,
1586 	thread_waitinfo_t *waitinfo);
1587 
1588 #endif /* MACH_KERNEL_PRIVATE */
1589 
extern unsigned int     vm_max_delayed_work_limit;

#if CONFIG_SECLUDED_MEMORY
extern uint64_t         secluded_shutoff_trigger;
extern uint64_t         secluded_shutoff_headroom;
extern void             start_secluded_suppression(task_t);
extern void             stop_secluded_suppression(task_t);
#endif /* CONFIG_SECLUDED_MEMORY */
1598 
1599 #endif  /* _VM_VM_PAGE_H_ */
1600