/* xref: /xnu-12377.1.9/osfmk/vm/vm_page.h (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea) */
1 /*
2  * Copyright (c) 2000-2020 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_page.h
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Resident memory system definitions.
64  */
65 
66 #ifndef _VM_VM_PAGE_H_
67 #define _VM_VM_PAGE_H_
68 
69 #include <debug.h>
70 #include <stdbool.h>
71 #include <vm/vm_options.h>
72 #include <vm/vm_protos.h>
73 #include <vm/vm_far.h>
74 #include <mach/boolean.h>
75 #include <mach/vm_prot.h>
76 #include <mach/vm_param.h>
77 #include <mach/memory_object_types.h> /* for VMP_CS_BITS... */
78 #include <kern/thread.h>
79 #include <kern/queue.h>
80 #include <kern/locks.h>
81 #include <sys/kern_memorystatus_xnu.h>
82 
/*
 * Per-architecture VM feature switches.
 *
 * Each macro is always defined (to 0 or 1) so that it can be tested with
 * a plain `#if`.  x86_64 enables delayed pages and the lopage pool and does
 * not use a linear pages array; every other architecture gets the opposite
 * settings.
 */
#if __x86_64__
#define XNU_VM_HAS_DELAYED_PAGES        1
#define XNU_VM_HAS_LOPAGE               1
#define XNU_VM_HAS_LINEAR_PAGES_ARRAY   0
#else
#define XNU_VM_HAS_DELAYED_PAGES        0
#define XNU_VM_HAS_LOPAGE               0
#define XNU_VM_HAS_LINEAR_PAGES_ARRAY   1
#endif
92 
93 
94 
/*
 * In order to make the size of a vm_page_t 64 bytes (cache line size for
 * both arm64 and x86_64) the next_m pointer, and the queue links declared
 * below, are kept packed in 32 bits.  As long as the kernel virtual space
 * where vm_page_t's are allocated from doesn't span more than 256 Gbytes,
 * this is safe.  There are live tests in the vm_page_t array allocation and
 * the zone init code to determine if pointers from the 2 ends of these
 * spaces can safely be packed and unpacked.
 *
 * NOTE(review): the "Parameters for pointer packing" comment further down
 * in this file quotes a reach of 128G for the current scheme; confirm which
 * figure is up to date.
 */
typedef uint32_t        vm_page_packed_t;

/* Doubly-linked queue linkage whose links are packed page pointers. */
struct vm_page_packed_queue_entry {
	vm_page_packed_t        next;          /* next element */
	vm_page_packed_t        prev;          /* previous element */
};

typedef struct vm_page_packed_queue_entry       *vm_page_queue_t;
typedef struct vm_page_packed_queue_entry       vm_page_queue_head_t;
typedef struct vm_page_packed_queue_entry       vm_page_queue_chain_t;
typedef struct vm_page_packed_queue_entry       *vm_page_queue_entry_t;

/* A VM object reference, stored in the same 32-bit packed representation. */
typedef vm_page_packed_t                        vm_page_object_t;
115 
116 
117 /*
118  * vm_relocate_reason_t:
119  * A type to describe why a page relocation is being attempted.  Depending on
120  * the reason, certain pages may or may not be relocatable.
121  *
122  * VM_RELOCATE_REASON_CONTIGUOUS:
123  * The relocation is on behalf of the contiguous allocator; it is likely to be
124  * wired, so do not consider pages that cannot be wired for any reason.
125  */
126 __enum_closed_decl(vm_relocate_reason_t, unsigned int, {
127 	VM_RELOCATE_REASON_CONTIGUOUS,
128 
129 	VM_RELOCATE_REASON_COUNT,
130 });
131 
132 /*!
133  * @typedef vm_memory_class_t
134  *
135  * @abstract
136  * A type to describe what kind of memory a page represents.
137  *
138  * @const VM_MEMORY_CLASS_REGULAR
139  * Normal memory, which should participate in the normal page lifecycle.
140  *
141  * @const VM_MEMORY_CLASS_LOPAGE
142  * this exists to support hardware controllers
143  * incapable of generating DMAs with more than 32 bits
144  * of address on platforms with physical memory > 4G...
145  *
146  * @const VM_MEMORY_CLASS_SECLUDED
147  * Denotes memory must be put on the secluded queue,
148  * this is not returned by @c vm_page_get_memory_class().
149  */
150 __enum_closed_decl(vm_memory_class_t, uint8_t, {
151 	VM_MEMORY_CLASS_REGULAR,
152 #if XNU_VM_HAS_LOPAGE
153 	VM_MEMORY_CLASS_LOPAGE,
154 #endif /* XNU_VM_HAS_LOPAGE */
155 #if CONFIG_SECLUDED_MEMORY
156 	VM_MEMORY_CLASS_SECLUDED,
157 #endif
158 });
159 
/*
 * Pages of compressed data: a relaxed atomic load of the compressor
 * object's resident_page_count.
 */
#define VM_PAGE_COMPRESSOR_COUNT os_atomic_load(&compressor_object->resident_page_count, relaxed)
162 
163 /*
164  *	Management of resident (logical) pages.
165  *
166  *	A small structure is kept for each resident
167  *	page, indexed by page number.  Each structure
168  *	is an element of several lists:
169  *
170  *		A hash table bucket used to quickly
171  *		perform object/offset lookups
172  *
173  *		A list of all pages for a given object,
174  *		so they can be quickly deactivated at
175  *		time of deallocation.
176  *
177  *		An ordered list of pages due for pageout.
178  *
179  *	In addition, the structure contains the object
180  *	and offset to which this page belongs (for pageout),
181  *	and sundry status bits.
182  *
183  *	Fields in this structure are locked either by the lock on the
184  *	object that the page belongs to (O) or by the lock on the page
185  *	queues (P).  [Some fields require that both locks be held to
186  *	change that field; holding either lock is sufficient to read.]
187  */
188 
/* The null vm_page_t. */
#define VM_PAGE_NULL            ((vm_page_t) 0)
190 
191 __enum_closed_decl(vm_page_q_state_t, uint8_t, {
192 	VM_PAGE_NOT_ON_Q                = 0,    /* page is not present on any queue, nor is it wired... mainly a transient state */
193 	VM_PAGE_IS_WIRED                = 1,    /* page is currently wired */
194 	VM_PAGE_USED_BY_COMPRESSOR      = 2,    /* page is in use by the compressor to hold compressed data */
195 	VM_PAGE_ON_FREE_Q               = 3,    /* page is on the main free queue */
196 	VM_PAGE_ON_FREE_LOCAL_Q         = 4,    /* page is on one of the per-CPU free queues */
197 #if XNU_VM_HAS_LOPAGE
198 	VM_PAGE_ON_FREE_LOPAGE_Q        = 5,    /* page is on the lopage pool free list */
199 #endif /* XNU_VM_HAS_LOPAGE */
200 #if CONFIG_SECLUDED_MEMORY
201 	VM_PAGE_ON_SECLUDED_Q           = 5,    /* page is on secluded queue */
202 #endif /* CONFIG_SECLUDED_MEMORY */
203 	VM_PAGE_ON_THROTTLED_Q          = 6,    /* page is on the throttled queue... we stash anonymous pages here when not paging */
204 	VM_PAGE_ON_PAGEOUT_Q            = 7,    /* page is on one of the pageout queues (internal/external) awaiting processing */
205 	VM_PAGE_ON_SPECULATIVE_Q        = 8,    /* page is on one of the speculative queues */
206 	VM_PAGE_ON_ACTIVE_LOCAL_Q       = 9,    /* page has recently been created and is being held in one of the per-CPU local queues */
207 	VM_PAGE_ON_ACTIVE_Q             = 10,   /* page is in global active queue */
208 	VM_PAGE_ON_INACTIVE_INTERNAL_Q  = 11,   /* page is on the inactive internal queue a.k.a.  anonymous queue */
209 	VM_PAGE_ON_INACTIVE_EXTERNAL_Q  = 12,   /* page in on the inactive external queue a.k.a.  file backed queue */
210 	VM_PAGE_ON_INACTIVE_CLEANED_Q   = 13,   /* page has been cleaned to a backing file and is ready to be stolen */
211 });
212 #define VM_PAGE_Q_STATE_LAST_VALID_VALUE  13    /* we currently use 4 bits for the state... don't let this go beyond 15 */
213 
214 __enum_closed_decl(vm_page_specialq_t, uint8_t, {
215 	VM_PAGE_SPECIAL_Q_EMPTY         = 0,
216 	VM_PAGE_SPECIAL_Q_BG            = 1,
217 	VM_PAGE_SPECIAL_Q_DONATE        = 2,
218 	VM_PAGE_SPECIAL_Q_FG            = 3,
219 });
220 
/*
 * Queue-state class masks (defined elsewhere).  The predicates below test
 * the bit selected by a page's vmp_q_state in the corresponding 16-bit mask.
 */
extern const uint16_t vm_page_inactive_states;
extern const uint16_t vm_page_active_or_inactive_states;
extern const uint16_t vm_page_non_speculative_pageable_states;
extern const uint16_t vm_page_pageable_states;

/* Each predicate evaluates `m` exactly once. */
#define VM_PAGE_INACTIVE(m)                     bit_test(vm_page_inactive_states, (m)->vmp_q_state)
#define VM_PAGE_ACTIVE_OR_INACTIVE(m)           bit_test(vm_page_active_or_inactive_states, (m)->vmp_q_state)
#define VM_PAGE_NON_SPECULATIVE_PAGEABLE(m)     bit_test(vm_page_non_speculative_pageable_states, (m)->vmp_q_state)
#define VM_PAGE_PAGEABLE(m)                     bit_test(vm_page_pageable_states, (m)->vmp_q_state)
230 
231 
232 /*
233  * The structure itself. See the block comment above for what (O) and (P) mean.
234  */
235 struct vm_page {
236 	union {
237 		vm_page_queue_chain_t   vmp_pageq;      /* queue info for FIFO queue or free list (P) */
238 		struct vm_page         *vmp_snext;
239 	};
240 	vm_page_queue_chain_t           vmp_specialq;   /* anonymous pages in the special queues (P) */
241 
242 	vm_page_queue_chain_t           vmp_listq;      /* all pages in same object (O) */
243 	vm_page_packed_t                vmp_next_m;     /* VP bucket link (O) */
244 
245 	vm_page_object_t                vmp_object;     /* which object am I in (O&P) */
246 	vm_object_offset_t              vmp_offset;     /* offset into that object (O,P) */
247 
248 
249 	/*
250 	 * Either the current page wire count,
251 	 * or the local queue id (if local queues are enabled).
252 	 *
253 	 * See the comments at 'vm_page_queues_remove'
254 	 * as to why this is safe to do.
255 	 */
256 	union {
257 		uint16_t                vmp_wire_count;
258 		uint16_t                vmp_local_id;
259 	};
260 
261 	/*
262 	 * The following word of flags used to be protected by the "page queues" lock.
263 	 * That's no longer true and what lock, if any, is needed may depend on the
264 	 * value of vmp_q_state.
265 	 *
266 	 * This bitfield is kept in its own struct to prevent coalescing
267 	 * with the next one (which C allows the compiler to do) as they
268 	 * are under different locking domains
269 	 */
270 	struct {
271 		vm_page_q_state_t       vmp_q_state:4;      /* which q is the page on (P) */
272 		vm_page_specialq_t      vmp_on_specialq:2;
273 		uint8_t                 vmp_lopage:1;
274 		uint8_t                 vmp_canonical:1;    /* this page is a canonical kernel page (immutable) */
275 	};
276 	struct {
277 		uint8_t                 vmp_gobbled:1;      /* page used internally (P) */
278 		uint8_t                 vmp_laundry:1;      /* page is being cleaned now (P)*/
279 		uint8_t                 vmp_no_cache:1;     /* page is not to be cached and should */
280 		                                            /* be reused ahead of other pages (P) */
281 		uint8_t                 vmp_reference:1;    /* page has been used (P) */
282 		uint8_t                 vmp_realtime:1;     /* page used by realtime thread (P) */
283 		uint8_t                 vmp_iopl_wired:1;   /* page has been wired for I/O UPL (O&P) */
284 #if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
285 		uint8_t                 vmp_unmodified_ro:1;/* Tracks if an anonymous page is modified after a decompression (O&P).*/
286 #else
287 		uint8_t                 __vmp_reserved1:1;
288 #endif
289 		uint8_t                 __vmp_reserved2:1;
290 	};
291 
292 	/*
293 	 * The following word of flags is protected by the "VM object" lock.
294 	 *
295 	 * IMPORTANT: the "vmp_pmapped", "vmp_xpmapped" and "vmp_clustered" bits can be modified while holding the
296 	 * VM object "shared" lock + the page lock provided through the pmap_lock_phys_page function.
297 	 * This is done in vm_fault_enter() and the CONSUME_CLUSTERED macro.
298 	 * It's also ok to modify them behind just the VM object "exclusive" lock.
299 	 */
300 	unsigned int    vmp_busy:1,           /* page is in transit (O) */
301 	    vmp_wanted:1,                     /* someone is waiting for page (O) */
302 	    vmp_tabled:1,                     /* page is in VP table (O) */
303 	    vmp_hashed:1,                     /* page is in vm_page_buckets[] (O) + the bucket lock */
304 	__vmp_unused : 1,
305 	vmp_clustered:1,                      /* page is not the faulted page (O) or (O-shared AND pmap_page) */
306 	    vmp_pmapped:1,                    /* page has at some time been entered into a pmap (O) or */
307 	                                      /* (O-shared AND pmap_page) */
308 	    vmp_xpmapped:1,                   /* page has been entered with execute permission (O) or */
309 	                                      /* (O-shared AND pmap_page) */
310 	    vmp_wpmapped:1,                   /* page has been entered at some point into a pmap for write (O) */
311 	    vmp_free_when_done:1,             /* page is to be freed once cleaning is completed (O) */
312 	    vmp_absent:1,                     /* Data has been requested, but is not yet available (O) */
313 	    vmp_error:1,                      /* Data manager was unable to provide data due to error (O) */
314 	    vmp_dirty:1,                      /* Page must be cleaned (O) */
315 	    vmp_cleaning:1,                   /* Page clean has begun (O) */
316 	    vmp_precious:1,                   /* Page is precious; data must be returned even if clean (O) */
317 	    vmp_overwriting:1,                /* Request to unlock has been made without having data. (O) */
318 	                                      /* [See vm_fault_page_overwrite] */
319 	    vmp_restart:1,                    /* Page was pushed higher in shadow chain by copy_call-related pagers */
320 	                                      /* start again at top of chain */
321 	    vmp_unusual:1,                    /* Page is absent, error, restart or page locked */
322 	    vmp_cs_validated:VMP_CS_BITS,     /* code-signing: page was checked */
323 	    vmp_cs_tainted:VMP_CS_BITS,       /* code-signing: page is tainted */
324 	    vmp_cs_nx:VMP_CS_BITS,            /* code-signing: page is nx */
325 	    vmp_reusable:1,
326 	    vmp_written_by_kernel:1;          /* page was written by kernel (i.e. decompressed) */
327 
328 #if !XNU_VM_HAS_LINEAR_PAGES_ARRAY
329 	/*
330 	 * Physical number of the page
331 	 *
332 	 * Setting this value to or away from vm_page_fictitious_addr
333 	 * must be done with (P) held
334 	 */
335 	ppnum_t                         vmp_phys_page;
336 #endif /* !XNU_VM_HAS_LINEAR_PAGES_ARRAY */
337 };
338 
339 /*!
340  * @var vm_pages
341  * The so called VM pages array
342  *
343  * @var vm_pages_end
344  * The pointer past the last valid page in the VM pages array.
345  *
346  * @var vm_pages_count
347  * The number of elements in the VM pages array.
348  * (vm_pages + vm_pages_count == vm_pages_end).
349  *
350  * @var vm_pages_first_pnum
351  * For linear page arrays, the pnum of the first page in the array.
352  * In other words VM_PAGE_GET_PHYS_PAGE(&vm_pages_array()[0]).
353  */
354 extern vm_page_t        vm_pages_end;
355 extern uint32_t         vm_pages_count;
356 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
357 extern ppnum_t          vm_pages_first_pnum;
358 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
359 
360 /**
361  * Internal accessor which returns the raw vm_pages pointer.
362  *
363  * This pointer must not be indexed directly. Use vm_page_get instead when
364  * indexing into the array.
365  *
366  * __pure2 helps explain to the compiler that the value vm_pages is a constant.
367  */
368 __pure2
369 static inline struct vm_page *
vm_pages_array_internal(void)370 vm_pages_array_internal(void)
371 {
372 	extern vm_page_t vm_pages;
373 	return vm_pages;
374 }
375 
376 /**
377  * Get a pointer to page at index i.
378  *
379  * This getter is the only legal way to index into the vm_pages array.
380  */
381 __pure2
382 static inline vm_page_t
vm_page_get(uint32_t i)383 vm_page_get(uint32_t i)
384 {
385 	return VM_FAR_ADD_PTR_UNBOUNDED(vm_pages_array_internal(), i);
386 }
387 
388 
389 __pure2
390 static inline bool
vm_page_in_array(const struct vm_page * m)391 vm_page_in_array(const struct vm_page *m)
392 {
393 	return vm_pages_array_internal() <= m && m < vm_pages_end;
394 }
395 
#if XNU_VM_HAS_LINEAR_PAGES_ARRAY
/*
 * With a linear pages array struct vm_page has no vmp_phys_page member, so
 * pages that need to carry an explicit physical page number use this
 * extended layout: a struct vm_page followed by the page number.
 */
struct vm_page_with_ppnum {
	struct vm_page          vmp_page;
	ppnum_t                 vmp_phys_page;
};

/*!
 * @abstract
 * Looks up the canonical kernel page for a given physical page number.
 *
 * @discussion
 * This function may return VM_PAGE_NULL for kernel pages that aren't managed
 * by the VM.
 *
 * @param pnum          The page number to lookup.  It must be within
 *                      [pmap_first_pnum, vm_pages_first_pnum + vm_pages_count)
 */
extern vm_page_t vm_page_find_canonical(ppnum_t pnum) __pure2;

/*
 * Iteration primitive behind the vm_pages_radix_for_each*() macros below,
 * which loop for as long as it returns a non-null page.
 * NOTE(review): presumably advances *cursor and, when pnum is non-NULL,
 * stores the page number through it -- confirm against the implementation.
 */
extern vm_page_t vm_pages_radix_next(uint32_t *cursor, ppnum_t *pnum);

/* Iterate, assigning each page returned by vm_pages_radix_next() to `mem`. */
#define vm_pages_radix_for_each(mem) \
	for (uint32_t __index = 0; ((mem) = vm_pages_radix_next(&__index, NULL)); )

/* Same iteration, passing the lvalue `pnum` as the page-number out parameter. */
#define vm_pages_radix_for_each_pnum(pnum) \
	for (uint32_t __index = 0; vm_pages_radix_next(&__index, &(pnum)); )

#else
/* struct vm_page already carries vmp_phys_page: both names denote one type. */
#define vm_page_with_ppnum vm_page
#endif /* !XNU_VM_HAS_LINEAR_PAGES_ARRAY */
typedef struct vm_page_with_ppnum *vm_page_with_ppnum_t;
427 
428 static inline ppnum_t
VM_PAGE_GET_PHYS_PAGE(const struct vm_page * m)429 VM_PAGE_GET_PHYS_PAGE(const struct vm_page *m)
430 {
431 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
432 	if (vm_page_in_array(m)) {
433 		uintptr_t index = (uintptr_t)(m - vm_pages_array_internal());
434 
435 		return (ppnum_t)(vm_pages_first_pnum + index);
436 	}
437 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
438 	return ((const struct vm_page_with_ppnum *)m)->vmp_phys_page;
439 }
440 
441 static inline void
VM_PAGE_INIT_PHYS_PAGE(struct vm_page * m,ppnum_t pnum)442 VM_PAGE_INIT_PHYS_PAGE(struct vm_page *m, ppnum_t pnum)
443 {
444 #if XNU_VM_HAS_LINEAR_PAGES_ARRAY
445 	if (vm_page_in_array(m)) {
446 		assert(pnum == VM_PAGE_GET_PHYS_PAGE(m));
447 		return;
448 	}
449 #endif /* XNU_VM_HAS_LINEAR_PAGES_ARRAY */
450 	((vm_page_with_ppnum_t)(m))->vmp_phys_page = pnum;
451 }
452 
453 static inline void
VM_PAGE_SET_PHYS_PAGE(struct vm_page * m,ppnum_t pnum)454 VM_PAGE_SET_PHYS_PAGE(struct vm_page *m, ppnum_t pnum)
455 {
456 	assert(!vm_page_in_array(m) && !m->vmp_canonical);
457 	((vm_page_with_ppnum_t)(m))->vmp_phys_page = pnum;
458 }
459 
/*
 * Page "color" helpers.
 *
 * On x86_64 the color is taken from the clump number (the page number
 * shifted right by vm_clump_shift); elsewhere it is taken from the page
 * number itself.  In both cases the value is masked with vm_color_mask,
 * which is declared elsewhere.
 */
#if defined(__x86_64__)
extern unsigned int     vm_clump_mask, vm_clump_shift;
#define VM_PAGE_GET_CLUMP_PNUM(pn)      ((pn) >> vm_clump_shift)
#define VM_PAGE_GET_CLUMP(m)            VM_PAGE_GET_CLUMP_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
#define VM_PAGE_GET_COLOR_PNUM(pn)      (VM_PAGE_GET_CLUMP_PNUM(pn) & vm_color_mask)
#define VM_PAGE_GET_COLOR(m)            VM_PAGE_GET_COLOR_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
#else
#define VM_PAGE_GET_COLOR_PNUM(pn)      ((pn) & vm_color_mask)
#define VM_PAGE_GET_COLOR(m)            VM_PAGE_GET_COLOR_PNUM(VM_PAGE_GET_PHYS_PAGE(m))
#endif /* defined(__x86_64__) */
470 
/*
 * Parameters for pointer packing
 *
 *
 * VM Pages pointers might point to:
 *
 * 1. VM_PAGE_PACKED_ALIGNED aligned kernel globals,
 *
 * 2. VM_PAGE_PACKED_ALIGNED aligned heap allocated vm pages
 *
 * 3. entries in the vm_pages array (whose entries aren't VM_PAGE_PACKED_ALIGNED
 *    aligned).
 *
 *
 * The current scheme uses 31 bits of storage and 6 bits of shift using the
 * VM_PACK_POINTER() scheme for (1-2), and packs (3) as an index within the
 * vm_pages array, setting the top bit (VM_PAGE_PACKED_FROM_ARRAY).
 *
 * This scheme gives us a reach of 128G (2^(31 + 6) bytes) from
 * VM_MIN_KERNEL_AND_KEXT_ADDRESS.
 */
#define VM_VPLQ_ALIGNMENT               128
#define VM_PAGE_PACKED_PTR_ALIGNMENT    64              /* must be a power of 2 */
#define VM_PAGE_PACKED_ALIGNED          __attribute__((aligned(VM_PAGE_PACKED_PTR_ALIGNMENT)))
#define VM_PAGE_PACKED_PTR_BITS         31
#define VM_PAGE_PACKED_PTR_SHIFT        6
#ifndef __BUILDING_XNU_LIB_UNITTEST__
#define VM_PAGE_PACKED_PTR_BASE         ((uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS)
#else
/* Unit-test builds take the packing base from a variable instead. */
extern uintptr_t mock_page_ptr_base;
#define VM_PAGE_PACKED_PTR_BASE         (mock_page_ptr_base)
#endif /* __BUILDING_XNU_LIB_UNITTEST__ */
#define VM_PAGE_PACKED_FROM_ARRAY       0x80000000
503 
504 static inline vm_page_packed_t
vm_page_pack_ptr(uintptr_t p)505 vm_page_pack_ptr(uintptr_t p)
506 {
507 	if (vm_page_in_array(__unsafe_forge_single(vm_page_t, p))) {
508 		ptrdiff_t diff = (vm_page_t)p - vm_pages_array_internal();
509 		assert((vm_page_t)p == vm_page_get((uint32_t)diff));
510 		return (vm_page_packed_t)(diff | VM_PAGE_PACKED_FROM_ARRAY);
511 	}
512 
513 	VM_ASSERT_POINTER_PACKABLE(p, VM_PAGE_PACKED_PTR);
514 	vm_offset_t packed = VM_PACK_POINTER(p, VM_PAGE_PACKED_PTR);
515 	return CAST_DOWN_EXPLICIT(vm_page_packed_t, packed);
516 }
517 
518 
519 static inline uintptr_t
vm_page_unpack_ptr(uintptr_t p)520 vm_page_unpack_ptr(uintptr_t p)
521 {
522 	if (p >= VM_PAGE_PACKED_FROM_ARRAY) {
523 		p &= ~VM_PAGE_PACKED_FROM_ARRAY;
524 		assert(p < (uintptr_t)vm_pages_count);
525 		return (uintptr_t)vm_page_get((uint32_t)p);
526 	}
527 
528 	return VM_UNPACK_POINTER(p, VM_PAGE_PACKED_PTR);
529 }
530 
531 
/* Pointer <-> packed conversions for pages and queue heads. */
#define VM_PAGE_PACK_PTR(p)     vm_page_pack_ptr((uintptr_t)(p))
#define VM_PAGE_UNPACK_PTR(p)   vm_page_unpack_ptr((uintptr_t)(p))

/*
 * VM objects are packed with VM_PACK_POINTER() directly, using the same
 * VM_PAGE_PACKED_PTR parameters (no vm_pages array special case).
 */
#define VM_OBJECT_PACK(o)       ((vm_page_object_t)VM_PACK_POINTER((uintptr_t)(o), VM_PAGE_PACKED_PTR))
#define VM_OBJECT_UNPACK(p)     ((vm_object_t)VM_UNPACK_POINTER(p, VM_PAGE_PACKED_PTR))

/* The VM object a page belongs to, unpacked from its vmp_object field. */
#define VM_PAGE_OBJECT(p)       VM_OBJECT_UNPACK((p)->vmp_object)
#define VM_PAGE_PACK_OBJECT(o)  VM_OBJECT_PACK(o)


/* Clear a page's queue linkage by zeroing the vmp_snext view of it. */
#define VM_PAGE_ZERO_PAGEQ_ENTRY(p)     \
MACRO_BEGIN                             \
	(p)->vmp_snext = 0;             \
MACRO_END


#define VM_PAGE_CONVERT_TO_QUEUE_ENTRY(p)       VM_PAGE_PACK_PTR(p)
549 
550 
551 /*!
552  * @abstract
553  * The type for free queue heads that live in the kernel __DATA segment.
554  *
555  * @discussion
556  * This type must be used so that the queue is properly aligned
557  * for the VM Page packing to be able to represent pointers to this queue.
558  */
559 typedef struct vm_page_queue_free_head {
560 	vm_page_queue_head_t    qhead;
561 } VM_PAGE_PACKED_ALIGNED *vm_page_queue_free_head_t;
562 
/*
 *	Macro:	vm_page_queue_init
 *	Function:
 *		Initialize the given queue to the empty state, in which
 *		both links hold the packed address of the head itself.
 *		The head must be packable (asserted).
 *	Header:
 *	void vm_page_queue_init(q)
 *		vm_page_queue_t	q;	\* MODIFIED *\
 *
 *	Note: q is evaluated more than once.
 */
#define vm_page_queue_init(q)               \
MACRO_BEGIN                                 \
	VM_ASSERT_POINTER_PACKABLE((vm_offset_t)(q), VM_PAGE_PACKED_PTR); \
	(q)->next = VM_PAGE_PACK_PTR(q);        \
	(q)->prev = VM_PAGE_PACK_PTR(q);        \
MACRO_END
577 
578 
/*
 * Macro: vm_page_queue_enter
 * Function:
 *     Insert a new element at the tail of the vm_page queue.
 * Header:
 *     void vm_page_queue_enter(q, elt, field)
 *         queue_t q;
 *         vm_page_t elt;
 *         <field> is the list field in vm_page_t
 *
 * This macro's arguments have to match the generic "queue_enter()" macro which is
 * what is used for this on 32 bit kernels.
 *
 * Note: head and elt are evaluated more than once.
 */
#define vm_page_queue_enter(head, elt, field)                       \
MACRO_BEGIN                                                         \
	vm_page_packed_t __pck_elt = VM_PAGE_PACK_PTR(elt);         \
	vm_page_packed_t __pck_head = VM_PAGE_PACK_PTR(head);       \
	vm_page_packed_t __pck_prev = (head)->prev;                 \
                                                                    \
	/* empty queue: the old tail is the head itself */          \
	if (__pck_head == __pck_prev) {                             \
	        (head)->next = __pck_elt;                           \
	} else {                                                    \
	        vm_page_t __prev;                                   \
	        __prev = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_prev); \
	        __prev->field.next = __pck_elt;                     \
	}                                                           \
	(elt)->field.prev = __pck_prev;                             \
	(elt)->field.next = __pck_head;                             \
	(head)->prev = __pck_elt;                                   \
MACRO_END
609 
610 
#if defined(__x86_64__)
/*
 * These are helper macros for vm_page_queue_enter_clump to assist
 * with conditional compilation (release / debug / development).
 *
 * On DEVELOPMENT || DEBUG kernels they assert queue-link consistency and
 * bump the vm_clump_* counters; otherwise they all expand to nothing.
 */
#if DEVELOPMENT || DEBUG

/* When __prev is non-NULL, assert that __prev and __p link to each other. */
#define __DEBUG_CHECK_BUDDIES(__prev, __p, field)                                             \
MACRO_BEGIN                                                                                   \
	if (__prev != NULL) {                                                                 \
	        assert(__p == (vm_page_t)VM_PAGE_UNPACK_PTR(__prev->next));                   \
	        assert(__prev == (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(__p->field.prev)); \
	}                                                                                     \
MACRO_END

/* Follow ->next __n_free times from __first; assert we land on __last_next. */
#define __DEBUG_VERIFY_LINKS(__first, __n_free, __last_next)                    \
MACRO_BEGIN                                                                     \
	unsigned int __i;                                                       \
	vm_page_queue_entry_t __tmp;                                            \
	for (__i = 0, __tmp = __first; __i < __n_free; __i++) {                 \
	        __tmp = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(__tmp->next); \
	}                                                                       \
	assert(__tmp == __last_next);                                           \
MACRO_END

#define __DEBUG_STAT_INCREMENT_INRANGE              vm_clump_inrange++
#define __DEBUG_STAT_INCREMENT_INSERTS              vm_clump_inserts++
#define __DEBUG_STAT_INCREMENT_PROMOTES(__n_free)   vm_clump_promotes+=(__n_free)

#else

#define __DEBUG_CHECK_BUDDIES(__prev, __p, field)
#define __DEBUG_VERIFY_LINKS(__first, __n_free, __last_next)
#define __DEBUG_STAT_INCREMENT_INRANGE
#define __DEBUG_STAT_INCREMENT_INSERTS
#define __DEBUG_STAT_INCREMENT_PROMOTES(__n_free)

#endif  /* if DEVELOPMENT || DEBUG */

#endif /* defined(__x86_64__) */
651 
/*
 * Macro: vm_page_queue_enter_first
 * Function:
 *     Insert a new element at the head of the vm_page queue.
 * Header:
 *     void vm_page_queue_enter_first(q, elt, field)
 *         queue_t q;
 *         vm_page_t elt;
 *         <field> is the linkage field in vm_page
 *
 * This macro's arguments have to match the generic "queue_enter_first()" macro which is
 * what is used for this on 32 bit kernels.
 *
 * Note: head and elt are evaluated more than once.
 */
#define vm_page_queue_enter_first(head, elt, field)                 \
MACRO_BEGIN                                                         \
	vm_page_packed_t __pck_next = (head)->next;                 \
	vm_page_packed_t __pck_head = VM_PAGE_PACK_PTR(head);       \
	vm_page_packed_t __pck_elt = VM_PAGE_PACK_PTR(elt);         \
                                                                    \
	/* empty queue: the old first element is the head itself */ \
	if (__pck_head == __pck_next) {                             \
	        (head)->prev = __pck_elt;                           \
	} else {                                                    \
	        vm_page_t __next;                                   \
	        __next = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_next); \
	        __next->field.prev = __pck_elt;                     \
	}                                                           \
                                                                    \
	(elt)->field.next = __pck_next;                             \
	(elt)->field.prev = __pck_head;                             \
	(head)->next = __pck_elt;                                   \
MACRO_END
683 
684 
/*
 * Macro:	vm_page_queue_remove
 * Function:
 *     Remove an arbitrary page from a vm_page queue.
 *     The removed page's links are reset to 0.
 * Header:
 *     void vm_page_queue_remove(q, qe, field)
 *         arguments as in vm_page_queue_enter
 *
 * This macro's arguments have to match the generic "queue_remove()" macro which is
 * what is used for this on 32 bit kernels.
 *
 * Note: head and elt are evaluated more than once.
 */
#define vm_page_queue_remove(head, elt, field)                          \
MACRO_BEGIN                                                             \
	vm_page_packed_t __pck_next = (elt)->field.next;                \
	vm_page_packed_t __pck_prev = (elt)->field.prev;                \
	vm_page_t        __next = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_next); \
	vm_page_t        __prev = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_prev); \
                                                                        \
	/* a neighbour equal to head is the queue head, not a page */   \
	if ((void *)(head) == (void *)__next) {                         \
	        (head)->prev = __pck_prev;                              \
	} else {                                                        \
	        __next->field.prev = __pck_prev;                        \
	}                                                               \
                                                                        \
	if ((void *)(head) == (void *)__prev) {                         \
	        (head)->next = __pck_next;                              \
	} else {                                                        \
	        __prev->field.next = __pck_next;                        \
	}                                                               \
                                                                        \
	(elt)->field.next = 0;                                          \
	(elt)->field.prev = 0;                                          \
MACRO_END
718 
719 
/*
 * Macro: vm_page_queue_remove_first
 *
 * Function:
 *     Remove and return the entry at the head of a vm_page queue.
 *     The removed entry's links are reset to 0.
 *     The macro does not check for an empty queue.
 *
 * Header:
 *     vm_page_queue_remove_first(head, entry, field)
 *     N.B. entry is returned by reference
 *
 * This macro's arguments have to match the generic "queue_remove_first()" macro which is
 * what is used for this on 32 bit kernels.
 *
 * Note: head and entry are evaluated more than once.
 */
#define vm_page_queue_remove_first(head, entry, field)            \
MACRO_BEGIN                                                       \
	vm_page_packed_t __pck_head = VM_PAGE_PACK_PTR(head);     \
	vm_page_packed_t __pck_next;                              \
	vm_page_t        __next;                                  \
                                                                  \
	(entry) = (vm_page_t)VM_PAGE_UNPACK_PTR((head)->next);    \
	__pck_next = (entry)->field.next;                         \
	__next = (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_next);       \
                                                                  \
	/* entry was the only element: the queue becomes empty */ \
	if (__pck_head == __pck_next) {                           \
	        (head)->prev = __pck_head;                        \
	} else {                                                  \
	        __next->field.prev = __pck_head;                  \
	}                                                         \
                                                                  \
	(head)->next = __pck_next;                                \
	(entry)->field.next = 0;                                  \
	(entry)->field.prev = 0;                                  \
MACRO_END
753 
754 
755 #if defined(__x86_64__)
/*
 * Macro:  vm_page_queue_remove_first_with_clump
 * Function:
 *     Unlink the entry at the head of the free queue and hand it back
 *     through 'entry'.
 *     'end' is set to 1 when the page just returned was the last one of
 *     its clump: either it was the only page left on the queue, or the
 *     page that follows it reports a different VM_PAGE_GET_CLUMP() value.
 *     Otherwise 'end' is set to 0.
 *
 * Header:
 *     vm_page_queue_remove_first_with_clump(head, entry, end)
 *     entry is returned by reference
 *     end is returned by reference
 */
#define vm_page_queue_remove_first_with_clump(head, entry, end)              \
MACRO_BEGIN                                                                  \
	vm_page_packed_t __pck_head = VM_PAGE_PACK_PTR(head);                \
                                                                             \
	/* the page being removed is whatever the head points at */          \
	(entry) = (vm_page_t)VM_PAGE_UNPACK_PTR((head)->next);               \
                                                                             \
	vm_page_packed_t __pck_next = (entry)->vmp_pageq.next;               \
	vm_page_t        __next =                                            \
	    (vm_page_t)VM_PAGE_UNPACK_PTR(__pck_next);                       \
                                                                             \
	(end) = 0;                                                           \
	if (__pck_head != __pck_next) {                                      \
	        /* more pages remain: relink, then compare clumps */         \
	        __next->vmp_pageq.prev = __pck_head;                         \
	        if (VM_PAGE_GET_CLUMP(entry) != VM_PAGE_GET_CLUMP(__next)) { \
	                (end) = 1;                                           \
	        }                                                            \
	} else {                                                             \
	        /* entry was the only page left on the queue */              \
	        (head)->prev = __pck_head;                                   \
	        (end) = 1;                                                   \
	}                                                                    \
                                                                             \
	(head)->next = __pck_next;                                           \
	(entry)->vmp_pageq.next = 0;                                         \
	(entry)->vmp_pageq.prev = 0;                                         \
MACRO_END
792 #endif
793 
794 /*
795  *	Macro:	vm_page_queue_end
796  *	Function:
797  *	Tests whether a new entry is really the end of
798  *		the queue.
799  *	Header:
800  *		boolean_t vm_page_queue_end(q, qe)
801  *			vm_page_queue_t q;
802  *			vm_page_queue_entry_t qe;
803  */
804 #define vm_page_queue_end(q, qe)        ((q) == (qe))
805 
806 
807 /*
808  *	Macro:	vm_page_queue_empty
809  *	Function:
810  *		Tests whether a queue is empty.
811  *	Header:
812  *		boolean_t vm_page_queue_empty(q)
813  *			vm_page_queue_t q;
814  */
815 #define vm_page_queue_empty(q)          vm_page_queue_end((q), ((vm_page_queue_entry_t)vm_page_queue_first(q)))
816 
817 
818 
819 /*
820  *	Macro:	vm_page_queue_first
821  *	Function:
822  *		Returns the first entry in the queue,
823  *	Header:
 *		uintptr_t vm_page_queue_first(q)
825  *			vm_page_queue_t q;	\* IN *\
826  */
827 #define vm_page_queue_first(q)          (VM_PAGE_UNPACK_PTR((q)->next))
828 
829 
830 
831 /*
832  *	Macro:		vm_page_queue_last
833  *	Function:
834  *		Returns the last entry in the queue.
835  *	Header:
 *		uintptr_t vm_page_queue_last(q)
 *			vm_page_queue_t q;		\* IN *\
838  */
839 #define vm_page_queue_last(q)           (VM_PAGE_UNPACK_PTR((q)->prev))
840 
841 
842 
843 /*
844  *	Macro:	vm_page_queue_next
845  *	Function:
846  *		Returns the entry after an item in the queue.
847  *	Header:
 *		uintptr_t vm_page_queue_next(qc)
849  *			vm_page_queue_t qc;
850  */
851 #define vm_page_queue_next(qc)          (VM_PAGE_UNPACK_PTR((qc)->next))
852 
853 
854 
855 /*
856  *	Macro:	vm_page_queue_prev
857  *	Function:
858  *		Returns the entry before an item in the queue.
859  *	Header:
 *		uintptr_t vm_page_queue_prev(qc)
861  *			vm_page_queue_t qc;
862  */
863 #define vm_page_queue_prev(qc)          (VM_PAGE_UNPACK_PTR((qc)->prev))
864 
865 
866 
/*
 *	Macro:	vm_page_queue_iterate
 *	Function:
 *		iterate over each item in a vm_page queue.
 *		Generates the header of a 'for' loop, setting elt to
 *		each item in turn (by reference).  The loop stops
 *		once elt comes back around to the queue head.
 *	Header:
 *		vm_page_queue_iterate(head, elt, field)
 *			vm_page_queue_t head;
 *			vm_page_t elt;
 *			<field> is the chain field in vm_page_t
 *
 *	N.B. the successor is read from elt's own chain field after the
 *	loop body has run, so elt must still be linked at that point.
 */
#define vm_page_queue_iterate(head, elt, field)                       \
	for ((elt) = (vm_page_t)vm_page_queue_first(head);            \
	    !vm_page_queue_end((head), (vm_page_queue_entry_t)(elt)); \
	    (elt) = (vm_page_t)vm_page_queue_next(&(elt)->field))
883 
884 
885 /*
886  * VM_PAGE_MIN_SPECULATIVE_AGE_Q through vm_page_max_speculative_age_q
887  * represents a set of aging bins that are 'protected'...
888  *
889  * VM_PAGE_SPECULATIVE_AGED_Q is a list of the speculative pages that have
890  * not yet been 'claimed' but have been aged out of the protective bins
891  * this occurs in vm_page_speculate when it advances to the next bin
892  * and discovers that it is still occupied... at that point, all of the
893  * pages in that bin are moved to the VM_PAGE_SPECULATIVE_AGED_Q.  the pages
894  * in that bin are all guaranteed to have reached at least the maximum age
895  * we allow for a protected page... they can be older if there is no
896  * memory pressure to pull them from the bin, or there are no new speculative pages
897  * being generated to push them out.
898  * this list is the one that vm_pageout_scan will prefer when looking
899  * for pages to move to the underweight free list
900  *
901  * vm_page_max_speculative_age_q * VM_PAGE_SPECULATIVE_Q_AGE_MS
902  * defines the amount of time a speculative page is normally
903  * allowed to live in the 'protected' state (i.e. not available
904  * to be stolen if vm_pageout_scan is running and looking for
905  * pages)...  however, if the total number of speculative pages
906  * in the protected state exceeds our limit (defined in vm_pageout.c)
907  * and there are none available in VM_PAGE_SPECULATIVE_AGED_Q, then
908  * vm_pageout_scan is allowed to steal pages from the protected
909  * bucket even if they are underage.
910  *
911  * vm_pageout_scan is also allowed to pull pages from a protected
912  * bin if the bin has reached the "age of consent" we've set
913  */
914 #define VM_PAGE_RESERVED_SPECULATIVE_AGE_Q      40
915 #define VM_PAGE_DEFAULT_MAX_SPECULATIVE_AGE_Q   10
916 #define VM_PAGE_MIN_SPECULATIVE_AGE_Q   1
917 #define VM_PAGE_SPECULATIVE_AGED_Q      0
918 
919 #define VM_PAGE_SPECULATIVE_Q_AGE_MS    500
920 
921 struct vm_speculative_age_q {
922 	/*
923 	 * memory queue for speculative pages via clustered pageins
924 	 */
925 	vm_page_queue_head_t    age_q;
926 	mach_timespec_t age_ts;
927 } VM_PAGE_PACKED_ALIGNED;
928 
929 
930 
931 extern
932 struct vm_speculative_age_q     vm_page_queue_speculative[];
933 
934 extern int                      speculative_steal_index;
935 extern int                      speculative_age_index;
936 extern unsigned int             vm_page_speculative_q_age_ms;
937 extern unsigned int             vm_page_max_speculative_age_q;
938 
939 
940 typedef struct vm_locks_array {
941 	char    pad  __attribute__ ((aligned(64)));
942 	lck_mtx_t       vm_page_queue_lock2 __attribute__ ((aligned(64)));
943 	lck_mtx_t       vm_page_queue_free_lock2 __attribute__ ((aligned(64)));
944 	char    pad2  __attribute__ ((aligned(64)));
945 } vm_locks_array_t;
946 
947 
948 #define VM_PAGE_WIRED(m)        ((m)->vmp_q_state == VM_PAGE_IS_WIRED)
949 #define NEXT_PAGE(m)            ((m)->vmp_snext)
950 #define NEXT_PAGE_PTR(m)        (&(m)->vmp_snext)
951 
952 /*!
953  * @abstract
954  * Represents a singly linked list of pages with a count.
955  *
956  * @discussion
957  * This type is used as a way to exchange transient collections of VM pages
958  * by various subsystems.
959  *
 * This type is designed to be less than sizeof(_Complex) which means
 * that it can be passed by value efficiently (either as a function argument
 * or its result).
963  *
964  *
965  * @field vmpl_head
966  * The head of the list, or VM_PAGE_NULL.
967  *
968  * @field vmpl_count
969  * How many pages are on that list.
970  *
971  * @field vmpl_has_realtime
972  * At least one page on the list has vmp_realtime set.
973  */
974 typedef struct {
975 	vm_page_t vmpl_head;
976 	uint32_t  vmpl_count;
977 	bool      vmpl_has_realtime;
978 } vm_page_list_t;
979 
980 
981 /*!
982  * @abstract
983  * Low level function that pushes a page on a naked singly linked list of VM
984  * pages.
985  *
986  * @param head          The list head.
987  * @param mem           The page to push on the list.
988  */
989 static inline void
_vm_page_list_push(vm_page_t * head,vm_page_t mem)990 _vm_page_list_push(vm_page_t *head, vm_page_t mem)
991 {
992 	NEXT_PAGE(mem) = *head;
993 	*head = mem;
994 }
995 
996 /*!
997  * @abstract
998  * Pushes a page onto a VM page list, adjusting its properties.
999  *
1000  * @param list          The VM page list to push onto
1001  * @param mem           The page to push on the list.
1002  */
1003 static inline void
vm_page_list_push(vm_page_list_t * list,vm_page_t mem)1004 vm_page_list_push(vm_page_list_t *list, vm_page_t mem)
1005 {
1006 	_vm_page_list_push(&list->vmpl_head, mem);
1007 	list->vmpl_count++;
1008 	if (mem->vmp_realtime) {
1009 		list->vmpl_has_realtime = true;
1010 	}
1011 }
1012 
1013 /*!
1014  * @abstract
1015  * Conveniency function that creates a VM page list from a single page.
1016  *
1017  * @param mem           The VM page to put on the list.
1018  */
1019 static inline vm_page_list_t
vm_page_list_for_page(vm_page_t mem)1020 vm_page_list_for_page(vm_page_t mem)
1021 {
1022 	assert(NEXT_PAGE(mem) == VM_PAGE_NULL);
1023 	return (vm_page_list_t){
1024 		       .vmpl_head  = mem,
1025 		       .vmpl_count = 1,
1026 		       .vmpl_has_realtime = mem->vmp_realtime,
1027 	};
1028 }
1029 
1030 /*!
1031  * @abstract
1032  * Low level function that pops a page from a naked singly linked list of VM
1033  * pages.
1034  *
1035  * @param head          The list head.
1036  *
1037  * @returns             The first page that was on the list
1038  *                      or VM_PAGE_NULL if it was empty.
1039  */
1040 static inline vm_page_t
_vm_page_list_pop(vm_page_t * head)1041 _vm_page_list_pop(vm_page_t *head)
1042 {
1043 	vm_page_t mem = *head;
1044 
1045 	if (mem) {
1046 		*head = NEXT_PAGE(mem);
1047 		VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
1048 	}
1049 
1050 	return mem;
1051 }
1052 
1053 /*!
1054  * @abstract
1055  * Pops a page from a VM page list, adjusting its properties.
1056  *
1057  * @param list          The VM page list to pop from.
1058  *
1059  * @returns             The first page that was on the list
1060  *                      or VM_PAGE_NULL if it was empty.
1061  */
1062 static inline vm_page_t
vm_page_list_pop(vm_page_list_t * list)1063 vm_page_list_pop(vm_page_list_t *list)
1064 {
1065 	if (list->vmpl_head) {
1066 		list->vmpl_count--;
1067 		return _vm_page_list_pop(&list->vmpl_head);
1068 	}
1069 	*list = (vm_page_list_t){ };
1070 	return VM_PAGE_NULL;
1071 }
1072 
1073 
1074 /*!
1075  * @abstract
1076  * Reverses a list of VM pages in place.
1077  *
1078  * @param list          The VM page list to reverse.
1079  */
1080 static inline void
vm_page_list_reverse(vm_page_list_t * list)1081 vm_page_list_reverse(vm_page_list_t *list)
1082 {
1083 	vm_page_t cur, next;
1084 
1085 	cur = list->vmpl_head;
1086 	list->vmpl_head = NULL;
1087 
1088 	while (cur) {
1089 		next = NEXT_PAGE(cur);
1090 		_vm_page_list_push(&list->vmpl_head, cur);
1091 		cur = next;
1092 	}
1093 }
1094 
1095 
1096 /*!
1097  * @abstract
1098  * Low level iterator over all pages on a naked singly linked list
1099  * of VM pages.
1100  *
1101  * @discussion
1102  * Mutating the list during enumeration is undefined.
1103  *
1104  * @param mem           The variable to use for iteration.
 * @param list          The list head.
1106  */
1107 #define _vm_page_list_foreach(mem, list) \
1108 	for ((mem) = (list); (mem); (mem) = NEXT_PAGE(mem))
1109 
1110 
1111 /*!
1112  * @abstract
1113  * Iterator over a VM page list.
1114  *
1115  * @discussion
1116  * Mutating the list during enumeration is undefined.
1117  *
1118  * @param mem           The variable to use for iteration.
 * @param list          The VM page list to iterate over.
1120  */
1121 #define vm_page_list_foreach(mem, list) \
1122 	_vm_page_list_foreach(mem, (list).vmpl_head)
1123 
1124 
1125 /*!
1126  * @abstract
1127  * Low level iterator over all pages on a naked singly linked list
1128  * of VM pages, that also consumes the list as it iterates.
1129  *
1130  * @discussion
1131  * Each element is removed from the list as it is being iterated.
1132  *
1133  * @param mem           The variable to use for iteration.
 * @param list          A pointer to the list head.
1135  */
1136 #define _vm_page_list_foreach_consume(mem, list) \
1137 	while (((mem) = _vm_page_list_pop((list))))
1138 
1139 /*!
1140  * @abstract
1141  * Iterator over a VM page list, that consumes the list.
1142  *
1143  * @discussion
1144  * Each element is removed from the list as it is being iterated.
1145  *
1146  * @param mem           The variable to use for iteration.
 * @param list          A pointer to the VM page list to consume.
1148  */
1149 #define vm_page_list_foreach_consume(mem, list) \
1150 	while (((mem) = vm_page_list_pop((list))))
1151 
1152 
1153 /*
1154  * XXX	The unusual bit should not be necessary.  Most of the bit
1155  * XXX	fields above really want to be masks.
1156  */
1157 
1158 /*
1159  *	For debugging, this macro can be defined to perform
1160  *	some useful check on a page structure.
1161  *	INTENTIONALLY left as a no-op so that the
1162  *	current call-sites can be left intact for future uses.
1163  */
1164 
1165 #define VM_PAGE_CHECK(mem)                      \
1166 	MACRO_BEGIN                             \
1167 	MACRO_END
1168 
1169 /*     Page coloring:
1170  *
1171  *     The free page list is actually n lists, one per color,
1172  *     where the number of colors is a function of the machine's
1173  *     cache geometry set at system initialization.  To disable
1174  *     coloring, set vm_colors to 1 and vm_color_mask to 0.
1175  *     The boot-arg "colors" may be used to override vm_colors.
1176  *     Note that there is little harm in having more colors than needed.
1177  */
1178 
1179 #define MAX_COLORS      128
1180 #define DEFAULT_COLORS  32
1181 
1182 /*
1183  * Page free queue type.  Abstracts the notion of a free queue of pages, that
1184  * contains free pages of a particular memory class, and maintains a count of
1185  * the number of pages in the free queue.
1186  *
1187  * Pages in the queue will be marked VM_PAGE_ON_FREE_Q when they are added to
1188  * the free queue, and VM_PAGE_NOT_ON_Q when they are removed.
1189  *
1190  * These free queues will color pages, consistent with MachVMs color mask.
1191  */
1192 typedef struct vm_page_free_queue {
1193 	struct vm_page_queue_free_head vmpfq_queues[MAX_COLORS];
1194 	uint32_t                       vmpfq_count;
1195 } *vm_page_free_queue_t;
1196 
1197 extern unsigned int    vm_colors;              /* must be in range 1..MAX_COLORS */
1198 extern unsigned int    vm_color_mask;          /* must be (vm_colors-1) */
1199 extern unsigned int    vm_cache_geometry_colors; /* optimal #colors based on cache geometry */
1200 extern unsigned int    vm_free_magazine_refill_limit;
1201 
1202 /*
1203  * Wired memory is a very limited resource and we can't let users exhaust it
1204  * and deadlock the entire system.  We enforce the following limits:
1205  *
1206  * vm_per_task_user_wire_limit
1207  *      how much memory can be user-wired in one user task
1208  *
1209  * vm_global_user_wire_limit (default: same as vm_per_task_user_wire_limit)
1210  *      how much memory can be user-wired in all user tasks
1211  *
1212  * These values are set to defaults based on the number of pages managed
 * by the VM system. They can be overridden via sysctls.
1214  * See kmem_set_user_wire_limits for details on the default values.
1215  *
1216  * Regardless of the amount of memory in the system, we never reserve
1217  * more than VM_NOT_USER_WIREABLE_MAX bytes as unlockable.
1218  */
1219 #define VM_NOT_USER_WIREABLE_MAX (32ULL*1024*1024*1024)     /* 32GB */
1220 
1221 extern vm_map_size_t   vm_per_task_user_wire_limit;
1222 extern vm_map_size_t   vm_global_user_wire_limit;
1223 extern uint64_t        vm_add_wire_count_over_global_limit;
1224 extern uint64_t        vm_add_wire_count_over_user_limit;
1225 
1226 /*
1227  *	Each pageable resident page falls into one of three lists:
1228  *
1229  *	free
1230  *		Available for allocation now.  The free list is
1231  *		actually an array of lists, one per color.
1232  *	inactive
1233  *		Not referenced in any map, but still has an
1234  *		object/offset-page mapping, and may be dirty.
1235  *		This is the list of pages that should be
1236  *		paged out next.  There are actually two
1237  *		inactive lists, one for pages brought in from
1238  *		disk or other backing store, and another
1239  *		for "zero-filled" pages.  See vm_pageout_scan()
1240  *		for the distinction and usage.
1241  *	active
1242  *		A list of pages which have been placed in
1243  *		at least one physical map.  This list is
1244  *		ordered, in LRU-like fashion.
1245  */
1246 
1247 
1248 #define VPL_LOCK_SPIN 1
1249 
1250 struct vpl {
1251 	vm_page_queue_head_t    vpl_queue;
1252 	unsigned int    vpl_count;
1253 	unsigned int    vpl_internal_count;
1254 	unsigned int    vpl_external_count;
1255 	lck_spin_t      vpl_lock;
1256 };
1257 
1258 extern
1259 struct vpl     * /* __zpercpu */ vm_page_local_q;
1260 extern
1261 unsigned int    vm_page_local_q_soft_limit;
1262 extern
1263 unsigned int    vm_page_local_q_hard_limit;
1264 extern
1265 vm_locks_array_t vm_page_locks;
1266 
1267 extern
1268 vm_page_queue_head_t    vm_page_queue_active;   /* active memory queue */
1269 extern
1270 vm_page_queue_head_t    vm_page_queue_inactive; /* inactive memory queue for normal pages */
1271 #if CONFIG_SECLUDED_MEMORY
1272 extern
1273 vm_page_queue_head_t    vm_page_queue_secluded; /* reclaimable pages secluded for Camera */
1274 #endif /* CONFIG_SECLUDED_MEMORY */
1275 extern
1276 vm_page_queue_head_t    vm_page_queue_cleaned; /* clean-queue inactive memory */
1277 extern
1278 vm_page_queue_head_t    vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
1279 extern
1280 vm_page_queue_head_t    vm_page_queue_throttled;        /* memory queue for throttled pageout pages */
1281 
1282 extern
1283 queue_head_t    vm_objects_wired;
1284 extern
1285 lck_spin_t      vm_objects_wired_lock;
1286 
1287 #define VM_PAGE_DONATE_DISABLED     0
1288 #define VM_PAGE_DONATE_ENABLED      1
1289 extern
1290 uint32_t        vm_page_donate_mode;
1291 extern
1292 bool        vm_page_donate_queue_ripe;
1293 
1294 #define VM_PAGE_BACKGROUND_TARGET_MAX   50000
1295 #define VM_PAGE_BG_DISABLED     0
1296 #define VM_PAGE_BG_ENABLED     1
1297 
1298 extern
1299 vm_page_queue_head_t    vm_page_queue_background;
1300 extern
1301 uint64_t        vm_page_background_promoted_count;
1302 extern
1303 uint32_t        vm_page_background_count;
1304 extern
1305 uint32_t        vm_page_background_target;
1306 extern
1307 uint32_t        vm_page_background_internal_count;
1308 extern
1309 uint32_t        vm_page_background_external_count;
1310 extern
1311 uint32_t        vm_page_background_mode;
1312 extern
1313 uint32_t        vm_page_background_exclude_external;
1314 
1315 extern
1316 vm_page_queue_head_t    vm_page_queue_donate;
1317 extern
1318 uint32_t        vm_page_donate_count;
1319 extern
1320 uint32_t        vm_page_donate_target_low;
1321 extern
1322 uint32_t        vm_page_donate_target_high;
1323 #define VM_PAGE_DONATE_TARGET_LOWWATER  (100)
1324 #define VM_PAGE_DONATE_TARGET_HIGHWATER ((unsigned int)(atop_64(max_mem) / 8))
1325 
1326 extern
1327 vm_offset_t     first_phys_addr;        /* physical address for first_page */
1328 extern
1329 vm_offset_t     last_phys_addr;         /* physical address for last_page */
1330 
1331 extern
1332 unsigned int    vm_page_free_count;     /* How many pages are free? (sum of all colors) */
1333 extern
1334 unsigned int    vm_page_active_count;   /* How many pages are active? */
1335 extern
1336 unsigned int    vm_page_inactive_count; /* How many pages are inactive? */
1337 extern
1338 unsigned int vm_page_kernelcache_count; /* How many pages are used for the kernelcache? */
1339 extern
1340 unsigned int vm_page_realtime_count;    /* How many pages are used by realtime threads? */
1341 #if CONFIG_SECLUDED_MEMORY
1342 extern
1343 unsigned int    vm_page_secluded_count; /* How many pages are secluded? */
1344 extern
1345 unsigned int    vm_page_secluded_count_free; /* how many of them are free? */
1346 extern
1347 unsigned int    vm_page_secluded_count_inuse; /* how many of them are in use? */
1348 /*
1349  * We keep filling the secluded pool with new eligible pages and
1350  * we can overshoot our target by a lot.
1351  * When there's memory pressure, vm_pageout_scan() will re-balance the queues,
1352  * pushing the extra secluded pages to the active or free queue.
1353  * Since these "over target" secluded pages are actually "available", jetsam
1354  * should consider them as such, so make them visible to jetsam via the
1355  * "vm_page_secluded_count_over_target" counter and update it whenever we
1356  * update vm_page_secluded_count or vm_page_secluded_target.
1357  */
1358 extern
1359 unsigned int    vm_page_secluded_count_over_target;
#define VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE()                     \
	MACRO_BEGIN                                                     \
	/* excess of the secluded count over its target, floored at 0 */ \
	vm_page_secluded_count_over_target =                            \
	    (vm_page_secluded_count > vm_page_secluded_target)          \
	    ? (vm_page_secluded_count - vm_page_secluded_target)        \
	    : 0;                                                        \
	MACRO_END
1369 #define VM_PAGE_SECLUDED_COUNT_OVER_TARGET() vm_page_secluded_count_over_target
1370 #else /* CONFIG_SECLUDED_MEMORY */
1371 #define VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE() \
1372 	MACRO_BEGIN                                 \
1373 	MACRO_END
1374 #define VM_PAGE_SECLUDED_COUNT_OVER_TARGET() 0
1375 #endif /* CONFIG_SECLUDED_MEMORY */
1376 extern
1377 unsigned int    vm_page_cleaned_count; /* How many pages are in the clean queue? */
1378 extern
1379 unsigned int    vm_page_throttled_count;/* How many inactives are throttled */
1380 extern
1381 unsigned int    vm_page_speculative_count;      /* How many speculative pages are unclaimed? */
1382 extern unsigned int     vm_page_pageable_internal_count;
1383 extern unsigned int     vm_page_pageable_external_count;
1384 extern
1385 unsigned int    vm_page_xpmapped_external_count;        /* How many pages are mapped executable? */
1386 extern
1387 unsigned int    vm_page_external_count; /* How many pages are file-backed? */
1388 extern
1389 unsigned int    vm_page_internal_count; /* How many pages are anonymous? */
1390 extern
1391 unsigned int    vm_page_wire_count;             /* How many pages are wired? */
1392 extern
1393 unsigned int    vm_page_wire_count_initial;     /* How many pages wired at startup */
1394 extern
1395 unsigned int    vm_page_wire_count_on_boot;     /* even earlier than _initial */
1396 extern
1397 unsigned int    vm_page_free_target;    /* How many do we want free? */
1398 extern
1399 unsigned int    vm_page_free_min;       /* When to wakeup pageout */
1400 extern
1401 unsigned int    vm_page_throttle_limit; /* When to throttle new page creation */
1402 extern
1403 unsigned int    vm_page_inactive_target;/* How many do we want inactive? */
1404 #if CONFIG_SECLUDED_MEMORY
1405 extern
1406 unsigned int    vm_page_secluded_target;/* How many do we want secluded? */
1407 #endif /* CONFIG_SECLUDED_MEMORY */
1408 extern
1409 unsigned int    vm_page_anonymous_min;  /* When it's ok to pre-clean */
1410 extern
1411 unsigned int    vm_page_free_reserved;  /* How many pages reserved to do pageout */
1412 extern
1413 unsigned int    vm_page_gobble_count;
1414 extern
1415 unsigned int    vm_page_stolen_count;   /* Count of stolen pages not acccounted in zones */
1416 extern
1417 unsigned int    vm_page_kern_lpage_count;   /* Count of large pages used in early boot */
1418 
1419 
1420 #if DEVELOPMENT || DEBUG
1421 extern
1422 unsigned int    vm_page_speculative_used;
1423 #endif
1424 
1425 extern
1426 unsigned int    vm_page_purgeable_count;/* How many pages are purgeable now ? */
1427 extern
1428 unsigned int    vm_page_purgeable_wired_count;/* How many purgeable pages are wired now ? */
1429 extern
1430 uint64_t        vm_page_purged_count;   /* How many pages got purged so far ? */
1431 
1432 extern
1433 _Atomic unsigned int vm_page_swapped_count;
1434 /* How many pages are swapped to disk? */
1435 
1436 extern unsigned int     vm_page_free_wanted;
1437 /* how many threads are waiting for memory */
1438 
1439 extern unsigned int     vm_page_free_wanted_privileged;
1440 /* how many VM privileged threads are waiting for memory */
1441 #if CONFIG_SECLUDED_MEMORY
1442 extern unsigned int     vm_page_free_wanted_secluded;
1443 /* how many threads are waiting for secluded memory */
1444 #endif /* CONFIG_SECLUDED_MEMORY */
1445 
1446 extern const ppnum_t    vm_page_fictitious_addr;
1447 /* (fake) phys_addr of fictitious pages */
1448 
1449 extern const ppnum_t    vm_page_guard_addr;
1450 /* (fake) phys_addr of guard pages */
1451 
1452 
1453 extern boolean_t        vm_page_deactivate_hint;
1454 
1455 extern int              vm_compressor_mode;
1456 
1457 #if __x86_64__
1458 /*
1459  * Defaults to true, so highest memory is used first.
1460  */
1461 extern boolean_t        vm_himemory_mode;
1462 #else
1463 #define vm_himemory_mode TRUE
1464 #endif
1465 
1466 #if XNU_VM_HAS_LOPAGE
1467 extern bool             vm_lopage_needed;
1468 extern bool             vm_lopage_refill;
1469 extern uint32_t         vm_lopage_free_count;
1470 extern uint32_t         vm_lopage_free_limit;
1471 extern uint32_t         vm_lopage_lowater;
1472 #else
1473 #define vm_lopage_needed        0
1474 #define vm_lopage_free_count    0
1475 #endif
1476 extern uint64_t         max_valid_dma_address;
1477 extern ppnum_t          max_valid_low_ppnum;
1478 
1479 /*!
1480  * @abstract
1481  * Options that alter the behavior of vm_page_grab_options().
1482  *
1483  * @const VM_PAGE_GRAB_OPTIONS_NONE
1484  * The default value when no other specific options are required.
1485  *
1486  * @const VM_PAGE_GRAB_Q_LOCK_HELD
 * Denotes that the caller is holding the vm page queues lock.
1488  *
1489  * @const VM_PAGE_GRAB_NOPAGEWAIT
1490  * Denotes that the caller never wants @c vm_page_grab_options() to call
1491  * @c VM_PAGE_WAIT(), even if the thread is privileged.
1492  *
1493  * @const VM_PAGE_GRAB_SECLUDED
 * The caller is eligible for the secluded pool.
1495  */
1496 __enum_decl(vm_grab_options_t, uint32_t, {
1497 	VM_PAGE_GRAB_OPTIONS_NONE               = 0x00000000,
1498 	VM_PAGE_GRAB_Q_LOCK_HELD                = 0x00000001,
1499 	VM_PAGE_GRAB_NOPAGEWAIT                 = 0x00000002,
1500 
1501 	/* architecture/platform-specific flags */
1502 #if CONFIG_SECLUDED_MEMORY
1503 	VM_PAGE_GRAB_SECLUDED                   = 0x00010000,
1504 #endif /* CONFIG_SECLUDED_MEMORY */
1505 });
1506 
1507 /*
1508  * Prototypes for functions exported by this module.
1509  */
1510 
1511 extern void             vm_page_init_local_q(unsigned int num_cpus);
1512 
1513 extern vm_page_t        vm_page_create(ppnum_t phys_page, bool canonical, zalloc_flags_t flags);
1514 extern void             vm_page_create_canonical(ppnum_t pnum);
1515 
1516 extern void             vm_page_create_retired(ppnum_t pn);
1517 
1518 #if XNU_VM_HAS_DELAYED_PAGES
1519 extern void             vm_free_delayed_pages(void);
1520 #endif /* XNU_VM_HAS_DELAYED_PAGES */
1521 
1522 extern void             vm_pages_array_finalize(void);
1523 
1524 extern void             vm_page_reactivate_all_throttled(void);
1525 
1526 extern void vm_pressure_response(void);
1527 
1528 #define AVAILABLE_NON_COMPRESSED_MEMORY         (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count)
1529 #define AVAILABLE_MEMORY                        (AVAILABLE_NON_COMPRESSED_MEMORY + VM_PAGE_COMPRESSOR_COUNT)
1530 
1531 #if CONFIG_JETSAM
1532 
1533 #define VM_CHECK_MEMORYSTATUS \
1534 	memorystatus_update_available_page_count( \
1535 	        vm_page_pageable_external_count + \
1536 	        vm_page_free_count +              \
1537 	        VM_PAGE_SECLUDED_COUNT_OVER_TARGET() + \
1538 	        (VM_DYNAMIC_PAGING_ENABLED() ? 0 : vm_page_purgeable_count) \
1539 	        )
1540 
1541 #else /* CONFIG_JETSAM */
1542 
1543 #if !XNU_TARGET_OS_OSX
1544 
1545 #define VM_CHECK_MEMORYSTATUS do {} while(0)
1546 
1547 #else /* !XNU_TARGET_OS_OSX */
1548 
1549 #define VM_CHECK_MEMORYSTATUS memorystatus_update_available_page_count(AVAILABLE_NON_COMPRESSED_MEMORY)
1550 
1551 #endif /* !XNU_TARGET_OS_OSX */
1552 
1553 #endif /* CONFIG_JETSAM */
1554 
1555 #define vm_page_queue_lock (vm_page_locks.vm_page_queue_lock2)
1556 #define vm_page_queue_free_lock (vm_page_locks.vm_page_queue_free_lock2)
1557 
1558 #ifdef MACH_KERNEL_PRIVATE
1559 static inline void
vm_page_lock_queues(void)1560 vm_page_lock_queues(void)
1561 {
1562 	lck_mtx_lock(&vm_page_queue_lock);
1563 }
1564 
1565 static inline boolean_t
vm_page_trylock_queues(void)1566 vm_page_trylock_queues(void)
1567 {
1568 	boolean_t ret;
1569 	ret = lck_mtx_try_lock(&vm_page_queue_lock);
1570 	return ret;
1571 }
1572 
1573 static inline void
vm_page_unlock_queues(void)1574 vm_page_unlock_queues(void)
1575 {
1576 	lck_mtx_unlock(&vm_page_queue_lock);
1577 }
1578 
1579 static inline void
vm_page_lockspin_queues(void)1580 vm_page_lockspin_queues(void)
1581 {
1582 	lck_mtx_lock_spin(&vm_page_queue_lock);
1583 }
1584 
1585 static inline boolean_t
vm_page_trylockspin_queues(void)1586 vm_page_trylockspin_queues(void)
1587 {
1588 	boolean_t ret;
1589 	ret = lck_mtx_try_lock_spin(&vm_page_queue_lock);
1590 	return ret;
1591 }
1592 
1593 extern void kdp_vm_page_sleep_find_owner(
1594 	event64_t          wait_event,
1595 	thread_waitinfo_t *waitinfo);
1596 
1597 #endif /* MACH_KERNEL_PRIVATE */
1598 
1599 extern unsigned int vm_max_delayed_work_limit;
1600 
1601 #if CONFIG_SECLUDED_MEMORY
1602 extern uint64_t secluded_shutoff_trigger;
1603 extern uint64_t secluded_shutoff_headroom;
1604 extern void start_secluded_suppression(task_t);
1605 extern void stop_secluded_suppression(task_t);
1606 #endif /* CONFIG_SECLUDED_MEMORY */
1607 
1608 #endif  /* _VM_VM_PAGE_H_ */
1609