/*
 * Copyright (c) 2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _VM_VM_PAGE_INTERNAL_H_
#define _VM_VM_PAGE_INTERNAL_H_

#include <sys/cdefs.h>
#include <vm/vm_page.h>

__BEGIN_DECLS
#ifdef XNU_KERNEL_PRIVATE

PERCPU_DECL(unsigned int, start_color);

extern struct vm_page_free_queue vm_page_queue_free;

/*!
 * @var vm_page_deactivate_behind
 * @brief Whether the system should proactively deactivate pages in large
 *        sequential vm-objects/xfers to prevent file-cache/compressor
 *        thrashing.
 */
extern bool vm_page_deactivate_behind;

/*!
 * @var vm_page_deactivate_behind_min_resident_ratio
 * @brief The minimum size of an xfer/vm-object at which proactive deactivation
 *        should be engaged to prevent file-cache/compressor thrashing.
 */
extern uint32_t vm_page_deactivate_behind_min_resident_ratio;

/*!
 * @abstract
 * Applies a signed delta to a VM counter that is not meant to ever overflow.
 *
 * @discussion
 * This is not meant for counters counting "events", but for counters that
 * maintain how many objects there are in a given state (free pages, ...).
 *
 * @param counter         A pointer to a counter of any integer type.
 * @param value           The signed delta to apply.
 * @returns               The new value of the counter.
 */
#define VM_COUNTER_DELTA(counter, value)  ({ \
	__auto_type __counter = (counter);                                      \
	release_assert(!os_add_overflow(*__counter, value, __counter));         \
	*__counter;                                                             \
})
#define VM_COUNTER_ATOMIC_DELTA(counter, value)  ({ \
	__auto_type __value = (value);                                          \
	__auto_type __orig  = os_atomic_add_orig(counter, __value, relaxed);    \
	release_assert(!os_add_overflow(__orig, __value, &__orig));             \
	__orig + __value;                                                       \
})


/*!
 * @abstract
 * Applies an unsigned increment to a VM counter that is not meant to ever
 * overflow.
 *
 * @discussion
 * This is not meant for counters counting "events", but for counters that
 * maintain how many objects there are in a given state (free pages, ...).
 *
 * @param counter         A pointer to a counter of any integer type.
 * @param value           The unsigned value to add.
 * @returns               The new value of the counter.
 */
#define VM_COUNTER_ADD(counter, value)  ({ \
	__auto_type __counter = (counter);                                      \
	release_assert(!os_add_overflow(*__counter, value, __counter));         \
	*__counter;                                                             \
})
#define VM_COUNTER_ATOMIC_ADD(counter, value)  ({ \
	__auto_type __value = (value);                                          \
	__auto_type __orig  = os_atomic_add_orig(counter, __value, relaxed);    \
	release_assert(!os_add_overflow(__orig, __value, &__orig));             \
	__orig + __value;                                                       \
})

/*!
 * @abstract
 * Applies an unsigned decrement to a VM counter that is not meant to ever
 * overflow.
 *
 * @discussion
 * This is not meant for counters counting "events", but for counters that
 * maintain how many objects there are in a given state (free pages, ...).
 *
 * @param counter         A pointer to a counter of any integer type.
 * @param value           The unsigned value to subtract.
 * @returns               The new value of the counter.
 */
#define VM_COUNTER_SUB(counter, value)  ({ \
	__auto_type __counter = (counter);                                      \
	release_assert(!os_sub_overflow(*__counter, value, __counter));         \
	*__counter;                                                             \
})
#define VM_COUNTER_ATOMIC_SUB(counter, value)  ({ \
	__auto_type __value = (value);                                          \
	__auto_type __orig  = os_atomic_sub_orig(counter, __value, relaxed);    \
	release_assert(!os_sub_overflow(__orig, __value, &__orig));             \
	__orig - __value;                                                       \
})


/*!
 * @abstract
 * Convenience wrapper to increment a VM counter.
 *
 * @discussion
 * This is not meant for counters counting "events", but for counters that
 * maintain how many objects there are in a given state (free pages, ...).
 *
 * @param counter         A pointer to a counter of any integer type.
 * @returns               The new value of the counter.
 */
#define VM_COUNTER_INC(counter)         VM_COUNTER_ADD(counter, 1)
#define VM_COUNTER_ATOMIC_INC(counter)  VM_COUNTER_ATOMIC_ADD(counter, 1)

/*!
 * @abstract
 * Convenience wrapper to decrement a VM counter.
 *
 * @discussion
 * This is not meant for counters counting "events", but for counters that
 * maintain how many objects there are in a given state (free pages, ...).
 *
 * @param counter         A pointer to a counter of any integer type.
 * @returns               The new value of the counter.
 */
#define VM_COUNTER_DEC(counter)         VM_COUNTER_SUB(counter, 1)
#define VM_COUNTER_ATOMIC_DEC(counter)  VM_COUNTER_ATOMIC_SUB(counter, 1)
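
/*
 * Example usage (illustrative sketch; `vm_page_free_count` stands in for
 * any state counter maintained with these macros):
 *
 *	vm_free_page_lock();
 *	VM_COUNTER_INC(&vm_page_free_count);      // one more page is free
 *	VM_COUNTER_SUB(&vm_page_free_count, n);   // release_assert()s on underflow
 *	vm_free_page_unlock();
 *
 * The _ATOMIC_ variants are for counters that are updated without an
 * external lock; both flavors panic instead of silently wrapping.
 */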

static inline int
VMP_CS_FOR_OFFSET(
	vm_map_offset_t fault_phys_offset)
{
	assertf(fault_phys_offset < PAGE_SIZE &&
	    !(fault_phys_offset & FOURK_PAGE_MASK),
	    "offset 0x%llx\n", (uint64_t)fault_phys_offset);
	return 1 << (fault_phys_offset >> FOURK_PAGE_SHIFT);
}
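/*
 * Worked example (illustrative; assumes a 16K PAGE_SIZE kernel where
 * code signing is tracked per 4K sub-page, i.e. FOURK_PAGE_SHIFT == 12):
 *
 *	VMP_CS_FOR_OFFSET(0x0000) == 0x1
 *	VMP_CS_FOR_OFFSET(0x1000) == 0x2
 *	VMP_CS_FOR_OFFSET(0x3000) == 0x8
 *
 * Each 4K sub-page thus owns one bit in the vmp_cs_validated,
 * vmp_cs_tainted and vmp_cs_nx bitfields queried below.
 */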
static inline bool
VMP_CS_VALIDATED(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (fault_page_size == PAGE_SIZE) {
		return p->vmp_cs_validated == VMP_CS_ALL_TRUE;
	}
	return p->vmp_cs_validated & VMP_CS_FOR_OFFSET(fault_phys_offset);
}
static inline bool
VMP_CS_TAINTED(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (fault_page_size == PAGE_SIZE) {
		return p->vmp_cs_tainted != VMP_CS_ALL_FALSE;
	}
	return p->vmp_cs_tainted & VMP_CS_FOR_OFFSET(fault_phys_offset);
}
static inline bool
VMP_CS_NX(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (fault_page_size == PAGE_SIZE) {
		return p->vmp_cs_nx != VMP_CS_ALL_FALSE;
	}
	return p->vmp_cs_nx & VMP_CS_FOR_OFFSET(fault_phys_offset);
}
static inline void
VMP_CS_SET_VALIDATED(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset,
	boolean_t value)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (value) {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_validated = VMP_CS_ALL_TRUE;
		}
		p->vmp_cs_validated |= VMP_CS_FOR_OFFSET(fault_phys_offset);
	} else {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_validated = VMP_CS_ALL_FALSE;
		}
		p->vmp_cs_validated &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
	}
}
static inline void
VMP_CS_SET_TAINTED(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset,
	boolean_t value)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (value) {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_tainted = VMP_CS_ALL_TRUE;
		}
		p->vmp_cs_tainted |= VMP_CS_FOR_OFFSET(fault_phys_offset);
	} else {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_tainted = VMP_CS_ALL_FALSE;
		}
		p->vmp_cs_tainted &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
	}
}
static inline void
VMP_CS_SET_NX(
	vm_page_t p,
	vm_map_size_t fault_page_size,
	vm_map_offset_t fault_phys_offset,
	boolean_t value)
{
	assertf(fault_page_size <= PAGE_SIZE,
	    "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
	    (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
	if (value) {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_nx = VMP_CS_ALL_TRUE;
		}
		p->vmp_cs_nx |= VMP_CS_FOR_OFFSET(fault_phys_offset);
	} else {
		if (fault_page_size == PAGE_SIZE) {
			p->vmp_cs_nx = VMP_CS_ALL_FALSE;
		}
		p->vmp_cs_nx &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
	}
}


#if defined(__LP64__)
static __inline__ void
vm_page_enqueue_tail(
	vm_page_queue_t         que,
	vm_page_queue_entry_t   elt)
{
	vm_page_queue_entry_t   old_tail;

	old_tail = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(que->prev);
	elt->next = VM_PAGE_PACK_PTR(que);
	elt->prev = que->prev;
	que->prev = old_tail->next = VM_PAGE_PACK_PTR(elt);
}

static __inline__ void
vm_page_remque(
	vm_page_queue_entry_t elt)
{
	vm_page_queue_entry_t next;
	vm_page_queue_entry_t prev;
	vm_page_packed_t      next_pck = elt->next;
	vm_page_packed_t      prev_pck = elt->prev;

	next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(next_pck);

	/* next may equal prev (and the queue head) if elt was the only element */
	prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(prev_pck);

	next->prev = prev_pck;
	prev->next = next_pck;

	elt->next = 0;
	elt->prev = 0;
}
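
/*
 * Usage sketch (illustrative; `q` is a hypothetical unsynchronized
 * vm_page_queue_t and `mem` a page linked through its embedded
 * vmp_pageq entry):
 *
 *	vm_page_enqueue_tail(q, &mem->vmp_pageq);
 *	...
 *	vm_page_remque(&mem->vmp_pageq);
 *
 * The packed-pointer encoding (VM_PAGE_PACK_PTR / VM_PAGE_UNPACK_PTR)
 * is what restricts these helpers to __LP64__.
 */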

#if defined(__x86_64__)
/*
 * Insert a new page into a free queue and clump pages within the same 16K boundary together
 */
static inline void
vm_page_queue_enter_clump(
	vm_page_queue_t       head,
	vm_page_t             elt)
{
	vm_page_queue_entry_t first = NULL;    /* first page in the clump */
	vm_page_queue_entry_t last = NULL;     /* last page in the clump */
	vm_page_queue_entry_t prev = NULL;
	vm_page_queue_entry_t next;
	uint_t                n_free = 1;
	extern unsigned int   vm_clump_size, vm_clump_promote_threshold;
	extern unsigned long  vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;

	/*
	 * If elt is part of the vm_pages[] array, find its neighboring buddies in the array.
	 */
	if (vm_page_in_array(elt)) {
		vm_page_t p;
		uint_t    i;
		uint_t    n;
		ppnum_t   clump_num;

		first = last = (vm_page_queue_entry_t)elt;
		clump_num = VM_PAGE_GET_CLUMP(elt);
		n = VM_PAGE_GET_PHYS_PAGE(elt) & vm_clump_mask;

		/*
		 * Check for preceding vm_pages[] entries in the same chunk
		 */
		for (i = 0, p = elt - 1; i < n && vm_page_get(0) <= p; i++, p--) {
			if (p->vmp_q_state == VM_PAGE_ON_FREE_Q && clump_num == VM_PAGE_GET_CLUMP(p)) {
				if (prev == NULL) {
					prev = (vm_page_queue_entry_t)p;
				}
				first = (vm_page_queue_entry_t)p;
				n_free++;
			}
		}

		/*
		 * Check the following vm_pages[] entries in the same chunk
		 */
		for (i = n + 1, p = elt + 1; i < vm_clump_size && p < vm_page_get(vm_pages_count); i++, p++) {
			if (p->vmp_q_state == VM_PAGE_ON_FREE_Q && clump_num == VM_PAGE_GET_CLUMP(p)) {
				if (last == (vm_page_queue_entry_t)elt) {               /* first one only */
					__DEBUG_CHECK_BUDDIES(prev, p, vmp_pageq);
				}

				if (prev == NULL) {
					prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev);
				}
				last = (vm_page_queue_entry_t)p;
				n_free++;
			}
		}
		__DEBUG_STAT_INCREMENT_INRANGE;
	}

	/* if elt is not part of vm_pages or if 1st page in clump, insert at tail */
	if (prev == NULL) {
		prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(head->prev);
	}

	/* insert the element */
	next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(prev->next);
	elt->vmp_pageq.next = prev->next;
	elt->vmp_pageq.prev = next->prev;
	prev->next = next->prev = VM_PAGE_PACK_PTR(elt);
	__DEBUG_STAT_INCREMENT_INSERTS;

	/*
	 * Check if clump needs to be promoted to head.
	 */
	if (n_free >= vm_clump_promote_threshold && n_free > 1) {
		vm_page_queue_entry_t first_prev;

		first_prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(first->prev);

		/* If not at head already */
		if (first_prev != head) {
			vm_page_queue_entry_t last_next;
			vm_page_queue_entry_t head_next;

			last_next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(last->next);

			/* verify that the links within the clump are consistent */
			__DEBUG_VERIFY_LINKS(first, n_free, last_next);

			/* promote clump to head */
			first_prev->next = last->next;
			last_next->prev = first->prev;
			first->prev = VM_PAGE_PACK_PTR(head);
			last->next = head->next;

			head_next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(head->next);
			head_next->prev = VM_PAGE_PACK_PTR(last);
			head->next = VM_PAGE_PACK_PTR(first);
			__DEBUG_STAT_INCREMENT_PROMOTES(n_free);
		}
	}
}
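
/*
 * Illustrative example (assumes 4K pages clumped on 16K boundaries,
 * i.e. a hypothetical vm_clump_size of 4): when page 0x1002 is freed
 * while 0x1000 and 0x1001 already sit on the free queue, it is linked
 * right next to those buddies rather than at the tail.  Once enough of
 * the clump is free (n_free >= vm_clump_promote_threshold), the whole
 * run is moved to the head of the queue so a subsequent 16K allocation
 * can pick it up contiguously.
 */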
#endif /* __x86_64__ */
#endif /* __LP64__ */


/*!
 * @abstract
 * The number of pages to try to free/process at once while under
 * the free page queue lock.
 *
 * @discussion
 * The value is chosen as a trade-off between:
 * - creating a lot of contention on the free page queue lock
 *   by taking and dropping it all the time,
 * - avoiding holding the free page queue lock for long periods of time.
 */
#define VMP_FREE_BATCH_SIZE     64
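
/*
 * Illustrative sketch of the intended batching pattern (hypothetical
 * caller; `more_pages_to_free` is a stand-in condition, not part of
 * this interface):
 *
 *	while (more_pages_to_free) {
 *		unsigned int n = 0;
 *
 *		vm_free_page_lock_spin();
 *		while (n++ < VMP_FREE_BATCH_SIZE && more_pages_to_free) {
 *			// ... free one page ...
 *		}
 *		vm_free_page_unlock();  // let contending threads in between batches
 *	}
 */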

/*!
 * @function vm_page_free_queue_init()
 *
 * @abstract
 * Initialize a free queue.
 *
 * @param free_queue    The free queue to initialize.
 */
extern void vm_page_free_queue_init(
	vm_page_free_queue_t    free_queue);

/*!
 * @function vm_page_free_queue_enter()
 *
 * @abstract
 * Add a page to a free queue.
 *
 * @discussion
 * Internally, the free queue is not synchronized, so any locking must be done
 * outside of this function.
 *
 * The page queue state will be set to the appropriate free queue state for the
 * memory class (typically VM_PAGE_ON_FREE_Q).
 *
 * Note that the callers are responsible for making sure that this operation is
 * a valid transition.  This is a helper to abstract handling of the several
 * free page queues on the system which sits above vm_page_queue_enter() and
 * maintains counters as well, but is otherwise oblivious to the page state
 * machine.
 *
 * Most clients should use a wrapper around this function (typically
 * vm_page_release() or vm_page_free_list()) and not call it directly.
 *
 * @param mem_class     The memory class to free pages to.
 * @param page          The page to free.
 * @param pnum          The physical page number of @c page.
 */
extern void vm_page_free_queue_enter(
	vm_memory_class_t       mem_class,
	vm_page_t               page,
	ppnum_t                 pnum);

/*!
 * @function vm_page_free_queue_remove()
 *
 * @abstract
 * Removes an arbitrary free page from the given free queue.
 *
 * @discussion
 * The given page must be in the given free queue, or state may be corrupted.
 *
 * Internally, the free queue is not synchronized, so any locking must be done
 * outside of this function.
 *
 * Note that the callers are responsible for making sure that the requested
 * queue state corresponds to a valid transition. This is a helper to abstract
 * handling of the several free page queues on the system which sits above
 * vm_page_queue_remove() and maintains counters as well, but is otherwise
 * oblivious to the page state machine.
 *
 * Most clients should use a wrapper around this function (typically
 * vm_page_free_queue_steal()) and not call it directly.
 *
 * @param class         The memory class corresponding to the free queue
 *                      @c mem is enqueued on.
 * @param mem           The page to remove.
 * @param pnum          The physical page number of @c mem.
 * @param q_state       The desired queue state for the page.
 */
__attribute__((always_inline))
extern void vm_page_free_queue_remove(
	vm_memory_class_t       class,
	vm_page_t               mem,
	ppnum_t                 pnum,
	vm_page_q_state_t       q_state);

/*!
 * @function vm_page_free_queue_grab()
 *
 * @abstract
 * Gets pages from the free queue.
 *
 * @discussion
 * Clients cannot get more pages than the free queue has; attempting to do so
 * will cause a panic.
 *
 * Internally, the free queue is not synchronized, so any locking must be done
 * outside of this function.
 *
 * Note that the callers are responsible for making sure that the requested
 * queue state corresponds to a valid transition. This is a helper to abstract
 * handling of the several free page queues on the system which sits above
 * vm_page_queue_remove() and maintains counters as well, but is otherwise
 * oblivious to the page state machine.
 *
 * Most clients should use a wrapper (typically vm_page_grab_options())
 * around this function and not call it directly.
 *
 * @param options       The grab options.
 * @param mem_class     The memory class to allocate from.
 * @param num_pages     The number of pages to grab.
 * @param q_state       The vmp_q_state to set on the page.
 *
 * @returns
 * A list of pages; the list will be num_pages long.
 */
extern vm_page_list_t vm_page_free_queue_grab(
	vm_grab_options_t       options,
	vm_memory_class_t       mem_class,
	unsigned int            num_pages,
	vm_page_q_state_t       q_state);

/*!
 * @abstract
 * Perform a wakeup for a free page queue wait event.
 *
 * @param event         The free page queue event to wake up.
 * @param n             The number of threads to try to wake up
 *                      (UINT32_MAX means all).
 */
extern void vm_page_free_wakeup(event_t event, uint32_t n);


extern  void    vm_page_assign_special_state(vm_page_t mem, vm_page_specialq_t mode);
extern  void    vm_page_update_special_state(vm_page_t mem);
extern  void    vm_page_add_to_specialq(vm_page_t mem, boolean_t first);
extern  void    vm_page_remove_from_specialq(vm_page_t mem);


/*
 * Prototypes for functions exported by this module.
 */
extern void             vm_page_bootstrap(
	vm_offset_t     *startp,
	vm_offset_t     *endp);

extern vm_page_t        kdp_vm_page_lookup(
	vm_object_t             object,
	vm_object_offset_t      offset);

extern vm_page_t        vm_page_lookup(
	vm_object_t             object,
	vm_object_offset_t      offset);

/*!
 * @abstract
 * Creates a fictitious page.
 *
 * @discussion
 * This function never returns VM_PAGE_NULL.
 *
 * Pages made by this function have the @c vm_page_fictitious_addr
 * fake physical address.
 */
extern vm_page_t        vm_page_create_fictitious(void);

/*!
 * @abstract
 * Returns a kernel guard page (used by @c kmem_alloc_guard()).
 *
 * @discussion
 * Pages returned by this function have the @c vm_page_guard_addr
 * fake physical address.
 *
 * @param canwait       Whether the caller can wait; if true,
 *                      this function never returns VM_PAGE_NULL.
 */
extern vm_page_t        vm_page_create_guard(bool canwait);

/*!
 * @abstract
 * Create a private VM page.
 *
 * @discussion
 * These pages allow for non-canonical references to the same physical page.
 * The page's @c VM_PAGE_GET_PHYS_PAGE() will be @c base_page.
 *
 * Such pages must not be released back to the free queues directly;
 * @c vm_page_reset_private() must be called first.
 *
 * This function never returns VM_PAGE_NULL.
 *
 * @param base_page     The physical page this private page represents.
 */
extern vm_page_t        vm_page_create_private(ppnum_t base_page);

/*!
 * @abstract
 * Returns whether this is the canonical page for a regular managed kernel page.
 *
 * @discussion
 * A kernel page is the canonical @c vm_page_t for a given pmap-managed physical
 * page.  These pages are made at startup, or when @c ml_static_mfree() is
 * called, and are never freed.
 *
 * Its @c VM_PAGE_GET_PHYS_PAGE() will be a valid @c ppnum_t value.
 *
 * A page can either be:
 * - a canonical page (@c vm_page_is_canonical())
 * - a fictitious page (@c vm_page_is_fictitious()),
 *   of which guard pages are a special case (@c vm_page_is_guard())
 * - a private page (@c vm_page_is_private())
 */
extern bool             vm_page_is_canonical(const struct vm_page *m) __pure2;

/*!
 * @abstract
 * Returns whether this page is fictitious (made by @c vm_page_create_guard()
 * or by @c vm_page_create_fictitious()).
 *
 * @discussion
 * A page can either be:
 * - a canonical page (@c vm_page_is_canonical())
 * - a fictitious page (@c vm_page_is_fictitious()),
 *   of which guard pages are a special case (@c vm_page_is_guard())
 * - a private page (@c vm_page_is_private())
 */
extern bool             vm_page_is_fictitious(const struct vm_page *m);

/*!
 * @abstract
 * Returns whether this is a kernel guard page that was made by
 * @c vm_page_create_guard().
 */
extern bool             vm_page_is_guard(const struct vm_page *m) __pure2;

/*!
 * @abstract
 * Returns whether a page is private (made by @c vm_page_create_private(),
 * or converted from a fictitious page by @c vm_page_make_private()).
 *
 * @discussion
 * A page can either be:
 * - a canonical page (@c vm_page_is_canonical())
 * - a fictitious page (@c vm_page_is_fictitious()),
 *   of which guard pages are a special case (@c vm_page_is_guard())
 * - a private page (@c vm_page_is_private())
 */
extern bool             vm_page_is_private(const struct vm_page *m);

/*!
 * @abstract
 * Converts a fictitious page made by @c vm_page_create_fictitious()
 * into a private page.
 *
 * @param m             The fictitious page to convert into a private one.
 * @param base_page     The physical page that this page will represent
 *                      (@c vm_page_create_private()).
 */
extern void             vm_page_make_private(vm_page_t m, ppnum_t base_page);

/*!
 * @abstract
 * Converts a private page into a fictitious page (as if made by
 * @c vm_page_create_fictitious()).
 *
 * @discussion
 * Private pages can't be released with @c vm_page_release()
 * without being turned into a fictitious page first using this function.
 */
extern void             vm_page_reset_private(vm_page_t m);
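
/*
 * Illustrative lifecycle sketch (hypothetical caller; `pnum` is any
 * physical page the caller wants to alias):
 *
 *	vm_page_t p = vm_page_create_private(pnum); // non-canonical alias
 *	// ... use p in place of the canonical page ...
 *	vm_page_reset_private(p);                   // back to fictitious
 *	vm_page_release(p, VMP_RELEASE_NONE);       // now safe to release
 */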

#if HAS_MTE

/*!
 * @abstract
 * Returns whether the specified physical page number is actual tag storage.
 *
 * @discussion
 * This returns false for pages in the tag storage range that are recursive
 * or unmanaged, unlike pmap_in_tag_storage_range().
 *
 * Note that it might return "true" for pages that the MTE Info data structure
 * considers covering "unmanaged" memory.
 *
 * @param page          A canonical VM page.
 * @param pnum          The physical page number for @c page.
 */
extern bool vm_page_is_tag_storage_pnum(vm_page_t page, ppnum_t pnum) __pure2;

static inline bool
vm_page_is_tag_storage(vm_page_t page)
{
	return vm_page_is_tag_storage_pnum(page, VM_PAGE_GET_PHYS_PAGE(page));
}

#endif /* HAS_MTE */

extern bool             vm_pool_low(void);

/*!
 * @abstract
 * Grabs a page.
 *
 * @discussion
 * Allocate a page by looking at:
 * - per-cpu queues,
 * - global free queues,
 * - magical queues (delayed, secluded, ...)
 *
 * This function always succeeds for VM privileged threads,
 * unless VM_PAGE_GRAB_NOPAGEWAIT is passed.
 *
 * This function might return VM_PAGE_NULL if there are no pages left.
 */
extern vm_page_t        vm_page_grab_options(vm_grab_options_t options);

static inline vm_page_t
vm_page_grab(void)
{
	return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE);
}
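
/*
 * Typical allocation pattern (illustrative sketch): loop until a page
 * is available, blocking in VM_PAGE_WAIT() (defined below) whenever the
 * free pools are exhausted.
 *
 *	vm_page_t mem;
 *
 *	while ((mem = vm_page_grab()) == VM_PAGE_NULL) {
 *		VM_PAGE_WAIT();
 *	}
 */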

/*!
 * @abstract
 * Returns the proper grab options for the specified object.
 */
extern vm_grab_options_t vm_page_grab_options_for_object(vm_object_t object);

#if XNU_VM_HAS_LOPAGE
extern vm_page_t vm_page_grablo(vm_grab_options_t options);
#else
static inline vm_page_t
vm_page_grablo(vm_grab_options_t options)
{
	return vm_page_grab_options(options);
}
#endif


__options_closed_decl(vmp_release_options_t, uint32_t, {
	VMP_RELEASE_NONE                = 0x00,
	VMP_RELEASE_Q_LOCKED            = 0x01,
	VMP_RELEASE_SKIP_FREE_CHECK     = 0x02,
	VMP_RELEASE_HIBERNATE           = 0x04,
	VMP_RELEASE_STARTUP             = 0x08,
});

extern void vm_page_release(
	vm_page_t               page,
	vmp_release_options_t   options);

extern boolean_t        vm_page_wait(
	int             interruptible);

extern void             vm_page_init(
	vm_page_t       page,
	ppnum_t         phys_page);

extern void             vm_page_free(
	vm_page_t       page);

extern void             vm_page_free_unlocked(
	vm_page_t       page,
	boolean_t       remove_from_hash);


extern void             vm_page_balance_inactive(
	int             max_to_move);

extern void             vm_page_activate(
	vm_page_t       page);

extern void             vm_page_deactivate(
	vm_page_t       page);

extern void             vm_page_deactivate_internal(
	vm_page_t       page,
	boolean_t       clear_hw_reference);

extern void             vm_page_enqueue_cleaned(vm_page_t page);

extern void             vm_page_lru(
	vm_page_t       page);

extern void             vm_page_speculate(
	vm_page_t       page,
	boolean_t       new);

extern void             vm_page_speculate_ageit(
	struct vm_speculative_age_q *aq);

extern void             vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks);

extern void             vm_page_rename(
	vm_page_t               page,
	vm_object_t             new_object,
	vm_object_offset_t      new_offset);

extern void             vm_page_insert(
	vm_page_t               page,
	vm_object_t             object,
	vm_object_offset_t      offset);

extern void             vm_page_insert_wired(
	vm_page_t               page,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_tag_t                tag);


extern void             vm_page_insert_internal(
	vm_page_t               page,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_tag_t                tag,
	boolean_t               queues_lock_held,
	boolean_t               insert_in_hash,
	boolean_t               batch_pmap_op,
	boolean_t               delayed_accounting,
	uint64_t                *delayed_ledger_update);

extern void             vm_page_replace(
	vm_page_t               mem,
	vm_object_t             object,
	vm_object_offset_t      offset);

extern void             vm_page_remove(
	vm_page_t       page,
	boolean_t       remove_from_hash);

#if HAS_MTE
extern void             vm_page_zero_fill(
	vm_page_t       page,
	bool            zero_tags);
#else /* HAS_MTE */
extern void             vm_page_zero_fill(
	vm_page_t       page);
#endif /* HAS_MTE */

extern void             vm_page_part_zero_fill(
	vm_page_t       m,
	vm_offset_t     m_pa,
	vm_size_t       len);

extern void             vm_page_copy(
	vm_page_t       src_page,
	vm_page_t       dest_page);

extern void             vm_page_part_copy(
	vm_page_t       src_m,
	vm_offset_t     src_pa,
	vm_page_t       dst_m,
	vm_offset_t     dst_pa,
	vm_size_t       len);

extern void             vm_page_wire(
	vm_page_t       page,
	vm_tag_t        tag,
	boolean_t       check_memorystatus);

extern void             vm_page_unwire(
	vm_page_t       page,
	boolean_t       queueit);

extern void             vm_set_page_size(void);

extern void             vm_page_validate_cs(
	vm_page_t       page,
	vm_map_size_t   fault_page_size,
	vm_map_offset_t fault_phys_offset);

extern void             vm_page_validate_cs_mapped(
	vm_page_t       page,
	vm_map_size_t   fault_page_size,
	vm_map_offset_t fault_phys_offset,
	const void      *kaddr);
extern void             vm_page_validate_cs_mapped_slow(
	vm_page_t       page,
	const void      *kaddr);
extern void             vm_page_validate_cs_mapped_chunk(
	vm_page_t       page,
	const void      *kaddr,
	vm_offset_t     chunk_offset,
	vm_size_t       chunk_size,
	boolean_t       *validated,
	unsigned        *tainted);

extern void             vm_page_free_prepare_queues(
	vm_page_t       page);

extern void             vm_page_free_prepare_object(
	vm_page_t       page,
	boolean_t       remove_from_hash);

extern wait_result_t    vm_page_sleep(
	vm_object_t        object,
	vm_page_t          m,
	wait_interrupt_t   interruptible,
	lck_sleep_action_t action);

extern void             vm_page_wakeup(
	vm_object_t        object,
	vm_page_t          m);

extern void             vm_page_wakeup_done(
	vm_object_t        object,
	vm_page_t          m);

typedef struct page_worker_token {
	thread_pri_floor_t pwt_floor_token;
	bool pwt_did_register_inheritor;
} page_worker_token_t;

extern void             vm_page_wakeup_done_with_inheritor(
	vm_object_t        object,
	vm_page_t          m,
	page_worker_token_t *token);

extern void             page_worker_register_worker(
	event_t            event,
	page_worker_token_t *out_token);

extern boolean_t        vm_page_is_relocatable(
	vm_page_t            m,
	vm_relocate_reason_t reloc_reason);

extern kern_return_t    vm_page_relocate(
	vm_page_t            m1,
	int *                compressed_pages,
	vm_relocate_reason_t reason,
	vm_page_t*           new_page);

extern bool             vm_page_is_restricted(
	vm_page_t mem);

/*
 * Functions implemented as macros. m->vmp_wanted and m->vmp_busy are
 * protected by the object lock.
 */

#if !XNU_TARGET_OS_OSX
#define SET_PAGE_DIRTY(m, set_pmap_modified)                            \
	        MACRO_BEGIN                                             \
	        vm_page_t __page__ = (m);                               \
	        if (__page__->vmp_pmapped == TRUE &&                    \
	            __page__->vmp_wpmapped == TRUE &&                   \
	            __page__->vmp_dirty == FALSE &&                     \
	            (set_pmap_modified)) {                              \
	                pmap_set_modify(VM_PAGE_GET_PHYS_PAGE(__page__)); \
	        }                                                       \
	        __page__->vmp_dirty = TRUE;                             \
	        MACRO_END
#else /* !XNU_TARGET_OS_OSX */
#define SET_PAGE_DIRTY(m, set_pmap_modified)                            \
	        MACRO_BEGIN                                             \
	        vm_page_t __page__ = (m);                               \
	        __page__->vmp_dirty = TRUE;                             \
	        MACRO_END
#endif /* !XNU_TARGET_OS_OSX */

#define VM_PAGE_FREE(p)                         \
	        MACRO_BEGIN                     \
	        vm_page_free_unlocked(p, TRUE); \
	        MACRO_END


#define VM_PAGE_WAIT()          ((void)vm_page_wait(THREAD_UNINT))

static inline void
vm_free_page_lock(void)
{
	lck_mtx_lock(&vm_page_queue_free_lock);
}

static inline void
vm_free_page_lock_spin(void)
{
	lck_mtx_lock_spin(&vm_page_queue_free_lock);
}

static inline void
vm_free_page_lock_convert(void)
{
	lck_mtx_convert_spin(&vm_page_queue_free_lock);
}

static inline void
vm_free_page_unlock(void)
{
	lck_mtx_unlock(&vm_page_queue_free_lock);
}
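
/*
 * Typical locking pattern (illustrative sketch): take the lock in spin
 * mode for short critical sections, and convert to a full mutex before
 * doing anything that may block or take a while:
 *
 *	vm_free_page_lock_spin();
 *	if (must_do_lengthy_work) {        // hypothetical condition
 *		vm_free_page_lock_convert();
 *		// ... lengthier work, may block ...
 *	}
 *	vm_free_page_unlock();
 */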


#define vm_page_lockconvert_queues()    lck_mtx_convert_spin(&vm_page_queue_lock)


#ifdef  VPL_LOCK_SPIN
extern lck_grp_t vm_page_lck_grp_local;

#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_spin_init(&vlq->vpl_lock, vpl_grp, vpl_attr)
#define VPL_LOCK(vpl) lck_spin_lock_grp(vpl, &vm_page_lck_grp_local)
#define VPL_UNLOCK(vpl) lck_spin_unlock(vpl)
#else
#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_mtx_init(&vlq->vpl_lock, vpl_grp, vpl_attr)
#define VPL_LOCK(vpl) lck_mtx_lock_spin(vpl)
#define VPL_UNLOCK(vpl) lck_mtx_unlock(vpl)
#endif

#if DEVELOPMENT || DEBUG
#define VM_PAGE_SPECULATIVE_USED_ADD()                          \
	MACRO_BEGIN                                             \
	OSAddAtomic(1, &vm_page_speculative_used);              \
	MACRO_END
#else
#define VM_PAGE_SPECULATIVE_USED_ADD()
#endif

#define VM_PAGE_CONSUME_CLUSTERED(mem)                          \
	MACRO_BEGIN                                             \
	ppnum_t	__phys_page;                                    \
	__phys_page = VM_PAGE_GET_PHYS_PAGE(mem);               \
	pmap_lock_phys_page(__phys_page);                       \
	if (mem->vmp_clustered) {                               \
	        vm_object_t o;                                  \
	        o = VM_PAGE_OBJECT(mem);                        \
	        assert(o);                                      \
	        o->pages_used++;                                \
	        mem->vmp_clustered = FALSE;                     \
	        VM_PAGE_SPECULATIVE_USED_ADD();                 \
	}                                                       \
	pmap_unlock_phys_page(__phys_page);                     \
	MACRO_END


#define VM_PAGE_COUNT_AS_PAGEIN(mem)                            \
	MACRO_BEGIN                                             \
	{                                                       \
	vm_object_t o;                                          \
	o = VM_PAGE_OBJECT(mem);                                \
	DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL);           \
	counter_inc(&current_task()->pageins);                  \
	if (o->internal) {                                      \
	        DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL);       \
	} else {                                                \
	        DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); \
	}                                                       \
	}                                                       \
	MACRO_END


/* adjust for stolen pages accounted elsewhere */
#define VM_PAGE_MOVE_STOLEN(page_count)                         \
	MACRO_BEGIN                                             \
	vm_page_stolen_count -= (page_count);                   \
	vm_page_wire_count_initial -= (page_count);             \
	MACRO_END

extern kern_return_t pmap_enter_object_options_check(
	pmap_t           pmap,
	vm_map_address_t virtual_address,
	vm_map_offset_t  fault_phys_offset,
	vm_object_t      object,
	ppnum_t          pn,
	vm_prot_t        protection,
	vm_prot_t        fault_type,
	boolean_t        wired,
	unsigned int     options);

extern kern_return_t pmap_enter_options_check(
	pmap_t           pmap,
	vm_map_address_t virtual_address,
	vm_map_offset_t  fault_phys_offset,
	vm_page_t        page,
	vm_prot_t        protection,
	vm_prot_t        fault_type,
	boolean_t        wired,
	unsigned int     options);

extern kern_return_t pmap_enter_check(
	pmap_t           pmap,
	vm_map_address_t virtual_address,
	vm_page_t        page,
	vm_prot_t        protection,
	vm_prot_t        fault_type,
	boolean_t        wired);

#define DW_vm_page_unwire               0x01
#define DW_vm_page_wire                 0x02
#define DW_vm_page_free                 0x04
#define DW_vm_page_activate             0x08
#define DW_vm_page_deactivate_internal  0x10
#define DW_vm_page_speculate            0x20
#define DW_vm_page_lru                  0x40
#define DW_vm_pageout_throttle_up       0x80
#define DW_PAGE_WAKEUP                  0x100
#define DW_clear_busy                   0x200
#define DW_clear_reference              0x400
#define DW_set_reference                0x800
#define DW_move_page                    0x1000
#define DW_VM_PAGE_QUEUES_REMOVE        0x2000
#define DW_enqueue_cleaned              0x4000
#define DW_vm_phantom_cache_update      0x8000
#if HAS_MTE
/*
 * Wake up a tag storage page if it's done being used
 * in a UPL. This requires the page queues lock.
 */
#define DW_vm_page_wakeup_tag_storage   0x10000
#endif /* HAS_MTE */
#define DW_vm_page_iopl_wire            0x20000
#define DW_vm_page_iopl_wire_write      0x40000

struct vm_page_delayed_work {
	vm_page_t       dw_m;
	int             dw_mask;
};

#define DEFAULT_DELAYED_WORK_LIMIT      32

struct vm_page_delayed_work_ctx {
	struct vm_page_delayed_work dwp[DEFAULT_DELAYED_WORK_LIMIT];
	thread_t                    delayed_owner;
};

extern kern_return_t vm_page_do_delayed_work(vm_object_t object, vm_tag_t tag, struct vm_page_delayed_work *dwp, int dw_count);

#define DELAYED_WORK_LIMIT(max) ((vm_max_delayed_work_limit >= max ? max : vm_max_delayed_work_limit))

/*
 * vm_page_do_delayed_work may need to drop the object lock...
 * if it does, we need the pages it's looking at to
 * be held stable via the busy bit, so if busy isn't already
 * set, we need to set it and ask vm_page_do_delayed_work
 * to clear it and wake up anyone who might have blocked on
 * it once we're done processing the page.
 */

#define VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_cnt)              \
	MACRO_BEGIN                                             \
	if (mem->vmp_busy == FALSE) {                           \
	        mem->vmp_busy = TRUE;                           \
	        if ( !(dwp->dw_mask & DW_vm_page_free))         \
	                dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); \
	}                                                       \
	dwp->dw_m = mem;                                        \
	dwp++;                                                  \
	dw_cnt++;                                               \
	MACRO_END
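
/*
 * Illustrative sketch of the delayed-work pattern (hypothetical loop;
 * `object`, `tag` and the page iteration are stand-ins):
 *
 *	struct vm_page_delayed_work_ctx ctx;
 *	struct vm_page_delayed_work *dwp = &ctx.dwp[0];
 *	int dw_count = 0;
 *	int dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 *
 *	for (each page `mem` of interest) {
 *		dwp->dw_mask = DW_vm_page_activate;
 *		VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_count);
 *		if (dw_count >= dw_limit) {
 *			vm_page_do_delayed_work(object, tag, &ctx.dwp[0], dw_count);
 *			dwp = &ctx.dwp[0];
 *			dw_count = 0;
 *		}
 *	}
 *	if (dw_count > 0) {
 *		vm_page_do_delayed_work(object, tag, &ctx.dwp[0], dw_count);
 *	}
 */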

//todo int
extern vm_page_t vm_object_page_grab(vm_object_t);

//todo int
#if VM_PAGE_BUCKETS_CHECK
extern void vm_page_buckets_check(void);
#endif /* VM_PAGE_BUCKETS_CHECK */

//todo int
extern void vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_specialq);
extern void vm_page_remove_internal(vm_page_t page);
extern void vm_page_enqueue_inactive(vm_page_t mem, boolean_t first);
extern void vm_page_enqueue_active(vm_page_t mem, boolean_t first);
extern void vm_page_check_pageable_safe(vm_page_t page);
//end int

//todo int
extern void vm_retire_boot_pages(void);

//todo all int

#define VMP_ERROR_GET(p) ((p)->vmp_error)


#endif /* XNU_KERNEL_PRIVATE */
__END_DECLS

#endif  /* _VM_VM_PAGE_INTERNAL_H_ */