1 /*
2 * Copyright (c) 2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _VM_VM_PAGE_INTERNAL_H_
30 #define _VM_VM_PAGE_INTERNAL_H_
31
32 #include <sys/cdefs.h>
33 #include <vm/vm_page.h>
34
35 __BEGIN_DECLS
36 #ifdef XNU_KERNEL_PRIVATE
37
38 PERCPU_DECL(unsigned int, start_color);
39
40 extern struct vm_page_free_queue vm_page_queue_free;
41
/*!
 * @var vm_page_deactivate_behind
 * @brief Whether the system should proactively deactivate pages in large
 *        sequential vm-objects/xfers to prevent file-cache/compressor
 *        thrashing.
 */
48 extern bool vm_page_deactivate_behind;
49
/*!
 * @var vm_page_deactivate_behind_min_resident_ratio
 * @brief The minimum size of an xfer/vm-object at which proactive deactivation
 *        should be engaged to prevent file-cache/compressor thrashing.
 */
55 extern uint32_t vm_page_deactivate_behind_min_resident_ratio;
56
57
58 /*!
59 * @abstract
60 * Applies a signed delta to a VM counter that is not meant to ever overflow.
61 *
62 * @discussion
63 * This is not meant for counters counting "events", but for counters that
64 * maintain how many objects there is in a given state (free pages, ...).
65 *
66 * @param counter A pointer to a counter of any integer type.
67 * @param value The signed delta to apply.
68 * @returns The new value of the counter.
69 */
70 #define VM_COUNTER_DELTA(counter, value) ({ \
71 __auto_type __counter = (counter); \
72 release_assert(!os_add_overflow(*__counter, value, __counter)); \
73 *__counter; \
74 })
75 #define VM_COUNTER_ATOMIC_DELTA(counter, value) ({ \
76 __auto_type __value = (value); \
77 __auto_type __orig = os_atomic_add_orig(counter, __value, relaxed); \
78 release_assert(!os_add_overflow(__orig, __value, &__orig)); \
79 __orig + __value; \
80 })
81
82
83 /*!
84 * @abstract
85 * Applies an unsigned increment to a VM counter that is not meant to ever
86 * overflow.
87 *
88 * @discussion
89 * This is not meant for counters counting "events", but for counters that
90 * maintain how many objects there is in a given state (free pages, ...).
91 *
92 * @param counter A pointer to a counter of any integer type.
93 * @param value The unsigned value to add.
94 * @returns The new value of the counter.
95 */
96 #define VM_COUNTER_ADD(counter, value) ({ \
97 __auto_type __counter = (counter); \
98 release_assert(!os_add_overflow(*__counter, value, __counter)); \
99 *__counter; \
100 })
101 #define VM_COUNTER_ATOMIC_ADD(counter, value) ({ \
102 __auto_type __value = (value); \
103 __auto_type __orig = os_atomic_add_orig(counter, __value, relaxed); \
104 release_assert(!os_add_overflow(__orig, __value, &__orig)); \
105 __orig + __value; \
106 })
107
108 /*!
109 * @abstract
110 * Applies an unsigned decrement to a VM counter that is not meant to ever
111 * overflow.
112 *
113 * @discussion
114 * This is not meant for counters counting "events", but for counters that
115 * maintain how many objects there is in a given state (free pages, ...).
116 *
117 * @param counter A pointer to a counter of any integer type.
118 * @param value The unsigned value to substract.
119 * @returns The new value of the counter.
120 */
121 #define VM_COUNTER_SUB(counter, value) ({ \
122 __auto_type __counter = (counter); \
123 release_assert(!os_sub_overflow(*__counter, value, __counter)); \
124 *__counter; \
125 })
126 #define VM_COUNTER_ATOMIC_SUB(counter, value) ({ \
127 __auto_type __value = (value); \
128 __auto_type __orig = os_atomic_sub_orig(counter, __value, relaxed); \
129 release_assert(!os_sub_overflow(__orig, __value, &__orig)); \
130 __orig - __value; \
131 })
132
133
134 /*!
135 * @abstract
136 * Convenience wrapper to increment a VM counter.
137 *
138 * @discussion
139 * This is not meant for counters counting "events", but for counters that
140 * maintain how many objects there is in a given state (free pages, ...).
141 *
142 * @param counter A pointer to a counter of any integer type.
143 * @returns The new value of the counter.
144 */
145 #define VM_COUNTER_INC(counter) VM_COUNTER_ADD(counter, 1)
146 #define VM_COUNTER_ATOMIC_INC(counter) VM_COUNTER_ATOMIC_ADD(counter, 1)
147
148 /*!
149 * @abstract
150 * Convenience wrapper to decrement a VM counter.
151 *
152 * @discussion
153 * This is not meant for counters counting "events", but for counters that
154 * maintain how many objects there is in a given state (free pages, ...).
155 *
156 * @param counter A pointer to a counter of any integer type.
157 * @returns The new value of the counter.
158 */
159 #define VM_COUNTER_DEC(counter) VM_COUNTER_SUB(counter, 1)
160 #define VM_COUNTER_ATOMIC_DEC(counter) VM_COUNTER_ATOMIC_SUB(counter, 1)
161
162 static inline int
VMP_CS_FOR_OFFSET(vm_map_offset_t fault_phys_offset)163 VMP_CS_FOR_OFFSET(
164 vm_map_offset_t fault_phys_offset)
165 {
166 assertf(fault_phys_offset < PAGE_SIZE &&
167 !(fault_phys_offset & FOURK_PAGE_MASK),
168 "offset 0x%llx\n", (uint64_t)fault_phys_offset);
169 return 1 << (fault_phys_offset >> FOURK_PAGE_SHIFT);
170 }
171 static inline bool
VMP_CS_VALIDATED(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset)172 VMP_CS_VALIDATED(
173 vm_page_t p,
174 vm_map_size_t fault_page_size,
175 vm_map_offset_t fault_phys_offset)
176 {
177 assertf(fault_page_size <= PAGE_SIZE,
178 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
179 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
180 if (fault_page_size == PAGE_SIZE) {
181 return p->vmp_cs_validated == VMP_CS_ALL_TRUE;
182 }
183 return p->vmp_cs_validated & VMP_CS_FOR_OFFSET(fault_phys_offset);
184 }
185 static inline bool
VMP_CS_TAINTED(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset)186 VMP_CS_TAINTED(
187 vm_page_t p,
188 vm_map_size_t fault_page_size,
189 vm_map_offset_t fault_phys_offset)
190 {
191 assertf(fault_page_size <= PAGE_SIZE,
192 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
193 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
194 if (fault_page_size == PAGE_SIZE) {
195 return p->vmp_cs_tainted != VMP_CS_ALL_FALSE;
196 }
197 return p->vmp_cs_tainted & VMP_CS_FOR_OFFSET(fault_phys_offset);
198 }
199 static inline bool
VMP_CS_NX(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset)200 VMP_CS_NX(
201 vm_page_t p,
202 vm_map_size_t fault_page_size,
203 vm_map_offset_t fault_phys_offset)
204 {
205 assertf(fault_page_size <= PAGE_SIZE,
206 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
207 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
208 if (fault_page_size == PAGE_SIZE) {
209 return p->vmp_cs_nx != VMP_CS_ALL_FALSE;
210 }
211 return p->vmp_cs_nx & VMP_CS_FOR_OFFSET(fault_phys_offset);
212 }
213 static inline void
VMP_CS_SET_VALIDATED(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset,boolean_t value)214 VMP_CS_SET_VALIDATED(
215 vm_page_t p,
216 vm_map_size_t fault_page_size,
217 vm_map_offset_t fault_phys_offset,
218 boolean_t value)
219 {
220 assertf(fault_page_size <= PAGE_SIZE,
221 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
222 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
223 if (value) {
224 if (fault_page_size == PAGE_SIZE) {
225 p->vmp_cs_validated = VMP_CS_ALL_TRUE;
226 }
227 p->vmp_cs_validated |= VMP_CS_FOR_OFFSET(fault_phys_offset);
228 } else {
229 if (fault_page_size == PAGE_SIZE) {
230 p->vmp_cs_validated = VMP_CS_ALL_FALSE;
231 }
232 p->vmp_cs_validated &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
233 }
234 }
235 static inline void
VMP_CS_SET_TAINTED(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset,boolean_t value)236 VMP_CS_SET_TAINTED(
237 vm_page_t p,
238 vm_map_size_t fault_page_size,
239 vm_map_offset_t fault_phys_offset,
240 boolean_t value)
241 {
242 assertf(fault_page_size <= PAGE_SIZE,
243 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
244 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
245 if (value) {
246 if (fault_page_size == PAGE_SIZE) {
247 p->vmp_cs_tainted = VMP_CS_ALL_TRUE;
248 }
249 p->vmp_cs_tainted |= VMP_CS_FOR_OFFSET(fault_phys_offset);
250 } else {
251 if (fault_page_size == PAGE_SIZE) {
252 p->vmp_cs_tainted = VMP_CS_ALL_FALSE;
253 }
254 p->vmp_cs_tainted &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
255 }
256 }
257 static inline void
VMP_CS_SET_NX(vm_page_t p,vm_map_size_t fault_page_size,vm_map_offset_t fault_phys_offset,boolean_t value)258 VMP_CS_SET_NX(
259 vm_page_t p,
260 vm_map_size_t fault_page_size,
261 vm_map_offset_t fault_phys_offset,
262 boolean_t value)
263 {
264 assertf(fault_page_size <= PAGE_SIZE,
265 "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",
266 (uint64_t)fault_page_size, (uint64_t)fault_phys_offset);
267 if (value) {
268 if (fault_page_size == PAGE_SIZE) {
269 p->vmp_cs_nx = VMP_CS_ALL_TRUE;
270 }
271 p->vmp_cs_nx |= VMP_CS_FOR_OFFSET(fault_phys_offset);
272 } else {
273 if (fault_page_size == PAGE_SIZE) {
274 p->vmp_cs_nx = VMP_CS_ALL_FALSE;
275 }
276 p->vmp_cs_nx &= ~VMP_CS_FOR_OFFSET(fault_phys_offset);
277 }
278 }
279
280
281 #if defined(__LP64__)
282 static __inline__ void
vm_page_enqueue_tail(vm_page_queue_t que,vm_page_queue_entry_t elt)283 vm_page_enqueue_tail(
284 vm_page_queue_t que,
285 vm_page_queue_entry_t elt)
286 {
287 vm_page_queue_entry_t old_tail;
288
289 old_tail = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(que->prev);
290 elt->next = VM_PAGE_PACK_PTR(que);
291 elt->prev = que->prev;
292 que->prev = old_tail->next = VM_PAGE_PACK_PTR(elt);
293 }
294
295 static __inline__ void
vm_page_remque(vm_page_queue_entry_t elt)296 vm_page_remque(
297 vm_page_queue_entry_t elt)
298 {
299 vm_page_queue_entry_t next;
300 vm_page_queue_entry_t prev;
301 vm_page_packed_t next_pck = elt->next;
302 vm_page_packed_t prev_pck = elt->prev;
303
304 next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(next_pck);
305
306 /* next may equal prev (and the queue head) if elt was the only element */
307 prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(prev_pck);
308
309 next->prev = prev_pck;
310 prev->next = next_pck;
311
312 elt->next = 0;
313 elt->prev = 0;
314 }
315
316 #if defined(__x86_64__)
317 /*
318 * Insert a new page into a free queue and clump pages within the same 16K boundary together
319 */
320 static inline void
vm_page_queue_enter_clump(vm_page_queue_t head,vm_page_t elt)321 vm_page_queue_enter_clump(
322 vm_page_queue_t head,
323 vm_page_t elt)
324 {
325 vm_page_queue_entry_t first = NULL; /* first page in the clump */
326 vm_page_queue_entry_t last = NULL; /* last page in the clump */
327 vm_page_queue_entry_t prev = NULL;
328 vm_page_queue_entry_t next;
329 uint_t n_free = 1;
330 extern unsigned int vm_clump_size, vm_clump_promote_threshold;
331 extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
332
333 /*
334 * If elt is part of the vm_pages[] array, find its neighboring buddies in the array.
335 */
336 if (vm_page_in_array(elt)) {
337 vm_page_t p;
338 uint_t i;
339 uint_t n;
340 ppnum_t clump_num;
341
342 first = last = (vm_page_queue_entry_t)elt;
343 clump_num = VM_PAGE_GET_CLUMP(elt);
344 n = VM_PAGE_GET_PHYS_PAGE(elt) & vm_clump_mask;
345
346 /*
347 * Check for preceeding vm_pages[] entries in the same chunk
348 */
349 for (i = 0, p = elt - 1; i < n && vm_page_get(0) <= p; i++, p--) {
350 if (p->vmp_q_state == VM_PAGE_ON_FREE_Q && clump_num == VM_PAGE_GET_CLUMP(p)) {
351 if (prev == NULL) {
352 prev = (vm_page_queue_entry_t)p;
353 }
354 first = (vm_page_queue_entry_t)p;
355 n_free++;
356 }
357 }
358
359 /*
360 * Check the following vm_pages[] entries in the same chunk
361 */
362 for (i = n + 1, p = elt + 1; i < vm_clump_size && p < vm_page_get(vm_pages_count); i++, p++) {
363 if (p->vmp_q_state == VM_PAGE_ON_FREE_Q && clump_num == VM_PAGE_GET_CLUMP(p)) {
364 if (last == (vm_page_queue_entry_t)elt) { /* first one only */
365 __DEBUG_CHECK_BUDDIES(prev, p, vmp_pageq);
366 }
367
368 if (prev == NULL) {
369 prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev);
370 }
371 last = (vm_page_queue_entry_t)p;
372 n_free++;
373 }
374 }
375 __DEBUG_STAT_INCREMENT_INRANGE;
376 }
377
378 /* if elt is not part of vm_pages or if 1st page in clump, insert at tail */
379 if (prev == NULL) {
380 prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(head->prev);
381 }
382
383 /* insert the element */
384 next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(prev->next);
385 elt->vmp_pageq.next = prev->next;
386 elt->vmp_pageq.prev = next->prev;
387 prev->next = next->prev = VM_PAGE_PACK_PTR(elt);
388 __DEBUG_STAT_INCREMENT_INSERTS;
389
390 /*
391 * Check if clump needs to be promoted to head.
392 */
393 if (n_free >= vm_clump_promote_threshold && n_free > 1) {
394 vm_page_queue_entry_t first_prev;
395
396 first_prev = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(first->prev);
397
398 /* If not at head already */
399 if (first_prev != head) {
400 vm_page_queue_entry_t last_next;
401 vm_page_queue_entry_t head_next;
402
403 last_next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(last->next);
404
405 /* verify that the links within the clump are consistent */
406 __DEBUG_VERIFY_LINKS(first, n_free, last_next);
407
408 /* promote clump to head */
409 first_prev->next = last->next;
410 last_next->prev = first->prev;
411 first->prev = VM_PAGE_PACK_PTR(head);
412 last->next = head->next;
413
414 head_next = (vm_page_queue_entry_t)VM_PAGE_UNPACK_PTR(head->next);
415 head_next->prev = VM_PAGE_PACK_PTR(last);
416 head->next = VM_PAGE_PACK_PTR(first);
417 __DEBUG_STAT_INCREMENT_PROMOTES(n_free);
418 }
419 }
420 }
421 #endif /* __x86_64__ */
422 #endif /* __LP64__ */
423
424
425 /*!
426 * @abstract
427 * The number of pages to try to free/process at once while under
428 * the free page queue lock.
429 *
430 * @discussion
431 * The value is chosen to be a trade off between:
432 * - creating a lot of contention on the free page queue lock
433 * taking and dropping it all the time,
434 * - avoiding to hold the free page queue lock for too long periods of time.
435 */
436 #define VMP_FREE_BATCH_SIZE 64
437
438 /*!
439 * @function vm_page_free_queue_init()
440 *
441 * @abstract
442 * Initialize a free queue.
443 *
444 * @param free_queue The free queue to initialize.
445 */
446 extern void vm_page_free_queue_init(
447 vm_page_free_queue_t free_queue);
448
449 /*!
450 * @function vm_page_free_queue_enter()
451 *
452 * @abstract
453 * Add a page to a free queue.
454 *
455 * @discussion
456 * Internally, the free queue is not synchronized, so any locking must be done
457 * outside of this function.
458 *
459 * The page queue state will be set to the appropriate free queue state for the
460 * memory class (typically VM_PAGE_ON_FREE_Q).
461 *
462 * Note that the callers are responsible for making sure that this operation is
463 * a valid transition. This is a helper to abstract handling of the several
464 * free page queues on the system which sits above vm_page_queue_enter() and
465 * maintains counters as well, but is otherwise oblivious to the page state
466 * machine.
467 *
468 * Most clients should use a wrapper around this function (typically
469 * vm_page_release() or vm_page_free_list()) and not call it directly.
470 *
471 * @param mem_class The memory class to free pages to.
472 * @param page The page to free.
473 * @param pnum the physical address of @c page
474 */
475 extern void vm_page_free_queue_enter(
476 vm_memory_class_t mem_class,
477 vm_page_t page,
478 ppnum_t pnum);
479
480 /*!
481 * @function vm_page_free_queue_remove()
482 *
483 * @abstract
484 * Removes an arbitrary free page from the given free queue.
485 *
486 * @discussion
487 * The given page must be in the given free queue, or state may be corrupted.
488 *
489 * Internally, the free queue is not synchronized, so any locking must be done
490 * outside of this function.
491 *
492 * Note that the callers are responsible for making sure that the requested
493 * queue state corresponds to a valid transition. This is a helper to abstract
494 * handling of the several free page queues on the system which sits above
495 * vm_page_queue_remove() and maintains counters as well, but is otherwise
496 * oblivious to the page state machine.
497 *
498 * Most clients should use a wrapper around this function (typically
499 * vm_page_free_queue_steal()) and not call it directly.
500 *
501 * @param class The memory class corresponding to the free queue
502 * @c page is enqueued on.
503 * @param mem The page to remove.
504 * @param pnum The physical address of @c page
505 * @param q_state The desired queue state for the page.
506 */
507 __attribute__((always_inline))
508 extern void vm_page_free_queue_remove(
509 vm_memory_class_t class,
510 vm_page_t mem,
511 ppnum_t pnum,
512 vm_page_q_state_t q_state);
513
514 /*!
515 * @function vm_page_free_queue_grab()
516 *
517 * @abstract
518 * Gets pages from the free queue.
519 *
520 * @discussion
521 * Clients cannot get more pages than the free queue has; attempting to do so
522 * will cause a panic.
523 *
524 * Internally, the free queue is not synchronized, so any locking must be done
525 * outside of this function.
526 *
527 * Note that the callers are responsible for making sure that the requested
528 * queue state corresponds to a valid transition. This is a helper to abstract
529 * handling of the several free page queues on the system which sits above
530 * vm_page_queue_remove() and maintains counters as well, but is otherwise
531 * oblivious to the page state machine.
532 *
533 * Most clients should use a wrapper (typically vm_page_grab_options())
534 * around this function and not call it directly.
535 *
536 * @param options The grab options.
537 * @param mem_class The memory class to allocate from.
538 * @param num_pages The number of pages to grab.
539 * @param q_state The vmp_q_state to set on the page.
540 *
541 * @returns
542 * A list of pages; the list will be num_pages long.
543 */
544 extern vm_page_list_t vm_page_free_queue_grab(
545 vm_grab_options_t options,
546 vm_memory_class_t mem_class,
547 unsigned int num_pages,
548 vm_page_q_state_t q_state);
549
550 /*!
551 * @abstract
552 * Perform a wakeup for a free page queue wait event.
553 *
554 * @param event the free page queue event to wake up
555 * @param n the number of threads to try to wake up
556 * (UINT32_MAX means all).
557 */
558 extern void vm_page_free_wakeup(event_t event, uint32_t n);
559
560
561 extern void vm_page_assign_special_state(vm_page_t mem, vm_page_specialq_t mode);
562 extern void vm_page_update_special_state(vm_page_t mem);
563 extern void vm_page_add_to_specialq(vm_page_t mem, boolean_t first);
564 extern void vm_page_remove_from_specialq(vm_page_t mem);
565
566
567 /*
568 * Prototypes for functions exported by this module.
569 */
570 extern void vm_page_bootstrap(
571 vm_offset_t *startp,
572 vm_offset_t *endp);
573
574 extern vm_page_t kdp_vm_page_lookup(
575 vm_object_t object,
576 vm_object_offset_t offset);
577
578 extern vm_page_t vm_page_lookup(
579 vm_object_t object,
580 vm_object_offset_t offset);
581
582 /*!
583 * @abstract
584 * Creates a fictitious page.
585 *
586 * @discussion
587 * This function never returns VM_PAGE_NULL;
588 *
589 * Pages made by this function have the @c vm_page_fictitious_addr
590 * fake physical address.
591 */
592 extern vm_page_t vm_page_create_fictitious(void);
593
594 /*!
595 * @abstract
596 * Returns a kernel guard page (used by @c kmem_alloc_guard()).
597 *
598 * @discussion
599 * Pages returned by this function have the @c vm_page_guard_addr
600 * fake physical address.
601 *
602 * @param canwait Whether the caller can wait, if true,
603 * this function never returns VM_PAGE_NULL.
604 */
605 extern vm_page_t vm_page_create_guard(bool canwait);
606
607 /*!
608 * @abstract
609 * Create a private VM page.
610 *
611 * @discussion
612 * These pages allow for non canonical references to the same physical page.
613 * Its @c VM_PAGE_GET_PHYS_PAGE() will be @c base_page.
614 *
615 * Such pages must not be released back to the free queues directly,
616 * @c vm_page_reset_private() must be called first.
617 *
618 * This function never returns VM_PAGE_NULL
619 *
620 * @param base_page The physical page this private page represents.
621 */
622 extern vm_page_t vm_page_create_private(ppnum_t base_page);
623
624 /*!
625 * @abstract
626 * Returns whether this is the canonical page for a regular managed kernel page.
627 *
628 * @discussion
629 * A kernel page is the canonical @c vm_page_t for a given pmap managed physical
630 * page. These pages are made at startup, or when @c ml_static_mfree() is
631 * called, and are never freed.
632 *
633 * Its @c VM_PAGE_GET_PHYS_PAGE() will be a valid @c ppnum_t value.
634 *
635 * A page can either be:
636 * - a canonical page (@c vm_page_is_canonical())
637 * - a fictitious page (@c vm_page_is_fictitious()),
638 * of which guard pages are a special case (@c vm_page_is_guard())
639 * - a private page (@c vm_page_is_private())
640 */
641 extern bool vm_page_is_canonical(const struct vm_page *m) __pure2;
642
643 /*!
644 * @abstract
645 * Returns whether this page is fictitious (made by @c vm_page_create_guard()
646 * or by @c vm_page_create_fictitious()).
647 *
648 * @discussion
649 * A page can either be:
650 * - a canonical page (@c vm_page_is_canonical())
651 * - a fictitious page (@c vm_page_is_fictitious()),
652 * of which guard pages are a special case (@c vm_page_is_guard())
653 * - a private page (@c vm_page_is_private())
654 */
655 extern bool vm_page_is_fictitious(const struct vm_page *m);
656
657 /*!
658 * @abstract
659 * Returns whether this is a kernel guard page that was made by
660 * @c vm_page_create_guard().
661 */
662 extern bool vm_page_is_guard(const struct vm_page *m) __pure2;
663
664 /*!
665 * @abstract
666 * Returns whether a page is private (made by @c vm_page_create_private(),
667 * or converted from a fictitious page by @c vm_page_make_private()).
668 *
669 * @discussion
670 * A page can either be:
671 * - a canonical page (@c vm_page_is_canonical())
672 * - a fictitious page (@c vm_page_is_fictitious()),
673 * of which guard pages are a special case (@c vm_page_is_guard())
674 * - a private page (@c vm_page_is_private())
675 */
676 extern bool vm_page_is_private(const struct vm_page *m);
677
678 /*!
679 * @abstract
680 * Converts a fictitious page made by @c vm_page_create_fictitious()
681 * into a private page.
682 *
683 * @param m The fictitious page to convert into a private one.
684 * @param base_page The physical page that this page will represent
685 * (@c vm_page_create_private()).
686 */
687 extern void vm_page_make_private(vm_page_t m, ppnum_t base_page);
688
689 /*!
690 * @abstract
691 * Converts a private page into a fictitious page (as if made by
692 * @c vm_page_create_fictitious()).
693 *
694 * @discussion
695 * Private pages can't be released with @c vm_page_release()
696 * without being turned into a fictitious page first using this function.
697 */
698 extern void vm_page_reset_private(vm_page_t m);
699
700 #if HAS_MTE
701
702 /*!
703 * @abstract
704 * Returns whether the specified physical page number is actual tag storage.
705 *
706 * @discussion
 * Unlike pmap_in_tag_storage_range(), this returns false for pages in the
 * tag storage range that are recursive or unmanaged.
709 *
710 * Note that it might return "true" for pages that the MTE Info data structure
711 * considers covering "unmanaged" memory.
712 *
713 * @param page A canonical VM page.
714 * @param pnum The page physical number for @c page.
715 */
716 extern bool vm_page_is_tag_storage_pnum(vm_page_t page, ppnum_t pnum) __pure2;
717
718 static inline bool
vm_page_is_tag_storage(vm_page_t page)719 vm_page_is_tag_storage(vm_page_t page)
720 {
721 return vm_page_is_tag_storage_pnum(page, VM_PAGE_GET_PHYS_PAGE(page));
722 }
723
724 #endif /* HAS_MTE */
725
726 extern bool vm_pool_low(void);
727
728 /*!
729 * @abstract
730 * Grabs a page.
731 *
732 * @discussion
733 * Allocate a page by looking at:
734 * - per-cpu queues,
735 * - global free queues,
736 * - magical queues (delayed, secluded, ...)
737 *
738 * This function always succeeds for VM privileged threads,
739 * unless VM_PAGE_GRAB_NOPAGEWAIT is passed.
740 *
741 * This function might return VM_PAGE_NULL if there are no pages left.
742 */
743 extern vm_page_t vm_page_grab_options(vm_grab_options_t options);
744
745 static inline vm_page_t
vm_page_grab(void)746 vm_page_grab(void)
747 {
748 return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE);
749 }
750
751 /*!
752 * @abstract
753 * Returns the proper grab options for the specified object.
754 */
755 extern vm_grab_options_t vm_page_grab_options_for_object(vm_object_t object);
756
757 #if XNU_VM_HAS_LOPAGE
758 extern vm_page_t vm_page_grablo(vm_grab_options_t options);
759 #else
760 static inline vm_page_t
vm_page_grablo(vm_grab_options_t options)761 vm_page_grablo(vm_grab_options_t options)
762 {
763 return vm_page_grab_options(options);
764 }
765 #endif
766
767
768 __options_closed_decl(vmp_release_options_t, uint32_t, {
769 VMP_RELEASE_NONE = 0x00,
770 VMP_RELEASE_Q_LOCKED = 0x01,
771 VMP_RELEASE_SKIP_FREE_CHECK = 0x02,
772 VMP_RELEASE_HIBERNATE = 0x04,
773 VMP_RELEASE_STARTUP = 0x08,
774 });
775
776 extern void vm_page_release(
777 vm_page_t page,
778 vmp_release_options_t options);
779
780 extern boolean_t vm_page_wait(
781 int interruptible);
782
783 extern void vm_page_init(
784 vm_page_t page,
785 ppnum_t phys_page);
786
787 extern void vm_page_free(
788 vm_page_t page);
789
790 extern void vm_page_free_unlocked(
791 vm_page_t page,
792 boolean_t remove_from_hash);
793
794
795 extern void vm_page_balance_inactive(
796 int max_to_move);
797
798 extern void vm_page_activate(
799 vm_page_t page);
800
801 extern void vm_page_deactivate(
802 vm_page_t page);
803
804 extern void vm_page_deactivate_internal(
805 vm_page_t page,
806 boolean_t clear_hw_reference);
807
808 extern void vm_page_enqueue_cleaned(vm_page_t page);
809
810 extern void vm_page_lru(
811 vm_page_t page);
812
813 extern void vm_page_speculate(
814 vm_page_t page,
815 boolean_t new);
816
817 extern void vm_page_speculate_ageit(
818 struct vm_speculative_age_q *aq);
819
820 extern void vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks);
821
822 extern void vm_page_rename(
823 vm_page_t page,
824 vm_object_t new_object,
825 vm_object_offset_t new_offset);
826
827 extern void vm_page_insert(
828 vm_page_t page,
829 vm_object_t object,
830 vm_object_offset_t offset);
831
832 extern void vm_page_insert_wired(
833 vm_page_t page,
834 vm_object_t object,
835 vm_object_offset_t offset,
836 vm_tag_t tag);
837
838
839 extern void vm_page_insert_internal(
840 vm_page_t page,
841 vm_object_t object,
842 vm_object_offset_t offset,
843 vm_tag_t tag,
844 boolean_t queues_lock_held,
845 boolean_t insert_in_hash,
846 boolean_t batch_pmap_op,
847 boolean_t delayed_accounting,
848 uint64_t *delayed_ledger_update);
849
850 extern void vm_page_replace(
851 vm_page_t mem,
852 vm_object_t object,
853 vm_object_offset_t offset);
854
855 extern void vm_page_remove(
856 vm_page_t page,
857 boolean_t remove_from_hash);
858
859 #if HAS_MTE
860 extern void vm_page_zero_fill(
861 vm_page_t page,
862 bool zero_tags);
863 #else /* HAS_MTE */
864 extern void vm_page_zero_fill(
865 vm_page_t page);
866 #endif /* HAS_MTE */
867
868 extern void vm_page_part_zero_fill(
869 vm_page_t m,
870 vm_offset_t m_pa,
871 vm_size_t len);
872
873 extern void vm_page_copy(
874 vm_page_t src_page,
875 vm_page_t dest_page);
876
877 extern void vm_page_part_copy(
878 vm_page_t src_m,
879 vm_offset_t src_pa,
880 vm_page_t dst_m,
881 vm_offset_t dst_pa,
882 vm_size_t len);
883
884 extern void vm_page_wire(
885 vm_page_t page,
886 vm_tag_t tag,
887 boolean_t check_memorystatus);
888
889 extern void vm_page_unwire(
890 vm_page_t page,
891 boolean_t queueit);
892
893 extern void vm_set_page_size(void);
894
895 extern void vm_page_validate_cs(
896 vm_page_t page,
897 vm_map_size_t fault_page_size,
898 vm_map_offset_t fault_phys_offset);
899
900 extern void vm_page_validate_cs_mapped(
901 vm_page_t page,
902 vm_map_size_t fault_page_size,
903 vm_map_offset_t fault_phys_offset,
904 const void *kaddr);
905 extern void vm_page_validate_cs_mapped_slow(
906 vm_page_t page,
907 const void *kaddr);
908 extern void vm_page_validate_cs_mapped_chunk(
909 vm_page_t page,
910 const void *kaddr,
911 vm_offset_t chunk_offset,
912 vm_size_t chunk_size,
913 boolean_t *validated,
914 unsigned *tainted);
915
916 extern void vm_page_free_prepare_queues(
917 vm_page_t page);
918
919 extern void vm_page_free_prepare_object(
920 vm_page_t page,
921 boolean_t remove_from_hash);
922
923 extern wait_result_t vm_page_sleep(
924 vm_object_t object,
925 vm_page_t m,
926 wait_interrupt_t interruptible,
927 lck_sleep_action_t action);
928
929 extern void vm_page_wakeup(
930 vm_object_t object,
931 vm_page_t m);
932
933 extern void vm_page_wakeup_done(
934 vm_object_t object,
935 vm_page_t m);
936
937 typedef struct page_worker_token {
938 thread_pri_floor_t pwt_floor_token;
939 bool pwt_did_register_inheritor;
940 } page_worker_token_t;
941
942 extern void vm_page_wakeup_done_with_inheritor(
943 vm_object_t object,
944 vm_page_t m,
945 page_worker_token_t *token);
946
947 extern void page_worker_register_worker(
948 event_t event,
949 page_worker_token_t *out_token);
950
951 extern boolean_t vm_page_is_relocatable(
952 vm_page_t m,
953 vm_relocate_reason_t reloc_reason);
954
955 extern kern_return_t vm_page_relocate(
956 vm_page_t m1,
957 int * compressed_pages,
958 vm_relocate_reason_t reason,
959 vm_page_t* new_page);
960
961 extern bool vm_page_is_restricted(
962 vm_page_t mem);
963
964 /*
965 * Functions implemented as macros. m->vmp_wanted and m->vmp_busy are
966 * protected by the object lock.
967 */
968
969 #if !XNU_TARGET_OS_OSX
970 #define SET_PAGE_DIRTY(m, set_pmap_modified) \
971 MACRO_BEGIN \
972 vm_page_t __page__ = (m); \
973 if (__page__->vmp_pmapped == TRUE && \
974 __page__->vmp_wpmapped == TRUE && \
975 __page__->vmp_dirty == FALSE && \
976 (set_pmap_modified)) { \
977 pmap_set_modify(VM_PAGE_GET_PHYS_PAGE(__page__)); \
978 } \
979 __page__->vmp_dirty = TRUE; \
980 MACRO_END
981 #else /* !XNU_TARGET_OS_OSX */
982 #define SET_PAGE_DIRTY(m, set_pmap_modified) \
983 MACRO_BEGIN \
984 vm_page_t __page__ = (m); \
985 __page__->vmp_dirty = TRUE; \
986 MACRO_END
987 #endif /* !XNU_TARGET_OS_OSX */
988
989 #define VM_PAGE_FREE(p) \
990 MACRO_BEGIN \
991 vm_page_free_unlocked(p, TRUE); \
992 MACRO_END
993
994
995 #define VM_PAGE_WAIT() ((void)vm_page_wait(THREAD_UNINT))
996
997 static inline void
vm_free_page_lock(void)998 vm_free_page_lock(void)
999 {
1000 lck_mtx_lock(&vm_page_queue_free_lock);
1001 }
1002
/* Take the free-page queue lock in spin mode (no blocking on contention). */
static inline void
vm_free_page_lock_spin(void)
{
	lck_mtx_lock_spin(&vm_page_queue_free_lock);
}
1008
/* Convert a spin-mode hold of the free-page queue lock into a full
 * mutex hold. */
static inline void
vm_free_page_lock_convert(void)
{
	lck_mtx_convert_spin(&vm_page_queue_free_lock);
}
1014
/* Release the free-page queue lock. */
static inline void
vm_free_page_unlock(void)
{
	lck_mtx_unlock(&vm_page_queue_free_lock);
}
1020
1021
/* Convert a spin-mode hold of vm_page_queue_lock into a full mutex hold. */
#define vm_page_lockconvert_queues() lck_mtx_convert_spin(&vm_page_queue_lock)
1023
1024
/*
 * Local page-queue (vpl) lock helpers: spin locks when VPL_LOCK_SPIN is
 * configured, otherwise mutexes acquired in spin mode. Macro arguments are
 * now fully parenthesized in the expansions so that expression arguments
 * (e.g. `VPL_LOCK_INIT(base + i, ...)`) expand correctly.
 */
#ifdef VPL_LOCK_SPIN
extern lck_grp_t vm_page_lck_grp_local;

#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_spin_init(&(vlq)->vpl_lock, (vpl_grp), (vpl_attr))
#define VPL_LOCK(vpl) lck_spin_lock_grp((vpl), &vm_page_lck_grp_local)
#define VPL_UNLOCK(vpl) lck_spin_unlock((vpl))
#else
#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_mtx_init(&(vlq)->vpl_lock, (vpl_grp), (vpl_attr))
#define VPL_LOCK(vpl) lck_mtx_lock_spin((vpl))
#define VPL_UNLOCK(vpl) lck_mtx_unlock((vpl))
#endif
1036
/*
 * @macro VM_PAGE_SPECULATIVE_USED_ADD
 * @brief Development/debug-only accounting: atomically bump
 *        vm_page_speculative_used. Expands to nothing on release kernels.
 */
#if DEVELOPMENT || DEBUG
#define VM_PAGE_SPECULATIVE_USED_ADD() \
	MACRO_BEGIN \
	OSAddAtomic(1, &vm_page_speculative_used); \
	MACRO_END
#else
#define VM_PAGE_SPECULATIVE_USED_ADD()
#endif
1045
/*
 * @macro VM_PAGE_CONSUME_CLUSTERED
 * @brief Record the first real use of a page that was brought in
 *        speculatively as part of a cluster: bump its object's pages_used
 *        count and clear vmp_clustered, all under the physical-page lock.
 * @note @c mem is evaluated multiple times — do not pass an expression
 *       with side effects.
 */
#define VM_PAGE_CONSUME_CLUSTERED(mem) \
	MACRO_BEGIN \
	ppnum_t	__phys_page; \
	__phys_page = VM_PAGE_GET_PHYS_PAGE(mem); \
	pmap_lock_phys_page(__phys_page); \
	if (mem->vmp_clustered) { \
	        vm_object_t o; \
	        o = VM_PAGE_OBJECT(mem); \
	        assert(o); \
	        o->pages_used++; \
	        mem->vmp_clustered = FALSE; \
	        VM_PAGE_SPECULATIVE_USED_ADD(); \
	} \
	pmap_unlock_phys_page(__phys_page); \
	MACRO_END
1061
1062
/*
 * @macro VM_PAGE_COUNT_AS_PAGEIN
 * @brief Account a page-in against the current task's pageins counter and
 *        fire the matching DTrace probes: anonpgin for internal (anonymous)
 *        objects, fspgin for file-backed ones, plus the generic pgin probe.
 *
 * Fix: "&current_task()" had been corrupted to "¤t_task()" by an
 * HTML-entity mangling of "&curren"; restored the address-of expression.
 */
#define VM_PAGE_COUNT_AS_PAGEIN(mem) \
	MACRO_BEGIN \
	{ \
	vm_object_t o; \
	o = VM_PAGE_OBJECT(mem); \
	DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); \
	counter_inc(&current_task()->pageins); \
	if (o->internal) { \
	        DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); \
	} else { \
	        DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); \
	} \
	} \
	MACRO_END
1077
1078
/*
 * @macro VM_PAGE_MOVE_STOLEN
 * @brief Adjust for stolen pages accounted elsewhere: remove @c page_count
 *        pages from both the stolen count and the initial wired count.
 * @note @c page_count is evaluated twice.
 */
#define VM_PAGE_MOVE_STOLEN(page_count) \
	MACRO_BEGIN \
	vm_page_stolen_count -= (page_count); \
	vm_page_wire_count_initial -= (page_count); \
	MACRO_END
1085
1086 kern_return_t
1087 pmap_enter_object_options_check(
1088 pmap_t pmap,
1089 vm_map_address_t virtual_address,
1090 vm_map_offset_t fault_phys_offset,
1091 vm_object_t object,
1092 ppnum_t pn,
1093 vm_prot_t protection,
1094 vm_prot_t fault_type,
1095 boolean_t wired,
1096 unsigned int options);
1097
/*
 * @func pmap_enter_options_check
 * @brief Checked pmap-enter for a vm_page_t with explicit options —
 *        presumably validates arguments before calling into the pmap layer;
 *        confirm at the definition.
 */
extern kern_return_t pmap_enter_options_check(
	pmap_t pmap,
	vm_map_address_t virtual_address,
	vm_map_offset_t fault_phys_offset,
	vm_page_t page,
	vm_prot_t protection,
	vm_prot_t fault_type,
	boolean_t wired,
	unsigned int options);

/*
 * @func pmap_enter_check
 * @brief Convenience form of pmap_enter_options_check() without
 *        fault_phys_offset or options — inferred from the parallel
 *        signatures; confirm at the definition.
 */
extern kern_return_t pmap_enter_check(
	pmap_t pmap,
	vm_map_address_t virtual_address,
	vm_page_t page,
	vm_prot_t protection,
	vm_prot_t fault_type,
	boolean_t wired);
1115
/*
 * Action bits for struct vm_page_delayed_work's dw_mask. Each bit requests
 * one deferred operation for vm_page_do_delayed_work() to apply to the
 * page; names mirror the operations/functions they stand in for.
 */
#define DW_vm_page_unwire 0x01
#define DW_vm_page_wire 0x02
#define DW_vm_page_free 0x04
#define DW_vm_page_activate 0x08
#define DW_vm_page_deactivate_internal 0x10
#define DW_vm_page_speculate 0x20
#define DW_vm_page_lru 0x40
#define DW_vm_pageout_throttle_up 0x80
#define DW_PAGE_WAKEUP 0x100
#define DW_clear_busy 0x200
#define DW_clear_reference 0x400
#define DW_set_reference 0x800
#define DW_move_page 0x1000
#define DW_VM_PAGE_QUEUES_REMOVE 0x2000
#define DW_enqueue_cleaned 0x4000
#define DW_vm_phantom_cache_update 0x8000
#if HAS_MTE
/*
 * Wake up a tag storage page if it's done being used
 * in a UPL. This requires the page queues lock.
 */
#define DW_vm_page_wakeup_tag_storage 0x10000
#endif /* HAS_MTE */
#define DW_vm_page_iopl_wire 0x20000
#define DW_vm_page_iopl_wire_write 0x40000
1141
/*
 * @struct vm_page_delayed_work
 * @brief One deferred page operation: the target page plus a DW_* bitmask
 *        of actions for vm_page_do_delayed_work() to apply.
 */
struct vm_page_delayed_work {
	vm_page_t dw_m;
	int dw_mask;
};

/* Capacity of a delayed-work batch (entries per context). */
#define DEFAULT_DELAYED_WORK_LIMIT 32

/*
 * @struct vm_page_delayed_work_ctx
 * @brief A fixed-size batch of delayed work entries plus the thread that
 *        owns the batch.
 */
struct vm_page_delayed_work_ctx {
	struct vm_page_delayed_work dwp[DEFAULT_DELAYED_WORK_LIMIT];
	thread_t delayed_owner;
};
1153
/*
 * @func vm_page_do_delayed_work
 * @brief Apply @c dw_count batched DW_* operations to pages of @c object
 *        (see the comment below: it may drop the object lock, relying on
 *        the busy bit to keep pages stable).
 */
kern_return_t vm_page_do_delayed_work(vm_object_t object, vm_tag_t tag, struct vm_page_delayed_work *dwp, int dw_count);
1155
/*
 * @macro DELAYED_WORK_LIMIT
 * @brief Clamp @c max to the system-wide vm_max_delayed_work_limit.
 * @note @c max is now parenthesized in the expansion (macro hygiene:
 *       an expression argument would previously bind to the surrounding
 *       operators); it may still be evaluated twice, so avoid side effects.
 */
#define DELAYED_WORK_LIMIT(max) ((vm_max_delayed_work_limit >= (max) ? (max) : vm_max_delayed_work_limit))
1157
1158 /*
1159 * vm_page_do_delayed_work may need to drop the object lock...
1160 * if it does, we need the pages it's looking at to
1161 * be held stable via the busy bit, so if busy isn't already
1162 * set, we need to set it and ask vm_page_do_delayed_work
1163 * to clear it and wakeup anyone that might have blocked on
1164 * it once we're done processing the page.
1165 */
1166
1167 #define VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_cnt) \
1168 MACRO_BEGIN \
1169 if (mem->vmp_busy == FALSE) { \
1170 mem->vmp_busy = TRUE; \
1171 if ( !(dwp->dw_mask & DW_vm_page_free)) \
1172 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); \
1173 } \
1174 dwp->dw_m = mem; \
1175 dwp++; \
1176 dw_cnt++; \
1177 MACRO_END
1178
1179
//todo int
/*
 * @func vm_object_page_grab
 * @brief Grab a page from the given object — presumably reclaiming a
 *        resident page for reuse; confirm at the definition.
 */
extern vm_page_t vm_object_page_grab(vm_object_t);

//todo int
#if VM_PAGE_BUCKETS_CHECK
/* Debug-only consistency check of the vm_page hash buckets. */
extern void vm_page_buckets_check(void);
#endif /* VM_PAGE_BUCKETS_CHECK */

//todo int
/* Remove @c mem from whichever paging queue it is on; optionally also from
 * the special queue(s). */
extern void vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_specialq);
/* Remove @c page from its object's internal page table/list. */
extern void vm_page_remove_internal(vm_page_t page);
/* Enqueue @c mem on the inactive queue, at the head when @c first is TRUE. */
extern void vm_page_enqueue_inactive(vm_page_t mem, boolean_t first);
/* Enqueue @c mem on the active queue, at the head when @c first is TRUE. */
extern void vm_page_enqueue_active(vm_page_t mem, boolean_t first);
/* Assert that @c page is safe to place on a pageable queue. */
extern void vm_page_check_pageable_safe(vm_page_t page);
//end int

//todo int
/* Release pages used only during boot back to the system. */
extern void vm_retire_boot_pages(void);

//todo all int

/* Read the page's error bit (vmp_error). */
#define VMP_ERROR_GET(p) ((p)->vmp_error)
1202
1203
1204 #endif /* XNU_KERNEL_PRIVATE */
1205 __END_DECLS
1206
1207 #endif /* _VM_VM_PAGE_INTERNAL_H_ */
1208