xref: /xnu-12377.41.6/osfmk/vm/vm_pageout_xnu.h (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef _VM_VM_PAGEOUT_XNU_H_
30 #define _VM_VM_PAGEOUT_XNU_H_
31 
32 #include <sys/cdefs.h>
33 
34 __BEGIN_DECLS
35 #include <vm/vm_pageout.h>
36 
37 #ifdef XNU_KERNEL_PRIVATE
38 
/* Pageout/UPL instrumentation hooks and accessors. Implementations are not
 * visible in this header; comments below describe only what is evident here. */
39 extern void memoryshot(unsigned int event, unsigned int control);
40 
41 extern void update_vm_info(void);
42 
43 
44 
45 #if CONFIG_IOSCHED
/* I/O-scheduling tier cached on the UPL (CONFIG_IOSCHED builds only). */
46 extern int upl_get_cached_tier(
47 	upl_t                   upl);
48 #endif
49 
/* I/O-completion ("iodone") plumbing — pairs with struct upl_io_completion
 * defined later in this header. */
50 extern void upl_set_iodone(upl_t, void *);
51 extern void upl_set_iodone_error(upl_t, int);
52 extern void upl_callout_iodone(upl_t);
53 
/* Accessor for the upl->highest_page field (see struct upl below). */
54 extern ppnum_t upl_get_highest_page(
55 	upl_t                   upl);
56 
57 extern upl_t upl_associated_upl(upl_t upl);
58 extern void upl_set_associated_upl(upl_t upl, upl_t associated_upl);
59 extern void upl_set_map_exclusive(upl_t upl);
60 extern void upl_clear_map_exclusive(upl_t upl);
/* Filesystem verification-data accessors — see struct upl_fs_verify_info
 * below (per-page verification hashes attached to a UPL). */
61 extern void upl_set_fs_verify_info(upl_t upl, uint32_t size_per_page);
62 extern bool upl_has_fs_verify_info(upl_t upl);
63 extern uint8_t * upl_fs_verify_buf(upl_t upl, uint32_t *size);
64 
65 
66 #include <vm/vm_kern_xnu.h>
67 
68 
/* UPL size/offset accessors. Note that u_offset/u_size are byte-aligned
 * (see the CAUTION comment in struct upl); these return values adjusted to
 * the caller-supplied page_mask. */
69 extern upl_size_t upl_adjusted_size(
70 	upl_t upl,
71 	vm_map_offset_t page_mask);
72 extern vm_object_offset_t upl_adjusted_offset(
73 	upl_t upl,
74 	vm_map_offset_t page_mask);
75 extern vm_object_offset_t upl_get_data_offset(
76 	upl_t upl);
77 
/* Create a UPL covering [offset, offset + *upl_size) of the given map.
 * On return *upl_size, *count and *flags may be updated by the implementation. */
78 extern kern_return_t vm_map_create_upl(
79 	vm_map_t                map,
80 	vm_map_address_t        offset,
81 	upl_size_t              *upl_size,
82 	upl_t                   *upl,
83 	upl_page_info_array_t   page_list,
84 	unsigned int            *count,
85 	upl_control_flags_t     *flags,
86 	vm_tag_t            tag);
87 
/* Free a chained list of vm_page_t structures. */
88 extern void               vm_page_free_list(
89 	vm_page_t               mem,
90 	bool                    prepare_object);
91 
/* Allocate page_count pages and return them as a list in *list. */
92 extern kern_return_t vm_page_alloc_list(
93 	vm_size_t   page_count,
94 	kma_flags_t flags,
95 	vm_page_t  *list);
96 #if XNU_TARGET_OS_OSX
97 extern kern_return_t    vm_pageout_wait(uint64_t deadline);
98 #endif /* XNU_TARGET_OS_OSX */
99 
100 
101 #ifdef  MACH_KERNEL_PRIVATE
102 
103 #include <vm/vm_page.h>
104 
/* Global pageout-daemon state; defined elsewhere in the VM subsystem. */
105 extern unsigned int     vm_pageout_scan_event_counter;
106 extern unsigned int     vm_page_anonymous_count;
107 extern thread_t         vm_pageout_scan_thread;
108 extern thread_t         vm_pageout_gc_thread;
109 extern sched_cond_atomic_t vm_pageout_gc_cond;
110 
111 /*
112  * must hold the page queues lock to
113  * manipulate this structure
114  */
115 struct vm_pageout_queue {
116 	vm_page_queue_head_t pgo_pending;  /* laundry pages to be processed by pager's iothread */
117 	unsigned int    pgo_laundry;       /* current count of laundry pages on queue or in flight */
118 	unsigned int    pgo_maxlaundry;    /* throttle limit: queue counts as throttled once pgo_laundry reaches this (see VM_PAGE_Q_THROTTLED) */
119 
120 	uint32_t
121 	    pgo_busy:1,        /* iothread is currently processing request from pgo_pending */
122 	    pgo_throttled:1,   /* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
123 	    pgo_lowpriority:1, /* iothread is set to use low priority I/O */
124 	    pgo_draining:1,    /* NOTE(review): presumably set while the queue is being drained — confirm against vm_pageout.c */
125 	    pgo_inited:1,      /* NOTE(review): presumably set once the queue has been initialized — confirm */
126 	    pgo_unused_bits:26;
127 };
128 
/* True when the queue has reached its laundry throttle limit
 * (page queues lock required, per the struct comment above). */
129 #define VM_PAGE_Q_THROTTLED(q)          \
130 	((q)->pgo_laundry >= (q)->pgo_maxlaundry)
131 
/* The two global pageout queues: internal (anonymous/compressor-backed)
 * and external (file-backed). NOTE(review): internal/external semantics
 * inferred from naming — confirm in vm_pageout.c. */
132 extern struct   vm_pageout_queue        vm_pageout_queue_internal;
133 extern struct   vm_pageout_queue        vm_pageout_queue_external;
134 
135 /*
136  * This function is redeclared with slightly different parameter types in vfs_cluster.c
137  * This should be fixed at a later time.
138  */
139 extern void vector_upl_set_iostate(upl_t, upl_t, upl_offset_t, upl_size_t);
140 
141 /*
142  *	Routines exported to Mach.
143  */
/* Pageout daemon entry point. */
144 extern void             vm_pageout(void);
145 
146 __startup_func extern void             vm_config_init(void);
147 
148 extern kern_return_t    vm_pageout_internal_start(void);
149 
150 extern void             vm_pageout_object_terminate(
151 	vm_object_t     object);
152 
153 extern void             vm_pageout_cluster(
154 	vm_page_t       m);
155 
156 extern void             vm_pageout_initialize_page(
157 	vm_page_t       m);
158 
159 
/* Per-sub-UPL I/O window (offset and size) within a vector UPL; set/queried
 * via vector_upl_set_iostate() and vector_upl_get_iostate(). */
160 struct _vector_upl_iostates {
161 	upl_offset_t offset;
162 	upl_size_t   size;
163 };
164 
165 typedef struct _vector_upl_iostates vector_upl_iostates_t;
166 
/* A "vector" UPL aggregating multiple sub-UPLs (flexible trailing array). */
167 struct _vector_upl {
168 	upl_size_t              size;
169 	uint32_t                num_upls;       /* sub-UPLs currently present */
170 	uint32_t                invalid_upls;
171 	uint32_t                max_upls;       /* capacity of upls[] */
172 	vm_offset_t             dst_addr;
173 	vm_object_offset_t      offset;
174 	upl_page_info_array_t   pagelist;
175 	struct {
176 		upl_t                   elem;       /* the sub-UPL */
177 		vector_upl_iostates_t   iostate;    /* its I/O window */
178 	} upls[];                               /* flexible array member, max_upls entries */
179 };
180 
181 typedef struct _vector_upl* vector_upl_t;
182 
183 /* universal page list structure */
184 
185 #if UPL_DEBUG
186 #define UPL_DEBUG_COMMIT_RECORDS 4
187 
/* UPL commit/abort debug record (stored in upl->upl_commit_records). */
188 struct ucd {
189 	upl_offset_t    c_beg;      /* start of the committed/aborted range */
190 	upl_offset_t    c_end;      /* end of the committed/aborted range */
191 	int             c_aborted;  /* nonzero if the operation was an abort */
192 	uint32_t        c_btref; /* btref_t */
193 };
194 #endif
195 
/* I/O completion callout attached to a UPL via upl_set_iodone(); the
 * callback is invoked through upl_callout_iodone(). */
196 struct upl_io_completion {
197 	void     *io_context;            /* opaque argument passed to io_done */
198 	void     (*io_done)(void *, int);/* completion callback (context, error) */
199 
200 	int      io_error;               /* error set via upl_set_iodone_error() */
201 };
202 
203 struct upl_fs_verify_info {
204 	uint8_t *verify_data_ptr; /* verification data (hashes) for the data pages in the UPL */
205 	uint32_t verify_data_len; /* the digest size per page (can vary depending on the type of hash) */
206 };
207 
/* Universal page list: describes a range of pages handed to pagers/drivers. */
208 struct upl {
209 	decl_lck_mtx_data(, Lock);      /* Synchronization */
210 	int             ref_count;      /* internal reference count */
211 	int             ext_ref_count;  /* references held by external (kernel-API) users */
212 	int             flags;          /* UPL_* flag bits defined below */
213 	ctid_t          map_addr_owner; /* owning thread for upl_map_range */
214 	/*
215 	 * XXX CAUTION: to accommodate devices with "mixed page sizes",
216 	 * u_offset and u_size are now byte-aligned and no longer
217 	 * page-aligned, on all devices.
218 	 */
219 	vm_object_offset_t u_offset;
220 	upl_size_t      u_size;       /* size in bytes of the address space */
221 	upl_size_t      u_mapped_size;       /* size in bytes of the UPL that is mapped */
222 	vm_offset_t     kaddr;      /* secondary mapping in kernel */
223 	vm_object_t     map_object;
224 	vector_upl_t    vector_upl; /* non-NULL when this UPL is part of a vector UPL */
225 	union {
		/* NOTE(review): union arm presumably selected by the
		 * UPL_HAS_FS_VERIFY_INFO flag — confirm at use sites. */
226 		upl_t   associated_upl;
227 		struct  upl_fs_verify_info *verify_info; /* verification data for the data pages in the UPL */
228 	} u_fs_un;
229 	struct upl_io_completion *upl_iodone;   /* optional completion callout */
230 	ppnum_t         highest_page;           /* returned by upl_get_highest_page() */
231 #if CONFIG_IOSCHED
232 	int             upl_priority;
233 	uint64_t        *upl_reprio_info;
234 	void            *decmp_io_upl;
235 #endif
236 #if CONFIG_IOSCHED || UPL_DEBUG
237 	thread_t        upl_creator;
238 	queue_chain_t   uplq;       /* List of outstanding upls on an obj */
239 #endif
240 #if     UPL_DEBUG
241 	uintptr_t       ubc_alias1;
242 	uintptr_t       ubc_alias2;
243 
244 	uint32_t        upl_state;
245 	uint32_t        upl_commit_index;       /* next slot in upl_commit_records */
246 	uint32_t        upl_create_btref; /* btref_t */
247 
248 	struct  ucd     upl_commit_records[UPL_DEBUG_COMMIT_RECORDS];
249 #endif  /* UPL_DEBUG */
250 
251 	bitmap_t       *lite_list;
252 	struct upl_page_info page_list[];       /* flexible array of per-page info */
253 };
254 
255 /* upl struct flags */
/* Bit values for struct upl.flags. NOTE(review): 0x800 is unassigned —
 * presumably a retired flag; do not reuse without checking vm_pageout.c. */
256 #define UPL_PAGE_LIST_MAPPED    0x1
257 #define UPL_KERNEL_MAPPED       0x2
258 #define UPL_CLEAR_DIRTY         0x4
259 #define UPL_COMPOSITE_LIST      0x8
260 #define UPL_INTERNAL            0x10
261 #define UPL_PAGE_SYNC_DONE      0x20
262 #define UPL_DEVICE_MEMORY       0x40
263 #define UPL_PAGEOUT             0x80
264 #define UPL_LITE                0x100
265 #define UPL_IO_WIRE             0x200
266 #define UPL_ACCESS_BLOCKED      0x400
267 #define UPL_SHADOWED            0x1000
268 #define UPL_KERNEL_OBJECT       0x2000
269 #define UPL_VECTOR              0x4000
270 #define UPL_SET_DIRTY           0x8000
271 #define UPL_HAS_BUSY            0x10000
272 #define UPL_TRACKED_BY_OBJECT   0x20000
273 #define UPL_EXPEDITE_SUPPORTED  0x40000
274 #define UPL_DECMP_REQ           0x80000
275 #define UPL_DECMP_REAL_IO       0x100000
276 #define UPL_MAP_EXCLUSIVE_WAIT  0x200000
277 #define UPL_HAS_FS_VERIFY_INFO  0x400000
278 #define UPL_HAS_WIRED           0x800000
279 
280 /* flags for upl_create flags parameter */
281 #define UPL_CREATE_EXTERNAL     0
282 #define UPL_CREATE_INTERNAL     0x1
283 #define UPL_CREATE_LITE         0x2
284 #define UPL_CREATE_IO_TRACKING  0x4
285 #define UPL_CREATE_EXPEDITE_SUP 0x8
286 
/* Vector-UPL manipulation routines — operate on the struct _vector_upl
 * defined above (sub-UPL lookup, I/O-state get/set, address plumbing). */
287 extern void vector_upl_deallocate(upl_t);
288 extern void vector_upl_set_addr(upl_t, vm_offset_t);
289 extern void vector_upl_get_addr(upl_t, vm_offset_t*);
290 extern void vector_upl_get_iostate(upl_t, upl_t, upl_offset_t*, upl_size_t*);
291 extern void vector_upl_get_iostate_byindex(upl_t, uint32_t, upl_offset_t*, upl_size_t*);
292 extern upl_t vector_upl_subupl_byindex(upl_t, uint32_t);
293 extern upl_t vector_upl_subupl_byoffset(upl_t, upl_offset_t*, upl_size_t*);
294 
295 
296 extern void vm_page_free_reserve(int pages);
297 
298 #endif  /* MACH_KERNEL_PRIVATE */
299 
300 
/* Aggregated tunables and runtime state for the pageout daemon
 * (single global instance: vm_pageout_state, declared below). */
301 struct vm_pageout_state {
302 	boolean_t vm_pressure_thread_running;
303 	boolean_t vm_pressure_changed;
304 	boolean_t vm_restricted_to_single_processor;
305 	int vm_compressor_thread_count;
306 
307 	unsigned int vm_page_speculative_q_age_ms;
308 	unsigned int vm_page_speculative_percentage;
309 	unsigned int vm_page_speculative_target;
310 
	/* Waits below are in milliseconds where noted. */
311 	unsigned int vm_pageout_swap_wait;
312 	unsigned int vm_pageout_idle_wait;      /* milliseconds */
313 	unsigned int vm_pageout_empty_wait;     /* milliseconds */
314 	unsigned int vm_pageout_burst_wait;     /* milliseconds */
315 	unsigned int vm_pageout_deadlock_wait;  /* milliseconds */
316 	unsigned int vm_pageout_deadlock_relief;
317 	unsigned int vm_pageout_burst_inactive_throttle;
318 
319 	unsigned int vm_pageout_inactive;
320 	unsigned int vm_pageout_inactive_used;  /* debugging */
321 	unsigned int vm_pageout_inactive_clean; /* debugging */
322 
323 	uint32_t vm_page_filecache_min;
324 	uint32_t vm_page_filecache_min_divisor;
325 	uint32_t vm_page_xpmapped_min;
326 	uint32_t vm_page_xpmapped_min_divisor;
327 	uint64_t vm_pageout_considered_page_last;
328 
329 	int vm_page_free_count_init;
330 
331 	unsigned int vm_memory_pressure;
332 
	/* memorystatus purge thresholds per pressure level. */
333 	int memorystatus_purge_on_critical;
334 	int memorystatus_purge_on_warning;
335 	int memorystatus_purge_on_urgent;
336 
337 	thread_t vm_pageout_early_swapout_iothread;
338 };
339 
340 extern struct vm_pageout_state vm_pageout_state;
341 
342 /*
343  * This structure is used to track the VM_INFO instrumentation
344  */
345 struct vm_pageout_vminfo {
346 	unsigned long vm_pageout_considered_page;
347 	unsigned long vm_pageout_considered_bq_internal;
348 	unsigned long vm_pageout_considered_bq_external;
349 	unsigned long vm_pageout_skipped_external;
350 	unsigned long vm_pageout_skipped_internal;
351 
352 	unsigned long vm_pageout_pages_evicted;
353 	unsigned long vm_pageout_pages_purged;
354 	unsigned long vm_pageout_freed_cleaned;
355 	unsigned long vm_pageout_freed_speculative;
356 	unsigned long vm_pageout_freed_external;
357 	unsigned long vm_pageout_freed_internal;
358 	unsigned long vm_pageout_inactive_dirty_internal;
359 	unsigned long vm_pageout_inactive_dirty_external;
360 	unsigned long vm_pageout_inactive_referenced;
361 	unsigned long vm_pageout_reactivation_limit_exceeded;
362 	unsigned long vm_pageout_inactive_force_reclaim;
363 	unsigned long vm_pageout_inactive_nolock;
364 	unsigned long vm_pageout_filecache_min_reactivated;
365 	unsigned long vm_pageout_scan_inactive_throttled_internal;
366 	unsigned long vm_pageout_scan_inactive_throttled_external;
367 
368 	uint64_t      vm_pageout_compressions;
369 	uint64_t      vm_compressor_pages_grabbed;
370 	unsigned long vm_compressor_failed;
371 
372 	unsigned long vm_page_pages_freed;
373 
374 	unsigned long vm_phantom_cache_found_ghost;
375 	unsigned long vm_phantom_cache_added_ghost;
376 
377 	unsigned long vm_pageout_protected_sharedcache;
378 	unsigned long vm_pageout_forcereclaimed_sharedcache;
379 	unsigned long vm_pageout_protected_realtime;
380 	unsigned long vm_pageout_forcereclaimed_realtime;
381 
	/* Major-compaction statistics for the compressor compactor. */
382 	uint64_t vm_compactor_major_compactions_completed;
383 	uint64_t vm_compactor_major_compactions_considered;
384 	uint64_t vm_compactor_major_compactions_bailed;
385 	uint64_t vm_compactor_major_compaction_bytes_freed;
386 	uint64_t vm_compactor_major_compaction_bytes_moved;
387 	uint64_t vm_compactor_major_compaction_slots_moved;
388 	uint64_t vm_compactor_major_compaction_segments_freed;
389 	uint64_t vm_compactor_swapouts_queued;
390 	uint64_t vm_compactor_swapout_bytes_wasted;
391 };
392 
393 extern struct vm_pageout_vminfo vm_pageout_vminfo;
394 
/* Swapout iothread entry point. */
395 extern void vm_swapout_thread(void);
396 
397 #if DEVELOPMENT || DEBUG
398 
399 /*
400  *	This structure records the pageout daemon's actions:
401  *	how many pages it looks at and what happens to those pages.
402  *	No locking needed because only one thread modifies the fields.
403  */
404 struct vm_pageout_debug {
405 	uint32_t vm_pageout_balanced;
406 	uint32_t vm_pageout_scan_event_counter;
407 	uint32_t vm_pageout_speculative_dirty;
408 
409 	uint32_t vm_pageout_inactive_busy;
410 	uint32_t vm_pageout_inactive_absent;
411 	uint32_t vm_pageout_inactive_notalive;
412 	uint32_t vm_pageout_inactive_error;
413 	uint32_t vm_pageout_inactive_deactivated;
414 
415 	uint32_t vm_pageout_enqueued_cleaned;
416 
417 	uint32_t vm_pageout_cleaned_busy;
418 	uint32_t vm_pageout_cleaned_nolock;
419 	uint32_t vm_pageout_cleaned_reference_reactivated;
420 	uint32_t vm_pageout_cleaned_volatile_reactivated;
421 	uint32_t vm_pageout_cleaned_reactivated;  /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
422 	uint32_t vm_pageout_cleaned_fault_reactivated;
423 
424 	uint32_t vm_pageout_dirty_no_pager;
425 	uint32_t vm_pageout_purged_objects;
426 
427 	uint32_t vm_pageout_scan_throttle;
428 	uint32_t vm_pageout_scan_reclaimed_throttled;
429 	uint32_t vm_pageout_scan_burst_throttle;
430 	uint32_t vm_pageout_scan_empty_throttle;
431 	uint32_t vm_pageout_scan_swap_throttle;
432 	uint32_t vm_pageout_scan_deadlock_detected;
433 	uint32_t vm_pageout_scan_inactive_throttle_success;
434 	uint32_t vm_pageout_scan_throttle_deferred;
435 
436 	uint32_t vm_pageout_inactive_external_forced_jetsam_count;
437 
438 	uint32_t vm_grab_anon_overrides;
439 	uint32_t vm_grab_anon_nops;
440 
441 	uint32_t vm_pageout_no_victim;
442 	uint32_t vm_pageout_yield_for_free_pages;
443 	unsigned long vm_pageout_throttle_up_count;
444 	uint32_t vm_page_steal_pageout_page;
445 
446 	uint32_t vm_cs_validated_resets;
447 	uint32_t vm_object_iopl_request_sleep_for_cleaning;
448 	uint32_t vm_page_slide_counter;
449 	uint32_t vm_page_slide_errors;
450 	uint32_t vm_page_throttle_count;
451 	/*
452 	 * Statistics about UPL enforcement of copy-on-write obligations.
453 	 */
454 	unsigned long upl_cow;
455 	unsigned long upl_cow_again;
456 	unsigned long upl_cow_pages;
457 	unsigned long upl_cow_again_pages;
458 	unsigned long iopl_cow;
459 	unsigned long iopl_cow_pages;
460 };
461 
462 extern struct vm_pageout_debug vm_pageout_debug;
463 
/* Bump a vm_pageout_debug counter by `value`; expands to nothing on
 * non-DEVELOPMENT/DEBUG builds (see the #else branch below). */
464 #define VM_PAGEOUT_DEBUG(member, value)                 \
465 	MACRO_BEGIN                                     \
466 	        vm_pageout_debug.member += value;       \
467 	MACRO_END
468 #else /* DEVELOPMENT || DEBUG */
469 #define VM_PAGEOUT_DEBUG(member, value)
470 #endif /* DEVELOPMENT || DEBUG */
471 
/* Upper bound on compressor iothreads; sizes the per-thread arrays below. */
472 #define MAX_COMPRESSOR_THREAD_COUNT      8
473 
474 /*
475  * Forward declarations for internal routines.
476  */
477 
478 /*
479  * Contains relevant state for pageout iothreads. Some state is unused by
480  * external (file-backed) thread.
481  */
482 struct pgo_iothread_state {
483 	struct vm_pageout_queue *q;     /* pageout queue this iothread services */
484 	// cheads unused by external thread
485 	void                    *current_early_swapout_chead;
486 	void                    *current_regular_swapout_cheads[COMPRESSOR_PAGEOUT_CHEADS_MAX_COUNT];
487 	void                    *current_late_swapout_chead;
488 	char                    *scratch_buf;
489 	int                     id;     /* index into pgo_iothread_internal_state[] */
490 	thread_t                pgo_iothread; // holds a +1 ref
491 	sched_cond_atomic_t     pgo_wakeup;   /* condition used to wake this iothread */
492 #if DEVELOPMENT || DEBUG
493 	// for perf_compressor benchmark
494 	struct vm_pageout_queue *benchmark_q;
495 #endif /* DEVELOPMENT || DEBUG */
496 };
497 
498 extern struct pgo_iothread_state pgo_iothread_internal_state[MAX_COMPRESSOR_THREAD_COUNT];
499 
500 extern struct pgo_iothread_state pgo_iothread_external_state;
501 
/* Compressor/swapper statistics counters (single global: vmcs_stats).
 * NOTE(review): counter semantics inferred from names — consult
 * vm_compressor code for exact increment sites. */
502 struct vm_compressor_swapper_stats {
503 	uint64_t unripe_under_30s;
504 	uint64_t unripe_under_60s;
505 	uint64_t unripe_under_300s;
506 	uint64_t reclaim_swapins;
507 	uint64_t defrag_swapins;
508 	uint64_t compressor_swap_threshold_exceeded;
509 	uint64_t external_q_throttled;
510 	uint64_t free_count_below_reserve;
511 	uint64_t thrashing_detected;
512 	uint64_t fragmentation_detected;
513 };
514 extern struct vm_compressor_swapper_stats vmcs_stats;
515 
516 #if DEVELOPMENT || DEBUG
/* Per-compressor-thread statistics, indexed by thread id
 * (DEVELOPMENT/DEBUG builds only). */
517 typedef struct vmct_stats_s {
518 	uint64_t vmct_runtimes[MAX_COMPRESSOR_THREAD_COUNT];
519 	uint64_t vmct_pages[MAX_COMPRESSOR_THREAD_COUNT];
520 	uint64_t vmct_iterations[MAX_COMPRESSOR_THREAD_COUNT];
521 	// total mach absolute time that compressor threads has been running
522 	uint64_t vmct_cthreads_total;
523 	int32_t vmct_minpages[MAX_COMPRESSOR_THREAD_COUNT];
524 	int32_t vmct_maxpages[MAX_COMPRESSOR_THREAD_COUNT];
525 } vmct_stats_t;
526 
/* Run the compressor performance benchmark; results (elapsed time, bytes
 * compressed, compressor growth) are returned through the out parameters. */
527 kern_return_t
528 run_compressor_perf_test(
529 	user_addr_t buf,
530 	size_t buffer_size,
531 	uint64_t *time,
532 	uint64_t *bytes_compressed,
533 	uint64_t *compressor_growth);
534 
535 #endif /* DEVELOPMENT || DEBUG */
536 
537 #endif /* XNU_KERNEL_PRIVATE */
538 __END_DECLS
539 
540 #endif  /* _VM_VM_PAGEOUT_XNU_H_ */
541