1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store_internal.h"
30 #include <vm/vm_pageout_xnu.h>
31 #include <vm/vm_protos_internal.h>
32 #include <vm/vm_kern_xnu.h>
33 #include <vm/vm_map_xnu.h>
34 #include <vm/vm_compressor_internal.h>
35 #include <vm/vm_iokit.h>
36 #include <vm/vm_map_internal.h>
37
38 #include <IOKit/IOHibernatePrivate.h>
39 #include <kern/policy_internal.h>
40 #include <sys/kern_memorystatus_xnu.h>
41
/*
 * Mutex (and its lock group) taken around the swap bookkeeping in this
 * file, e.g. the swapfile queue lookup and the create/gc thread state.
 */
LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);

/*
 * While this is TRUE the swapfile create/gc threads and the defragmenter
 * stop their work loops and go back to waiting.
 */
#if defined(XNU_TARGET_OS_OSX)
/*
 * launchd explicitly turns ON swap later during boot on macOS devices.
 */
boolean_t compressor_store_stop_compaction = TRUE;
#else
boolean_t compressor_store_stop_compaction = FALSE;
#endif

/*
 * The addresses of these two variables are the wait events that the
 * swapfile create thread and the swapfile gc thread block on.
 */
boolean_t vm_swapfile_create_needed = FALSE;
boolean_t vm_swapfile_gc_needed = FALSE;

int vm_swapper_throttle = -1; /* I/O throttle tier of the swapout thread; -1 until first adjusted */
uint64_t vm_swapout_thread_id; /* thread id of the swapout thread, recorded in vm_compressor_swap_init() */

uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
uint64_t vm_swap_get_failures = 0; /* Fatal */
uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
int vm_num_swap_files_config = 0; /* max # of swap files; set by vm_compressor_swap_init_swap_file_limit() */
int vm_num_swap_files = 0;
int vm_num_pinned_swap_files = 0;
uint64_t vm_swap_volume_capacity = 0; /* 0 means "not known yet" to vm_compressor_swap_init_swap_file_limit() */
int vm_swapout_thread_processed_segments = 0;
int vm_swapout_thread_awakened = 0;
bool vm_swapout_thread_running = FALSE;
_Atomic bool vm_swapout_wake_pending = false;
int vm_swapfile_create_thread_awakened = 0; /* bumped each time the create thread (re)enters */
int vm_swapfile_create_thread_running = 0; /* 1 while the create thread is in its work loop */
int vm_swapfile_gc_thread_awakened = 0; /* bumped each time the gc thread (re)enters */
int vm_swapfile_gc_thread_running = 0; /* 1 while the gc thread is in its work loop */

int64_t vm_swappin_avail = 0; /* compared against a requested size by VM_SWAP_SHOULD_PIN() on macOS */
boolean_t vm_swappin_enabled = FALSE;
unsigned int vm_swapfile_total_segs_alloced = 0;
unsigned int vm_swapfile_total_segs_alloced_max = 0;
unsigned int vm_swapfile_total_segs_used = 0;
unsigned int vm_swapfile_total_segs_used_max = 0;

/* Base path of the swap files; a decimal index is appended to form each file's name. */
char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
84
85 extern vm_map_t compressor_map;
86 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
87
88 #define SWAP_READY 0x1 /* Swap file is ready to be used */
89 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
90 #define SWAP_WANTED 0x4 /* Swap file has waiters */
91 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
92 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
93
94
/*
 * Bookkeeping record for one swap file. Records are linked through
 * swp_queue onto swf_global_queue and are looked up by swp_index
 * (see vm_swapfile_for_handle()).
 */
struct swapfile {
	queue_head_t swp_queue; /* list of swap files */
	char *swp_path; /* saved pathname of swap file */
	struct vnode *swp_vp; /* backing vnode */
	uint64_t swp_size; /* size of this swap file */
	uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
	unsigned int swp_pathlen; /* length of pathname */
	unsigned int swp_nsegs; /* #segments we can use */
	unsigned int swp_nseginuse; /* #segments in use */
	unsigned int swp_index; /* index of this swap file */
	unsigned int swp_flags; /* state of swap file */
	unsigned int swp_free_hint; /* offset of 1st free chunk */
	unsigned int swp_io_count; /* count of outstanding I/Os */
	c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list *swp_delayed_trim_list_head; /* presumably the trims deferred for this file -- confirm against the trim code */
	unsigned int swp_delayed_trim_count; /* compared against VM_SWAPFILE_DELAYED_TRIM_MAX by VM_SWAP_SHOULD_TRIM() */
};
113
114 queue_head_t swf_global_queue;
115 boolean_t swp_trim_supported = FALSE;
116
117 extern uint64_t dont_trim_until_ts;
118 uint64_t vm_swapfile_last_failed_to_create_ts = 0;
119 uint64_t vm_swapfile_last_successful_create_ts = 0;
120 static bool vm_swapfile_can_be_created = false;
121 static bool delayed_trim_handling_in_progress = false;
122
123 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
124
125 static void vm_swapout_thread_throttle_adjust(void);
126 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
127 static void vm_swapfile_create_thread(void);
128 static void vm_swapfile_gc_thread(void);
129 static void vm_swap_defragment(void);
130 static void vm_swap_handle_delayed_trims(boolean_t);
131 static void vm_swap_do_delayed_trim(struct swapfile *);
132 static void vm_swap_wait_on_trim_handling_in_progress(void);
133 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
134
135 extern int vnode_getwithref(struct vnode* vp);
136
137 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
138
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5
#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/*
 * We reserve compressor pool VA at boot for the max # of swap files. If someone
 * has enabled app swap but we're not an arm large memory device we can't hog
 * all of the VA so we only go up to 4GB.
 */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/* Volume capacity at which the swap-file limit starts scaling above the minimum. */
#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))

/* Number of delayed trims on one swap file at which VM_SWAP_SHOULD_TRIM() fires. */
#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

/* Defragment when forced, or when sparse swapped-out segments exceed 1/16 of the used segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
/* Swap files are never pinned on this configuration. */
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
/* The macro argument is parenthesized so any pointer-valued expression can be passed. */
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
/* Number of delayed trims on one swap file at which VM_SWAP_SHOULD_TRIM() fires. */
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

/* Defragment when forced, or when sparse swapped-out segments exceed 1/4 of the used segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
/* Pin only while the remaining pinnable space is positive and covers _size. */
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
/* The macro argument is parenthesized so any pointer-valued expression can be passed. */
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

/*
 * Reclaim when forced, or when the number of allocated-but-unused segments
 * reaches swapfile_reclaim_threshold_segs. An in-progress reclaim should be
 * abandoned (unless forced) once that number drops to
 * swapfile_reclam_minimum_segs or below.
 */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)

/* Busy: some swapout queue is non-empty while the swapper runs at throttle tier 0. */
#define VM_SWAP_BUSY()  (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
183
184
185 #if CHECKSUM_THE_SWAP
186 extern unsigned int hash_string(char *cp, int len);
187 #endif
188
189 #if RECORD_THE_COMPRESSED_DATA
190 boolean_t c_compressed_record_init_done = FALSE; /* was the record file opened? */
191 int c_compressed_record_write_error = 0;
192 struct vnode *c_compressed_record_vp = NULL; /* the file opened for record write */
193 uint64_t c_compressed_record_file_offset = 0; /* next write offset */
194 void c_compressed_record_init(void);
195 void c_compressed_record_write(char *, int);
196 #endif
197
198 extern void vm_pageout_io_throttle(void);
199
200 static struct swapfile *vm_swapfile_for_handle(uint64_t);
201
202 /*
203 * Called with the vm_swap_data_lock held.
204 */
205
206 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)207 vm_swapfile_for_handle(uint64_t f_offset)
208 {
209 uint64_t file_offset = 0;
210 unsigned int swapfile_index = 0;
211 struct swapfile* swf = NULL;
212
213 file_offset = (f_offset & SWAP_SLOT_MASK);
214 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
215
216 swf = (struct swapfile*) queue_first(&swf_global_queue);
217
218 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
219 if (swapfile_index == swf->swp_index) {
220 break;
221 }
222
223 swf = (struct swapfile*) queue_next(&swf->swp_queue);
224 }
225
226 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
227 swf = NULL;
228 }
229
230 return swf;
231 }
232
233 #if ENCRYPTED_SWAP
234
235 #include <libkern/crypto/aesxts.h>
236
extern int cc_rand_generate(void *, size_t); /* from <libkern/crypto/rand.h> */
238
239 boolean_t swap_crypt_initialized;
240 void swap_crypt_initialize(void);
241
242 symmetric_xts xts_modectx;
243 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
244 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
245
246 #if DEVELOPMENT || DEBUG
247 boolean_t swap_crypt_xts_tested = FALSE;
248 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
249 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
250 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
251 #endif /* DEVELOPMENT || DEBUG */
252
253 unsigned long vm_page_encrypt_counter;
254 unsigned long vm_page_decrypt_counter;
255
256
257 void
swap_crypt_initialize(void)258 swap_crypt_initialize(void)
259 {
260 uint8_t *enckey1, *enckey2;
261 int keylen1, keylen2;
262 int error;
263
264 assert(swap_crypt_initialized == FALSE);
265
266 keylen1 = sizeof(swap_crypt_key1);
267 enckey1 = (uint8_t *)&swap_crypt_key1;
268 keylen2 = sizeof(swap_crypt_key2);
269 enckey2 = (uint8_t *)&swap_crypt_key2;
270
271 error = cc_rand_generate((void *)enckey1, keylen1);
272 assert(!error);
273
274 error = cc_rand_generate((void *)enckey2, keylen2);
275 assert(!error);
276
277 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
278 assert(!error);
279
280 swap_crypt_initialized = TRUE;
281
282 #if DEVELOPMENT || DEBUG
283 uint8_t *encptr;
284 uint8_t *decptr;
285 uint8_t *refptr;
286 uint8_t *iv;
287 uint64_t ivnum[2];
288 int size = 0;
289 int i = 0;
290 int rc = 0;
291
292 assert(swap_crypt_xts_tested == FALSE);
293
294 /*
295 * Validate the encryption algorithms.
296 *
297 * First initialize the test data.
298 */
299 for (i = 0; i < 4096; i++) {
300 swap_crypt_test_page_ref[i] = (char) i;
301 }
302 ivnum[0] = (uint64_t)0xaa;
303 ivnum[1] = 0;
304 iv = (uint8_t *)ivnum;
305
306 refptr = (uint8_t *)swap_crypt_test_page_ref;
307 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
308 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
309 size = 4096;
310
311 /* encrypt */
312 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
313 assert(!rc);
314
315 /* compare result with original - should NOT match */
316 for (i = 0; i < 4096; i++) {
317 if (swap_crypt_test_page_encrypt[i] !=
318 swap_crypt_test_page_ref[i]) {
319 break;
320 }
321 }
322 assert(i != 4096);
323
324 /* decrypt */
325 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
326 assert(!rc);
327
328 /* compare result with original */
329 for (i = 0; i < 4096; i++) {
330 if (swap_crypt_test_page_decrypt[i] !=
331 swap_crypt_test_page_ref[i]) {
332 panic("encryption test failed");
333 }
334 }
335 /* encrypt in place */
336 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
337 assert(!rc);
338
339 /* decrypt in place */
340 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
341 assert(!rc);
342
343 for (i = 0; i < 4096; i++) {
344 if (swap_crypt_test_page_decrypt[i] !=
345 swap_crypt_test_page_ref[i]) {
346 panic("in place encryption test failed");
347 }
348 }
349 swap_crypt_xts_tested = TRUE;
350 #endif /* DEVELOPMENT || DEBUG */
351 }
352
353
354 void
vm_swap_encrypt(c_segment_t c_seg)355 vm_swap_encrypt(c_segment_t c_seg)
356 {
357 uint8_t *ptr;
358 uint8_t *iv;
359 uint64_t ivnum[2];
360 int size = 0;
361 int rc = 0;
362
363 if (swap_crypt_initialized == FALSE) {
364 swap_crypt_initialize();
365 }
366
367 /*
368 * Data stored in the compressor should never need to be faulted in.
369 * Make sure pages storing data that we're encrypting cannot
370 * be stolen out from under us in the off chance that the mapping
371 * gets disconnected while we're actively encrypting.
372 */
373 PAGE_REPLACEMENT_DISALLOWED(TRUE);
374 #if DEVELOPMENT || DEBUG
375 C_SEG_MAKE_WRITEABLE(c_seg);
376 #endif
377 ptr = (uint8_t *)c_seg->c_store.c_buffer;
378 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
379
380 ivnum[0] = (uint64_t)c_seg;
381 ivnum[1] = 0;
382 iv = (uint8_t *)ivnum;
383
384 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
385 assert(!rc);
386
387 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
388
389 #if DEVELOPMENT || DEBUG
390 C_SEG_WRITE_PROTECT(c_seg);
391 #endif
392 PAGE_REPLACEMENT_DISALLOWED(FALSE);
393 }
394
395 void
vm_swap_decrypt(c_segment_t c_seg,bool disallow_page_replacement)396 vm_swap_decrypt(c_segment_t c_seg, bool disallow_page_replacement)
397 {
398 uint8_t *ptr;
399 uint8_t *iv;
400 uint64_t ivnum[2];
401 int size = 0;
402 int rc = 0;
403
404 assert(swap_crypt_initialized);
405
406 /*
407 * See comment in vm_swap_encrypt().
408 * The master lock may already be held, though, which is why we don't do
409 * PAGE_REPLACEMENT_DISALLOWED(TRUE) and do a try_lock instead.
410 */
411 if (disallow_page_replacement) {
412 PAGE_REPLACEMENT_DISALLOWED(TRUE);
413 }
414
415 #if DEVELOPMENT || DEBUG
416 C_SEG_MAKE_WRITEABLE(c_seg);
417 #endif
418 ptr = (uint8_t *)c_seg->c_store.c_buffer;
419 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
420
421 ivnum[0] = (uint64_t)c_seg;
422 ivnum[1] = 0;
423 iv = (uint8_t *)ivnum;
424
425 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
426 assert(!rc);
427
428 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
429
430 #if DEVELOPMENT || DEBUG
431 C_SEG_WRITE_PROTECT(c_seg);
432 #endif
433 if (disallow_page_replacement) {
434 PAGE_REPLACEMENT_DISALLOWED(FALSE);
435 }
436 }
437 #endif /* ENCRYPTED_SWAP */
438
439 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
440 extern bool memorystatus_swap_all_apps;
441
442 void
vm_compressor_swap_init_swap_file_limit(void)443 vm_compressor_swap_init_swap_file_limit(void)
444 {
445 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
446 #if CONFIG_JETSAM
447 if (memorystatus_swap_all_apps) {
448 if (vm_swap_volume_capacity == 0) {
449 /*
450 * Early in boot we don't know the swap volume capacity.
451 * That's fine. Reserve space for the maximum config
452 * and we'll lower this later in boot once we have the capacity.
453 */
454 vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
455 } else {
456 static uint64_t kFixedPointFactor = 100;
457 /*
458 * Scale the max number of swap files linearly.
459 * But we can never go above VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM.
460 */
461 vm_num_swap_files_config = vm_swap_volume_capacity * kFixedPointFactor / VM_SWAP_MIN_VOLUME_CAPACITY
462 * VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / kFixedPointFactor;
463 vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
464 vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
465 }
466 }
467 #endif /* CONFIG_JETSAM */
468 #if DEVELOPMENT || DEBUG
469 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
470 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
471 if (parsed_vm_max_num_swap_files > 0) {
472 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
473 } else {
474 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
475 }
476 }
477 #endif
478 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
479 }
480
481 int vm_swap_enabled = 0;
482 void
vm_compressor_swap_init(void)483 vm_compressor_swap_init(void)
484 {
485 thread_t thread = NULL;
486
487 queue_init(&swf_global_queue);
488
489 #if !XNU_TARGET_OS_OSX
490 /*
491 * dummy value until the swap file gets created
492 * when we drive the first c_segment_t to the
493 * swapout queue... at that time we will
494 * know the true size we have to work with
495 */
496 c_overage_swapped_limit = 16;
497 #endif /* !XNU_TARGET_OS_OSX */
498
499 compressed_swap_chunk_size = c_seg_bufsize;
500 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
501 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
502 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
503
504 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
505 BASEPRI_VM, &thread) != KERN_SUCCESS) {
506 panic("vm_swapout_thread: create failed");
507 }
508 thread_set_thread_name(thread, "VM_swapout");
509 vm_swapout_thread_id = thread->thread_id;
510 thread_deallocate(thread);
511
512 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
513 BASEPRI_VM, &thread) != KERN_SUCCESS) {
514 panic("vm_swapfile_create_thread: create failed");
515 }
516 thread_set_thread_name(thread, "VM_swapfile_create");
517 thread_deallocate(thread);
518
519 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
520 BASEPRI_VM, &thread) != KERN_SUCCESS) {
521 panic("vm_swapfile_gc_thread: create failed");
522 }
523 thread_set_thread_name(thread, "VM_swapfile_gc");
524 /*
525 * Swapfile garbage collection will need to allocate memory
526 * to complete its swap reclaim and in-memory compaction.
527 * So allow it to dip into the reserved VM page pool.
528 */
529 thread_lock(thread);
530 thread->options |= TH_OPT_VMPRIV;
531 thread_unlock(thread);
532 thread_deallocate(thread);
533 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
534 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
535 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
536 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
537
538 vm_swap_enabled = 1;
539 printf("VM Swap Subsystem is ON\n");
540 }
541
542
543 #if RECORD_THE_COMPRESSED_DATA
544
545 void
c_compressed_record_init()546 c_compressed_record_init()
547 {
548 if (c_compressed_record_init_done == FALSE) {
549 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
550 c_compressed_record_init_done = TRUE;
551 }
552 }
553
554 void
c_compressed_record_write(char * buf,int size)555 c_compressed_record_write(char *buf, int size)
556 {
557 if (c_compressed_record_write_error == 0) {
558 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
559 c_compressed_record_file_offset += size;
560 }
561 }
562 #endif
563
564
565 int compaction_swapper_inited = 0;
566
567 void
vm_compaction_swapper_do_init(void)568 vm_compaction_swapper_do_init(void)
569 {
570 struct vnode *vp;
571 char *pathname;
572 int namelen;
573
574 if (compaction_swapper_inited) {
575 return;
576 }
577
578 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
579 compaction_swapper_inited = 1;
580 return;
581 }
582 lck_mtx_lock(&vm_swap_data_lock);
583
584 if (!compaction_swapper_inited) {
585 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
586 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
587 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
588
589 vm_swapfile_open(pathname, &vp);
590
591 if (vp) {
592 if (vnode_pager_isSSD(vp) == FALSE) {
593 /*
594 * swap files live on an HDD, so let's make sure to start swapping
595 * much earlier since we're not worried about SSD write-wear and
596 * we have so little write bandwidth to work with
597 * these values were derived expermentially by running the performance
598 * teams stock test for evaluating HDD performance against various
599 * combinations and looking and comparing overall results.
600 * Note that the > relationship between these 4 values must be maintained
601 */
602 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
603 vm_compressor_minorcompact_threshold_divisor = 15;
604 }
605 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
606 vm_compressor_majorcompact_threshold_divisor = 18;
607 }
608 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
609 vm_compressor_unthrottle_threshold_divisor = 24;
610 }
611 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
612 vm_compressor_catchup_threshold_divisor = 30;
613 }
614 }
615 #if XNU_TARGET_OS_OSX
616 vnode_setswapmount(vp);
617 vm_swappin_avail = vnode_getswappin_avail(vp);
618
619 if (vm_swappin_avail) {
620 vm_swappin_enabled = TRUE;
621 }
622 #endif /* XNU_TARGET_OS_OSX */
623 vm_swapfile_close((uint64_t)pathname, vp);
624 }
625 kfree_data(pathname, namelen);
626
627 compaction_swapper_inited = 1;
628 }
629 lck_mtx_unlock(&vm_swap_data_lock);
630 }
631
632
633 void
vm_swap_consider_defragmenting(int flags)634 vm_swap_consider_defragmenting(int flags)
635 {
636 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
637 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
638
639 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
640 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
641 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
642 lck_mtx_lock(&vm_swap_data_lock);
643
644 if (force_defrag) {
645 vm_swap_force_defrag = TRUE;
646 }
647
648 if (force_reclaim) {
649 vm_swap_force_reclaim = TRUE;
650 }
651
652 if (!vm_swapfile_gc_thread_running) {
653 thread_wakeup((event_t) &vm_swapfile_gc_needed);
654 }
655
656 lck_mtx_unlock(&vm_swap_data_lock);
657 }
658 }
659 }
660
661
662 int vm_swap_defragment_yielded = 0;
663 int vm_swap_defragment_swapin = 0;
664 int vm_swap_defragment_free = 0;
665 int vm_swap_defragment_busy = 0;
666
/*
 * Drain the list of sparse swapped-out segments.
 *
 * Repeatedly takes the first segment on c_swappedout_sparse_list_head and
 * either frees it (no bytes in use) or swaps it back in. The loop ends
 * when the list is empty, or early when compaction has been stopped or
 * the swapper is busy. Each pass bumps one of the vm_swap_defragment_*
 * counters.
 *
 * At the top of every iteration page replacement is disallowed and
 * c_list_lock is held in spin mode; both are released before returning.
 */
static void
vm_swap_defragment()
{
	c_segment_t c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {
		/* give up for now; the remaining segments stay on the list */
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			/* somebody else is working on this segment: wait, then re-examine the list head */
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

#if CONFIG_FREEZE
			if (freezer_incore_cseg_acct) {
				/*
				 * TODO(jason): These two are tricky because they're pre-emptive jetsams.
				 * The system is not unhealthy, but we know that it's about to become unhealthy once
				 * we do this swapin.
				 * So we're waking up the memorystatus thread to make space
				 * (hopefully) before this segment comes in.
				 *
				 * I think the compressor_backing_store needs to keep track of
				 * two new globals that will track the number of segments
				 * being swapped in due to defrag and the number of slots used
				 * in those segments.
				 * Then the health check below can be called from the memorystatus
				 * thread.
				 */
				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}

				/* in-core segments = all segments minus those on either swapped-out list */
				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}
			}
#endif /* CONFIG_FREEZE */
			/*
			 * NOTE(review): a zero return appears to mean the swapin
			 * left c_seg->c_lock held, so it is dropped here -- confirm
			 * against c_seg_swapin().
			 */
			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
				lck_mtx_unlock_always(&c_seg->c_lock);
				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
			}

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
770
771 TUNABLE(uint64_t, vm_swapfile_creation_delay_ns, "vm_swapfile_creation_delay_ns", 15 * NSEC_PER_SEC);
772
773 static inline bool
vm_swapfile_should_create(uint64_t now)774 vm_swapfile_should_create(uint64_t now)
775 {
776 uint64_t delta_failed_creation_ns;
777 absolutetime_to_nanoseconds(now - vm_swapfile_last_failed_to_create_ts, &delta_failed_creation_ns);
778
779 return (vm_num_swap_files < vm_num_swap_files_config) &&
780 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) &&
781 (delta_failed_creation_ns > vm_swapfile_creation_delay_ns);
782 }
783
784 bool vm_swapfile_create_thread_inited = false;
785
/*
 * Body of the "VM_swapfile_create" kernel thread.
 *
 * The thread blocks with this function as its continuation, so every
 * wakeup on vm_swapfile_create_needed re-enters here from the top. On the
 * first entry the thread joins the VM thread group (when thread groups are
 * configured) and marks itself TH_OPT_VMPRIV.
 *
 * Each pass of the loop handles delayed trims and then, with
 * vm_swap_data_lock held, decides whether to create another swap file.
 * Every break out of the loop leaves vm_swap_data_lock held; it is released
 * after the wait has been asserted, just before blocking.
 */
static void
vm_swapfile_create_thread(void)
{
	uint64_t now;

	if (!vm_swapfile_create_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;

		vm_swapfile_create_thread_inited = true;
	}

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		/* stop: hibernation is waiting for this thread to go idle */
		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		/* stop: compaction has been turned off */
		if (compressor_store_stop_compaction == TRUE) {
			break;
		}

		now = mach_absolute_time();

		/* stop: no new swap file is needed (or allowed) at this time */
		if (!vm_swapfile_should_create(now)) {
			break;
		}

		/* the file is created without the lock held */
		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			/* remembered so vm_swapfile_should_create() can delay the next attempt */
			vm_swapfile_last_failed_to_create_ts = now;
			HIBLOG("low swap: failed to create swapfile\n");
		} else {
			vm_swapfile_last_successful_create_ts = now;
		}
	}
	vm_swapfile_create_thread_running = 0;

	/* let a waiter in hibernate_pin_swap() re-check the running flags */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	/* likewise for anyone waiting on compaction to stop */
	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}
855
856
857 #if HIBERNATION
858
859 kern_return_t
hibernate_pin_swap(boolean_t start)860 hibernate_pin_swap(boolean_t start)
861 {
862 vm_compaction_swapper_do_init();
863
864 if (start == FALSE) {
865 lck_mtx_lock(&vm_swap_data_lock);
866 hibernate_in_progress_with_pinned_swap = FALSE;
867 lck_mtx_unlock(&vm_swap_data_lock);
868
869 return KERN_SUCCESS;
870 }
871 if (vm_swappin_enabled == FALSE) {
872 return KERN_SUCCESS;
873 }
874
875 lck_mtx_lock(&vm_swap_data_lock);
876
877 hibernate_in_progress_with_pinned_swap = TRUE;
878
879 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
880 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
881
882 lck_mtx_unlock(&vm_swap_data_lock);
883
884 thread_block(THREAD_CONTINUE_NULL);
885
886 lck_mtx_lock(&vm_swap_data_lock);
887 }
888 if (vm_num_swap_files > vm_num_pinned_swap_files) {
889 hibernate_in_progress_with_pinned_swap = FALSE;
890 lck_mtx_unlock(&vm_swap_data_lock);
891
892 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
893 vm_num_swap_files, vm_num_pinned_swap_files);
894 return KERN_FAILURE;
895 }
896 lck_mtx_unlock(&vm_swap_data_lock);
897
898 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
899 if (vm_swap_create_file() == FALSE) {
900 break;
901 }
902 }
903 return KERN_SUCCESS;
904 }
905 #endif
906 bool vm_swapfile_gc_thread_inited = false;
/*
 * Body of the "VM_swapfile_gc" kernel thread.
 *
 * The thread blocks with this function as its continuation, so every
 * wakeup on vm_swapfile_gc_needed re-enters here from the top. On the
 * first entry the thread joins the VM thread group (when thread groups are
 * configured).
 *
 * Each pass of the loop decides, with vm_swap_data_lock held, whether
 * defragmentation and/or reclaim is needed, clears the force flags, and
 * then does the work with the lock dropped. Every break out of the loop
 * leaves vm_swap_data_lock held; it is released after the wait has been
 * asserted, just before blocking.
 */
static void
vm_swapfile_gc_thread(void)
{
	boolean_t need_defragment;
	boolean_t need_reclaim;

	if (!vm_swapfile_gc_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		vm_swapfile_gc_thread_inited = true;
	}

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {
		lck_mtx_lock(&vm_swap_data_lock);

		/* stop: hibernation is waiting for this thread to go idle */
		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		/* stop: the swapper is busy or compaction has been turned off */
		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
			break;
		}

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
			need_defragment = TRUE;
		}

		/* a reclaim pass is always preceded by a defragment pass */
		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		/* stop: nothing to do */
		if (need_defragment == FALSE && need_reclaim == FALSE) {
			break;
		}

		/* the forced requests have been folded into need_* above */
		vm_swap_force_defrag = FALSE;
		vm_swap_force_reclaim = FALSE;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE) {
			vm_swap_defragment();
		}
		if (need_reclaim == TRUE) {
			vm_swap_reclaim();
		}
	}
	vm_swapfile_gc_thread_running = 0;

	/* let a waiter in hibernate_pin_swap() re-check the running flags */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	/* likewise for anyone waiting on compaction to stop */
	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}
979
980
981
/*
 * Values assigned to vm_swapout_limit by vm_swapout_thread_throttle_adjust(),
 * one per throttle state.  VM_SWAPOUT_LIMIT_MAX also sizes vm_swapout_ctx[].
 */
#define VM_SWAPOUT_LIMIT_T2P 4
#define VM_SWAPOUT_LIMIT_T1P 4
#define VM_SWAPOUT_LIMIT_T0P 6
#define VM_SWAPOUT_LIMIT_T0 8
#define VM_SWAPOUT_LIMIT_MAX 8

/* States of the swapout throttle state machine (values of vm_swapout_state). */
#define VM_SWAPOUT_START 0
#define VM_SWAPOUT_T2_PASSIVE 1
#define VM_SWAPOUT_T1_PASSIVE 2
#define VM_SWAPOUT_T0_PASSIVE 3
#define VM_SWAPOUT_T0 4

/* Current throttle state; advanced by vm_swapout_thread_throttle_adjust(). */
int vm_swapout_state = VM_SWAPOUT_START;
/* Bound compared against vm_swapout_soc_busy in should_process_swapout_queue(). */
int vm_swapout_limit = 1;

/* Counters incremented each time the state machine enters the named state. */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
1001
1002
1003 static void
vm_swapout_thread_throttle_adjust(void)1004 vm_swapout_thread_throttle_adjust(void)
1005 {
1006 switch (vm_swapout_state) {
1007 case VM_SWAPOUT_START:
1008
1009 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1010 vm_swapper_entered_T2P++;
1011
1012 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1013 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1014 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1015 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1016 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1017 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1018
1019 break;
1020
1021 case VM_SWAPOUT_T2_PASSIVE:
1022
1023 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1024 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1025 vm_swapper_entered_T0P++;
1026
1027 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1028 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1029 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1030 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1031 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1032 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1033
1034 break;
1035 }
1036 if (swapout_target_age || hibernate_flushing == TRUE) {
1037 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
1038 vm_swapper_entered_T1P++;
1039
1040 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1041 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1042 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1043 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1044 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
1045 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
1046 }
1047 break;
1048
1049 case VM_SWAPOUT_T1_PASSIVE:
1050
1051 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1052 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1053 vm_swapper_entered_T0P++;
1054
1055 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1056 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1057 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1058 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1059 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1060 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1061
1062 break;
1063 }
1064 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
1065 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1066 vm_swapper_entered_T2P++;
1067
1068 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1069 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1070 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1071 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1072 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1073 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1074 }
1075 break;
1076
1077 case VM_SWAPOUT_T0_PASSIVE:
1078
1079 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1080 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1081 vm_swapper_entered_T2P++;
1082
1083 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1084 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1085 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1086 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1087 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1088 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1089
1090 break;
1091 }
1092 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1093 vm_swapper_entered_T0++;
1094
1095 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1096 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1097 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1098 vm_swapout_state = VM_SWAPOUT_T0;
1099 }
1100 break;
1101
1102 case VM_SWAPOUT_T0:
1103
1104 if (SWAPPER_HAS_CAUGHTUP()) {
1105 vm_swapper_entered_T0P++;
1106
1107 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1108 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1109 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1110 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1111 }
1112 break;
1113 }
1114 }
1115
/* Incremented by vm_swapout_thread() each time it finds a zero-sized segment on a swapout queue. */
int vm_swapout_found_empty = 0;

/* Pool of swapout completion contexts, one per possible in-flight swapout. */
struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];

/* Number of contexts with a swapout in flight (swp_io_busy set). */
int vm_swapout_soc_busy = 0;
/* Number of contexts whose I/O has completed but has not yet been reaped (swp_io_done set). */
int vm_swapout_soc_done = 0;
1122
1123
1124 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1125 vm_swapout_find_free_soc(void)
1126 {
1127 int i;
1128
1129 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1130 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1131 return &vm_swapout_ctx[i];
1132 }
1133 }
1134 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1135
1136 return NULL;
1137 }
1138
1139 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1140 vm_swapout_find_done_soc(void)
1141 {
1142 int i;
1143
1144 if (vm_swapout_soc_done) {
1145 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1146 if (vm_swapout_ctx[i].swp_io_done) {
1147 return &vm_swapout_ctx[i];
1148 }
1149 }
1150 }
1151 return NULL;
1152 }
1153
1154 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1155 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1156 {
1157 kern_return_t kr;
1158
1159 if (soc->swp_io_error) {
1160 kr = KERN_FAILURE;
1161 } else {
1162 kr = KERN_SUCCESS;
1163 }
1164
1165 lck_mtx_unlock_always(c_list_lock);
1166
1167 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1168 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1169
1170 lck_mtx_lock_spin_always(c_list_lock);
1171
1172 soc->swp_io_done = 0;
1173 soc->swp_io_busy = 0;
1174
1175 vm_swapout_soc_busy--;
1176 vm_swapout_soc_done--;
1177 }
1178
/* Set to true by vm_swapout_thread() after its first-entry setup has run. */
bool vm_swapout_thread_inited = false;
extern uint32_t c_donate_swapout_count;
#if CONFIG_JETSAM
bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
/*
 * swapout_sleep_threshold sets the percentage of the swapout threshold at which
 * the swap thread will stop processing the swapout queue.
 * By default this is 90, which means we will swap until the
 * swapout queue size is at 90% of the threshold to wake the swap thread.
 * By definition the queue length must be >= 100% of the threshold when the
 * swap thread is woken up. On development builds this can be adjusted with
 * the vm.swapout_sleep_threshold sysctl.
 */
uint32_t swapout_sleep_threshold = 90;
#endif /* CONFIG_JETSAM */
1194 static bool
should_process_swapout_queue(const queue_head_t * swapout_list_head)1195 should_process_swapout_queue(const queue_head_t *swapout_list_head)
1196 {
1197 bool process_queue = !queue_empty(swapout_list_head) &&
1198 vm_swapout_soc_busy < vm_swapout_limit &&
1199 !compressor_store_stop_compaction;
1200 #if CONFIG_JETSAM
1201 if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
1202 process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
1203 }
1204 #endif /* CONFIG_JETSAM */
1205 return process_queue;
1206 }
1207
/*
 * Body of the swapout thread.
 *
 * Each wakeup drains the swapout queues: for every segment taken off the
 * current queue it issues a swap write through vm_swap_put(), handing it a
 * completion context whose callback is vm_swapout_iodone(), and reaps any
 * writes that have already completed.  Once no queue should be processed
 * any further it blocks on the vm_swapout_thread event with this function
 * as its continuation.
 */
void
vm_swapout_thread(void)
{
	uint32_t size = 0;
	c_segment_t c_seg = NULL;
	kern_return_t kr = KERN_SUCCESS;
	struct swapout_io_completion *soc;
	queue_head_t *swapout_list_head;
	bool queues_empty = false;

	/* one-time setup, performed only on the first entry */
	if (!vm_swapout_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;
		vm_swapout_thread_inited = true;
	}

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	/* every wakeup starts with the early swapout queue */
	swapout_list_head = &c_early_swapout_list_head;
	vm_swapout_thread_running = TRUE;
	os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
again:
	/* c_list_lock is held at the top of every iteration */
	while (should_process_swapout_queue(swapout_list_head)) {
		c_seg = (c_segment_t)queue_first(swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			/*
			 * Someone else has this segment busy: wait for it and
			 * re-evaluate the queue from the top.
			 * NOTE(review): c_seg_wait_on_busy presumably drops
			 * c_seg->c_lock -- confirm against its definition.
			 */
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			/*
			 * c_seg_free_locked will drop the c_list_lock and
			 * the c_seg->c_lock.
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);
			c_seg = NULL;

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		/* mark the segment busy and move it to the swap I/O queue */
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

		lck_mtx_unlock_always(c_list_lock);
		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		/*
		 * should_process_swapout_queue() only lets us get here while
		 * vm_swapout_soc_busy < vm_swapout_limit, so a free context
		 * is expected to exist.
		 */
		soc = vm_swapout_find_free_soc();
		assert(soc);

		soc->swp_upl_ctx.io_context = (void *)soc;
		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
		soc->swp_upl_ctx.io_error = 0;

		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

		if (kr != KERN_SUCCESS) {
			/*
			 * The put failed.  If the completion callback already
			 * ran for this context, undo its accounting, then
			 * finish the segment here with the failure status.
			 */
			if (soc->swp_io_done) {
				lck_mtx_lock_spin_always(c_list_lock);

				soc->swp_io_done = 0;
				vm_swapout_soc_done--;

				lck_mtx_unlock_always(c_list_lock);
			}
			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
		} else {
			/* write is in flight; the context stays busy until reaped */
			soc->swp_io_busy = 1;
			vm_swapout_soc_busy++;
		}

c_seg_is_empty:
		/* c_list_lock is not held here */
		if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
		}

		lck_mtx_lock_spin_always(c_list_lock);

		/* reap every swapout whose I/O has completed so far */
		while ((soc = vm_swapout_find_done_soc())) {
			vm_swapout_complete_soc(soc);
		}
		lck_mtx_unlock_always(c_list_lock);

		vm_swapout_thread_throttle_adjust();

		lck_mtx_lock_spin_always(c_list_lock);
	}
	while ((soc = vm_swapout_find_done_soc())) {
		vm_swapout_complete_soc(soc);
	}
	lck_mtx_unlock_always(c_list_lock);

	vm_pageout_io_throttle();

	lck_mtx_lock_spin_always(c_list_lock);

	/*
	 * Recheck if we have some c_segs to wakeup
	 * post throttle. And, check to see if we
	 * have any more swapouts needed.
	 */
	if (vm_swapout_soc_done) {
		goto again;
	}

#if XNU_TARGET_OS_OSX
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#else /* XNU_TARGET_OS_OSX */
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#endif /* XNU_TARGET_OS_OSX */

	if (!queues_empty) {
		/* pick the next non-empty queue, early queue first */
		swapout_list_head = NULL;
		if (!queue_empty(&c_early_swapout_list_head)) {
			swapout_list_head = &c_early_swapout_list_head;
		} else {
#if XNU_TARGET_OS_OSX
			/*
			 * On macOS we _always_ process all swapout queues.
			 */
			if (!queue_empty(&c_regular_swapout_list_head)) {
				swapout_list_head = &c_regular_swapout_list_head;
			} else {
				swapout_list_head = &c_late_swapout_list_head;
			}
#else /* XNU_TARGET_OS_OSX */
			/*
			 * On non-macOS swap-capable platforms, we might want to
			 * process just the early queue (Freezer) or process both
			 * early and late queues (app swap). We processed the early
			 * queue up above. The late Q will only be processed if the
			 * checks in should_process_swapout_queue give the go-ahead.
			 */
			swapout_list_head = &c_late_swapout_list_head;
#endif /* XNU_TARGET_OS_OSX */
		}
		if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
			goto again;
		}
	}

	/*
	 * Nothing more to do: register the wait and clear the running flag
	 * while still holding c_list_lock, which vm_swapout_iodone() takes
	 * before testing the flag and issuing its wakeup.
	 */
	assert_wait((event_t)&vm_swapout_thread, THREAD_UNINT);

	vm_swapout_thread_running = FALSE;

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
1389
1390
1391 void
vm_swapout_iodone(void * io_context,int error)1392 vm_swapout_iodone(void *io_context, int error)
1393 {
1394 struct swapout_io_completion *soc;
1395
1396 soc = (struct swapout_io_completion *)io_context;
1397
1398 lck_mtx_lock_spin_always(c_list_lock);
1399
1400 soc->swp_io_done = 1;
1401 soc->swp_io_error = error;
1402 vm_swapout_soc_done++;
1403
1404 if (!vm_swapout_thread_running) {
1405 thread_wakeup((event_t)&vm_swapout_thread);
1406 }
1407
1408 lck_mtx_unlock_always(c_list_lock);
1409 }
1410
1411
1412 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1413 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1414 {
1415 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1416
1417 if (kr == KERN_SUCCESS) {
1418 kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
1419 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1420 }
1421 #if ENCRYPTED_SWAP
1422 else {
1423 vm_swap_decrypt(c_seg, false);
1424 }
1425 #endif /* ENCRYPTED_SWAP */
1426 lck_mtx_lock_spin_always(c_list_lock);
1427 lck_mtx_lock_spin_always(&c_seg->c_lock);
1428
1429 if (kr == KERN_SUCCESS) {
1430 int new_state = C_ON_SWAPPEDOUT_Q;
1431 boolean_t insert_head = FALSE;
1432
1433 if (hibernate_flushing == TRUE) {
1434 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1435 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1436 insert_head = TRUE;
1437 }
1438 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1439 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1440 }
1441
1442 c_seg_switch_state(c_seg, new_state, insert_head);
1443
1444 c_seg->c_store.c_swap_handle = f_offset;
1445
1446 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1447 __assert_only unsigned int new_swapped_count = os_atomic_add(
1448 &vm_page_swapped_count, c_seg->c_slots_used, relaxed);
1449 /* Detect overflow */
1450 assert3u(new_swapped_count, >=, c_seg->c_slots_used);
1451
1452 c_seg->c_swappedin = false;
1453
1454 if (c_seg->c_bytes_used) {
1455 os_atomic_sub(&compressor_bytes_used, c_seg->c_bytes_used, relaxed);
1456 }
1457
1458 #if CONFIG_FREEZE
1459 /*
1460 * Successful swapout. Decrement the in-core compressed pages count.
1461 */
1462 os_atomic_sub(&c_segment_pages_compressed_incore, c_seg->c_slots_used, relaxed);
1463 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1464 if (c_seg->c_has_donated_pages) {
1465 os_atomic_sub(&c_segment_pages_compressed_incore_late_swapout, (c_seg->c_slots_used), relaxed);
1466 }
1467 #endif /* CONFIG_FREEZE */
1468 } else {
1469 if (c_seg->c_overage_swap == TRUE) {
1470 c_seg->c_overage_swap = FALSE;
1471 c_overage_swapped_count--;
1472 }
1473
1474 #if CONFIG_FREEZE
1475 if (c_seg->c_has_freezer_pages) {
1476 if (c_seg->c_task_owner) {
1477 c_seg_update_task_owner(c_seg, NULL);
1478 }
1479 /*
1480 * We failed to swapout a frozen cseg. We need
1481 * to put it back in the queues, specifically the
1482 * AGE_Q. So clear the donated bit otherwise it'll
1483 * land on the swapped_in Q.
1484 */
1485 c_seg->c_has_donated_pages = 0;
1486 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1487 } else
1488 #endif /* CONFIG_FREEZE */
1489 {
1490 if (c_seg->c_has_donated_pages) {
1491 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
1492 } else {
1493 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1494 }
1495 }
1496
1497 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1498 c_seg_need_delayed_compaction(c_seg, TRUE);
1499 }
1500 }
1501 assert(c_seg->c_busy_swapping);
1502 assert(c_seg->c_busy);
1503
1504 c_seg->c_busy_swapping = 0;
1505 lck_mtx_unlock_always(c_list_lock);
1506
1507 C_SEG_WAKEUP_DONE(c_seg);
1508 lck_mtx_unlock_always(&c_seg->c_lock);
1509
1510 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1511 }
1512
1513
1514 boolean_t
vm_swap_create_file()1515 vm_swap_create_file()
1516 {
1517 uint64_t size = 0;
1518 int namelen = 0;
1519 boolean_t swap_file_created = FALSE;
1520 boolean_t swap_file_reuse = FALSE;
1521 boolean_t swap_file_pin = FALSE;
1522 struct swapfile *swf = NULL;
1523
1524 /*
1525 * make sure we've got all the info we need
1526 * to potentially pin a swap file... we could
1527 * be swapping out due to hibernation w/o ever
1528 * having run vm_pageout_scan, which is normally
1529 * the trigger to do the init
1530 */
1531 vm_compaction_swapper_do_init();
1532
1533 /*
1534 * Any swapfile structure ready for re-use?
1535 */
1536
1537 lck_mtx_lock(&vm_swap_data_lock);
1538
1539 swf = (struct swapfile*) queue_first(&swf_global_queue);
1540
1541 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1542 if (swf->swp_flags == SWAP_REUSE) {
1543 swap_file_reuse = TRUE;
1544 break;
1545 }
1546 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1547 }
1548
1549 lck_mtx_unlock(&vm_swap_data_lock);
1550
1551 if (swap_file_reuse == FALSE) {
1552 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1553
1554 swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1555 swf->swp_index = vm_num_swap_files + 1;
1556 swf->swp_pathlen = namelen;
1557 swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1558
1559 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1560 }
1561
1562 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1563
1564 if (swf->swp_vp == NULL) {
1565 if (swap_file_reuse == FALSE) {
1566 kfree_data(swf->swp_path, swf->swp_pathlen);
1567 kfree_type(struct swapfile, swf);
1568 }
1569 return FALSE;
1570 }
1571 vm_swapfile_can_be_created = true;
1572
1573 size = MAX_SWAP_FILE_SIZE;
1574
1575 while (size >= MIN_SWAP_FILE_SIZE) {
1576 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1577
1578 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1579 int num_bytes_for_bitmap = 0;
1580
1581 swap_file_created = TRUE;
1582
1583 swf->swp_size = size;
1584 swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1585 swf->swp_nseginuse = 0;
1586 swf->swp_free_hint = 0;
1587
1588 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1589 /*
1590 * Allocate a bitmap that describes the
1591 * number of segments held by this swapfile.
1592 */
1593 swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1594 Z_WAITOK | Z_ZERO);
1595
1596 swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1597 Z_WAITOK | Z_ZERO);
1598
1599 /*
1600 * passing a NULL trim_list into vnode_trim_list
1601 * will return ENOTSUP if trim isn't supported
1602 * and 0 if it is
1603 */
1604 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1605 swp_trim_supported = TRUE;
1606 }
1607
1608 lck_mtx_lock(&vm_swap_data_lock);
1609
1610 swf->swp_flags = SWAP_READY;
1611
1612 if (swap_file_reuse == FALSE) {
1613 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1614 }
1615
1616 vm_num_swap_files++;
1617
1618 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1619 if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1620 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1621 }
1622
1623 if (swap_file_pin == TRUE) {
1624 vm_num_pinned_swap_files++;
1625 swf->swp_flags |= SWAP_PINNED;
1626 vm_swappin_avail -= swf->swp_size;
1627 }
1628
1629 lck_mtx_unlock(&vm_swap_data_lock);
1630
1631 thread_wakeup((event_t) &vm_num_swap_files);
1632 #if !XNU_TARGET_OS_OSX
1633 if (vm_num_swap_files == 1) {
1634 c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1635
1636 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1637 c_overage_swapped_limit /= 2;
1638 }
1639 }
1640 #endif /* !XNU_TARGET_OS_OSX */
1641 break;
1642 } else {
1643 size = size / 2;
1644 }
1645 }
1646 if (swap_file_created == FALSE) {
1647 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1648
1649 swf->swp_vp = NULL;
1650
1651 if (swap_file_reuse == FALSE) {
1652 kfree_data(swf->swp_path, swf->swp_pathlen);
1653 kfree_type(struct swapfile, swf);
1654 }
1655 }
1656 return swap_file_created;
1657 }
1658
/* Local declaration of vnode_put(); each successful vnode_getwithref() in this file is paired with a call to it. */
extern void vnode_put(struct vnode* vp);
1660 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1661 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1662 {
1663 struct swapfile *swf = NULL;
1664 uint64_t file_offset = 0;
1665 int retval = 0;
1666
1667 assert(c_seg->c_store.c_buffer);
1668
1669 lck_mtx_lock(&vm_swap_data_lock);
1670
1671 swf = vm_swapfile_for_handle(f_offset);
1672
1673 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1674 vm_swap_get_failures++;
1675 retval = 1;
1676 goto done;
1677 }
1678 swf->swp_io_count++;
1679
1680 lck_mtx_unlock(&vm_swap_data_lock);
1681
1682 #if DEVELOPMENT || DEBUG
1683 C_SEG_MAKE_WRITEABLE(c_seg);
1684 #endif
1685 file_offset = (f_offset & SWAP_SLOT_MASK);
1686
1687 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1688 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1689 } else {
1690 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1691 vnode_put(swf->swp_vp);
1692 }
1693
1694 #if DEVELOPMENT || DEBUG
1695 C_SEG_WRITE_PROTECT(c_seg);
1696 #endif
1697 if (retval == 0) {
1698 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1699 } else {
1700 vm_swap_get_failures++;
1701 }
1702
1703 /*
1704 * Free this slot in the swap structure.
1705 */
1706 vm_swap_free(f_offset);
1707
1708 lck_mtx_lock(&vm_swap_data_lock);
1709 swf->swp_io_count--;
1710
1711 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1712 swf->swp_flags &= ~SWAP_WANTED;
1713 thread_wakeup((event_t) &swf->swp_flags);
1714 }
1715 done:
1716 lck_mtx_unlock(&vm_swap_data_lock);
1717
1718 if (retval == 0) {
1719 return KERN_SUCCESS;
1720 } else {
1721 return KERN_FAILURE;
1722 }
1723 }
1724
/*
 * Write a segment's data out to a free slot in one of the swap files.
 *
 *   addr      kernel address of the data to write
 *   f_offset  out: swap handle (swap file index and slot offset) chosen
 *   size      number of bytes to write
 *   c_seg     segment being swapped; must be busy and busy_swapping
 *   soc       optional completion context; when non-NULL its swp_upl_ctx
 *             is passed to vm_swapfile_io() as the UPL context
 *
 * Returns KERN_FAILURE if the arguments are invalid, compaction has been
 * told to stop, or no swap slot is available.  When the write is issued
 * without error and a completion context was supplied, returns KERN_SUCCESS
 * and leaves the final accounting to whoever reaps the context (see
 * vm_swapout_complete_soc()).  Otherwise returns the result of
 * vm_swap_put_finish().
 */
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
{
	unsigned int segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t file_offset = 0;
	uint64_t swapfile_index = 0;
	unsigned int byte_for_segidx = 0;
	unsigned int offset_within_byte = 0;
	boolean_t swf_eligible = FALSE;
	boolean_t waiting = FALSE;
	boolean_t retried = FALSE;
	int error = 0;
	uint64_t now;
	void *upl_ctx = NULL;
	boolean_t drop_iocount = FALSE;

	if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	/* look for a ready swap file that still has a free segment */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		/* start the bitmap scan at the file's free hint */
		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {
			while (segidx < swf->swp_nsegs) {
				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					/* slot already in use */
					segidx++;
					continue;
				}

				/* claim the slot and account for it */
				(swf->swp_bitmap)[byte_for_segidx] |= (uint8_t)(1 << offset_within_byte);

				file_offset = segidx * compressed_swap_chunk_size;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swf->swp_csegs[segidx] = c_seg;

				swapfile_index = swf->swp_index;
				vm_swapfile_total_segs_used++;
				if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
					vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
				}

				now = mach_absolute_time();

				/* kick the create thread if another swap file is warranted */
				if (vm_swapfile_should_create(now) && !vm_swapfile_create_thread_running) {
					thread_wakeup((event_t) &vm_swapfile_create_needed);
				}

				lck_mtx_unlock(&vm_swap_data_lock);

				goto issue_io;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	now = mach_absolute_time();

	if (vm_swapfile_should_create(now)) {
		if (!vm_swapfile_create_thread_running) {
			thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		waiting = TRUE;
		/* vm_swap_create_file() issues a wakeup on &vm_num_swap_files */
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
	} else {
		if (hibernate_flushing) {
			hibernate_no_swapspace = TRUE;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		/* during a hibernation flush, retry the slot search exactly once */
		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures_no_swap_file++;

	return KERN_FAILURE;

issue_io:
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);

	/* the handle encodes the swap file index above the slot offset */
	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if (soc) {
		soc->swp_c_seg = c_seg;
		soc->swp_c_size = size;

		soc->swp_swf = swf;

		soc->swp_io_error = 0;
		soc->swp_io_done = 0;

		upl_ctx = (void *)&soc->swp_upl_ctx;
	}

	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
	} else {
		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
		/* the vnode reference taken above must be dropped by vm_swap_put_finish() */
		drop_iocount = TRUE;
	}

	/*
	 * Finish here on error or when no completion context was supplied;
	 * otherwise the finish is left to the completion path.
	 */
	if (error || upl_ctx == NULL) {
		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
	}

	return KERN_SUCCESS;
}
1868
1869 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1870 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1871 {
1872 if (drop_iocount) {
1873 vnode_put(swf->swp_vp);
1874 }
1875
1876 lck_mtx_lock(&vm_swap_data_lock);
1877
1878 swf->swp_io_count--;
1879
1880 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1881 swf->swp_flags &= ~SWAP_WANTED;
1882 thread_wakeup((event_t) &swf->swp_flags);
1883 }
1884 lck_mtx_unlock(&vm_swap_data_lock);
1885
1886 if (error) {
1887 vm_swap_free(*f_offset);
1888 vm_swap_put_failures++;
1889
1890 return KERN_FAILURE;
1891 }
1892 return KERN_SUCCESS;
1893 }
1894
1895
1896 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1897 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1898 {
1899 uint64_t file_offset = 0;
1900 unsigned int segidx = 0;
1901
1902
1903 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1904 unsigned int byte_for_segidx = 0;
1905 unsigned int offset_within_byte = 0;
1906
1907 file_offset = (f_offset & SWAP_SLOT_MASK);
1908 segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1909
1910 byte_for_segidx = segidx >> 3;
1911 offset_within_byte = segidx % 8;
1912
1913 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1914 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1915
1916 swf->swp_csegs[segidx] = NULL;
1917
1918 swf->swp_nseginuse--;
1919 vm_swapfile_total_segs_used--;
1920
1921 if (segidx < swf->swp_free_hint) {
1922 swf->swp_free_hint = segidx;
1923 }
1924 }
1925 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1926 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1927 }
1928 }
1929 }
1930
1931
/* Number of slots vm_swap_free() released immediately via vm_swap_free_now(). */
uint32_t vm_swap_free_now_count = 0;
/* Number of slots vm_swap_free() queued on a swap file's delayed trim list. */
uint32_t vm_swap_free_delayed_count = 0;
1934
1935
1936 void
vm_swap_free(uint64_t f_offset)1937 vm_swap_free(uint64_t f_offset)
1938 {
1939 struct swapfile *swf = NULL;
1940 struct trim_list *tl = NULL;
1941 uint64_t now;
1942
1943 if (swp_trim_supported == TRUE) {
1944 tl = kalloc_type(struct trim_list, Z_WAITOK);
1945 }
1946
1947 lck_mtx_lock(&vm_swap_data_lock);
1948
1949 swf = vm_swapfile_for_handle(f_offset);
1950
1951 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1952 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1953 /*
1954 * don't delay the free if the underlying disk doesn't support
1955 * trim, or we're in the midst of reclaiming this swap file since
1956 * we don't want to move segments that are technically free
1957 * but not yet handled by the delayed free mechanism
1958 */
1959 vm_swap_free_now(swf, f_offset);
1960
1961 vm_swap_free_now_count++;
1962 goto done;
1963 }
1964 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1965 tl->tl_length = compressed_swap_chunk_size;
1966
1967 tl->tl_next = swf->swp_delayed_trim_list_head;
1968 swf->swp_delayed_trim_list_head = tl;
1969 swf->swp_delayed_trim_count++;
1970 tl = NULL;
1971
1972 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1973 now = mach_absolute_time();
1974
1975 if (now > dont_trim_until_ts) {
1976 thread_wakeup((event_t) &vm_swapfile_create_needed);
1977 }
1978 }
1979 vm_swap_free_delayed_count++;
1980 }
1981 done:
1982 lck_mtx_unlock(&vm_swap_data_lock);
1983
1984 if (tl != NULL) {
1985 kfree_type(struct trim_list, tl);
1986 }
1987 }
1988
1989
1990 static void
vm_swap_wait_on_trim_handling_in_progress()1991 vm_swap_wait_on_trim_handling_in_progress()
1992 {
1993 while (delayed_trim_handling_in_progress) {
1994 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1995 lck_mtx_unlock(&vm_swap_data_lock);
1996
1997 thread_block(THREAD_CONTINUE_NULL);
1998
1999 lck_mtx_lock(&vm_swap_data_lock);
2000 }
2001 }
2002
2003
2004 static void
vm_swap_handle_delayed_trims(boolean_t force_now)2005 vm_swap_handle_delayed_trims(boolean_t force_now)
2006 {
2007 struct swapfile *swf = NULL;
2008
2009 /*
2010 * serialize the race between us and vm_swap_reclaim...
2011 * if vm_swap_reclaim wins it will turn off SWAP_READY
2012 * on the victim it has chosen... we can just skip over
2013 * that file since vm_swap_reclaim will first process
2014 * all of the delayed trims associated with it
2015 */
2016
2017 if (compressor_store_stop_compaction == TRUE) {
2018 return;
2019 }
2020
2021 lck_mtx_lock(&vm_swap_data_lock);
2022
2023 delayed_trim_handling_in_progress = true;
2024
2025 lck_mtx_unlock(&vm_swap_data_lock);
2026
2027 /*
2028 * no need to hold the lock to walk the swf list since
2029 * vm_swap_create (the only place where we add to this list)
2030 * is run on the same thread as this function
2031 * and vm_swap_reclaim doesn't remove items from this list
2032 * instead marking them with SWAP_REUSE for future re-use
2033 */
2034 swf = (struct swapfile*) queue_first(&swf_global_queue);
2035
2036 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2037 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
2038 assert(!(swf->swp_flags & SWAP_RECLAIM));
2039 vm_swap_do_delayed_trim(swf);
2040 }
2041 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2042 }
2043 lck_mtx_lock(&vm_swap_data_lock);
2044
2045 delayed_trim_handling_in_progress = false;
2046 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
2047
2048 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
2049 thread_wakeup((event_t) &vm_swapfile_gc_needed);
2050 }
2051
2052 lck_mtx_unlock(&vm_swap_data_lock);
2053 }
2054
2055 static void
vm_swap_do_delayed_trim(struct swapfile * swf)2056 vm_swap_do_delayed_trim(struct swapfile *swf)
2057 {
2058 struct trim_list *tl, *tl_head;
2059 int error;
2060
2061 if (compressor_store_stop_compaction == TRUE) {
2062 return;
2063 }
2064
2065 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
2066 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
2067 return;
2068 }
2069
2070 lck_mtx_lock(&vm_swap_data_lock);
2071
2072 tl_head = swf->swp_delayed_trim_list_head;
2073 swf->swp_delayed_trim_list_head = NULL;
2074 swf->swp_delayed_trim_count = 0;
2075
2076 lck_mtx_unlock(&vm_swap_data_lock);
2077
2078 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
2079
2080 (void) vnode_put(swf->swp_vp);
2081
2082 while ((tl = tl_head) != NULL) {
2083 unsigned int segidx = 0;
2084 unsigned int byte_for_segidx = 0;
2085 unsigned int offset_within_byte = 0;
2086
2087 lck_mtx_lock(&vm_swap_data_lock);
2088
2089 segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
2090
2091 byte_for_segidx = segidx >> 3;
2092 offset_within_byte = segidx % 8;
2093
2094 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
2095 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2096
2097 swf->swp_csegs[segidx] = NULL;
2098
2099 swf->swp_nseginuse--;
2100 vm_swapfile_total_segs_used--;
2101
2102 if (segidx < swf->swp_free_hint) {
2103 swf->swp_free_hint = segidx;
2104 }
2105 }
2106 lck_mtx_unlock(&vm_swap_data_lock);
2107
2108 tl_head = tl->tl_next;
2109
2110 kfree_type(struct trim_list, tl);
2111 }
2112 }
2113
2114
/*
 * Currently a no-op: performs no work and returns immediately.
 */
void
vm_swap_flush(void)
{
}
2120
2121 int vm_swap_reclaim_yielded = 0;
2122
2123 void
vm_swap_reclaim(void)2124 vm_swap_reclaim(void)
2125 {
2126 vm_offset_t addr = 0;
2127 unsigned int segidx = 0;
2128 uint64_t f_offset = 0;
2129 struct swapfile *swf = NULL;
2130 struct swapfile *smallest_swf = NULL;
2131 unsigned int min_nsegs = 0;
2132 unsigned int byte_for_segidx = 0;
2133 unsigned int offset_within_byte = 0;
2134 uint32_t c_size = 0;
2135
2136 c_segment_t c_seg = NULL;
2137
2138 kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
2139 KMA_NOFAIL | KMA_KOBJECT | KMA_DATA_SHARED, VM_KERN_MEMORY_COMPRESSOR);
2140
2141 lck_mtx_lock(&vm_swap_data_lock);
2142
2143 /*
2144 * if we're running the swapfile list looking for
2145 * candidates with delayed trims, we need to
2146 * wait before making our decision concerning
2147 * the swapfile we want to reclaim
2148 */
2149 vm_swap_wait_on_trim_handling_in_progress();
2150
2151 /*
2152 * from here until we knock down the SWAP_READY bit,
2153 * we need to remain behind the vm_swap_data_lock...
2154 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
2155 * will not consider this swapfile for processing
2156 */
2157 swf = (struct swapfile*) queue_first(&swf_global_queue);
2158 min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
2159 smallest_swf = NULL;
2160
2161 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2162 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
2163 smallest_swf = swf;
2164 min_nsegs = swf->swp_nseginuse;
2165 }
2166 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2167 }
2168
2169 if (smallest_swf == NULL) {
2170 goto done;
2171 }
2172
2173 swf = smallest_swf;
2174
2175
2176 swf->swp_flags &= ~SWAP_READY;
2177 swf->swp_flags |= SWAP_RECLAIM;
2178
2179 if (swf->swp_delayed_trim_count) {
2180 lck_mtx_unlock(&vm_swap_data_lock);
2181
2182 vm_swap_do_delayed_trim(swf);
2183
2184 lck_mtx_lock(&vm_swap_data_lock);
2185 }
2186 segidx = 0;
2187
2188 while (segidx < swf->swp_nsegs) {
2189 ReTry_for_cseg:
2190 /*
2191 * Wait for outgoing I/Os.
2192 */
2193 while (swf->swp_io_count) {
2194 swf->swp_flags |= SWAP_WANTED;
2195
2196 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2197 lck_mtx_unlock(&vm_swap_data_lock);
2198
2199 thread_block(THREAD_CONTINUE_NULL);
2200
2201 lck_mtx_lock(&vm_swap_data_lock);
2202 }
2203 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2204 vm_swap_reclaim_yielded++;
2205 break;
2206 }
2207
2208 byte_for_segidx = segidx >> 3;
2209 offset_within_byte = segidx % 8;
2210
2211 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2212 segidx++;
2213 continue;
2214 }
2215
2216 c_seg = swf->swp_csegs[segidx];
2217 assert(c_seg);
2218
2219 lck_mtx_lock_spin_always(&c_seg->c_lock);
2220
2221 if (c_seg->c_busy) {
2222 /*
2223 * a swapped out c_segment in the process of being freed will remain in the
2224 * busy state until after the vm_swap_free is called on it... vm_swap_free
2225 * takes the vm_swap_data_lock, so can't change the swap state until after
2226 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2227 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2228 * at that point, we re-look up the swap state which will now indicate that
2229 * this c_segment no longer exists.
2230 */
2231 c_seg->c_wanted = 1;
2232
2233 assert_wait((event_t) (c_seg), THREAD_UNINT);
2234 lck_mtx_unlock_always(&c_seg->c_lock);
2235
2236 lck_mtx_unlock(&vm_swap_data_lock);
2237
2238 thread_block(THREAD_CONTINUE_NULL);
2239
2240 lck_mtx_lock(&vm_swap_data_lock);
2241
2242 goto ReTry_for_cseg;
2243 }
2244 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2245
2246 f_offset = segidx * compressed_swap_chunk_size;
2247
2248 assert(c_seg == swf->swp_csegs[segidx]);
2249 swf->swp_csegs[segidx] = NULL;
2250 swf->swp_nseginuse--;
2251
2252 vm_swapfile_total_segs_used--;
2253
2254 lck_mtx_unlock(&vm_swap_data_lock);
2255
2256 assert(C_SEG_IS_ONDISK(c_seg));
2257
2258 C_SEG_BUSY(c_seg);
2259 c_seg->c_busy_swapping = 1;
2260 #if !CHECKSUM_THE_SWAP
2261 c_seg_trim_tail(c_seg);
2262 #endif
2263 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2264
2265 assert(c_size <= c_seg_bufsize && c_size);
2266
2267 lck_mtx_unlock_always(&c_seg->c_lock);
2268
2269 if (vnode_getwithref(swf->swp_vp)) {
2270 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2271 vm_swap_get_failures++;
2272 goto swap_io_failed;
2273 } else {
2274 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2275 /*
2276 * reading the data back in failed, so convert c_seg
2277 * to a swapped in c_segment that contains no data
2278 */
2279 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2280 /*
2281 * returns with c_busy_swapping cleared
2282 */
2283 vnode_put(swf->swp_vp);
2284 vm_swap_get_failures++;
2285 goto swap_io_failed;
2286 }
2287 vnode_put(swf->swp_vp);
2288 }
2289
2290 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2291 vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2292
2293 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2294 vm_offset_t c_buffer;
2295
2296 /*
2297 * the put failed, so convert c_seg to a fully swapped in c_segment
2298 * with valid data
2299 */
2300 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2301
2302 kernel_memory_populate(c_buffer, c_size,
2303 KMA_NOFAIL | KMA_COMPRESSOR,
2304 VM_KERN_MEMORY_COMPRESSOR);
2305
2306 memcpy((char *)c_buffer, (char *)addr, c_size);
2307
2308 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2309 #if ENCRYPTED_SWAP
2310 vm_swap_decrypt(c_seg, true);
2311 #endif /* ENCRYPTED_SWAP */
2312 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2313 /*
2314 * returns with c_busy_swapping cleared
2315 */
2316 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2317
2318 goto swap_io_failed;
2319 }
2320 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2321
2322 lck_mtx_lock_spin_always(&c_seg->c_lock);
2323
2324 c_seg->c_swappedin = false;
2325
2326 assert(C_SEG_IS_ONDISK(c_seg));
2327 /*
2328 * The c_seg will now know about the new location on disk.
2329 */
2330 c_seg->c_store.c_swap_handle = f_offset;
2331
2332 assert(c_seg->c_busy_swapping);
2333 c_seg->c_busy_swapping = 0;
2334 swap_io_failed:
2335 assert(c_seg->c_busy);
2336 C_SEG_WAKEUP_DONE(c_seg);
2337
2338 lck_mtx_unlock_always(&c_seg->c_lock);
2339 lck_mtx_lock(&vm_swap_data_lock);
2340 }
2341
2342 if (swf->swp_nseginuse) {
2343 swf->swp_flags &= ~SWAP_RECLAIM;
2344 swf->swp_flags |= SWAP_READY;
2345
2346 goto done;
2347 }
2348 /*
2349 * We don't remove this inactive swf from the queue.
2350 * That way, we can re-use it when needed again and
2351 * preserve the namespace. The delayed_trim processing
2352 * is also dependent on us not removing swfs from the queue.
2353 */
2354 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2355
2356 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2357
2358 lck_mtx_unlock(&vm_swap_data_lock);
2359
2360 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2361
2362 kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2363 kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2364
2365 lck_mtx_lock(&vm_swap_data_lock);
2366
2367 if (swf->swp_flags & SWAP_PINNED) {
2368 vm_num_pinned_swap_files--;
2369 vm_swappin_avail += swf->swp_size;
2370 }
2371
2372 swf->swp_vp = NULL;
2373 swf->swp_size = 0;
2374 swf->swp_free_hint = 0;
2375 swf->swp_nsegs = 0;
2376 swf->swp_flags = SWAP_REUSE;
2377
2378 vm_num_swap_files--;
2379
2380 done:
2381 thread_wakeup((event_t) &swf->swp_flags);
2382 lck_mtx_unlock(&vm_swap_data_lock);
2383
2384 kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2385 }
2386
2387
2388 uint64_t
vm_swap_get_total_space(void)2389 vm_swap_get_total_space(void)
2390 {
2391 uint64_t total_space = 0;
2392
2393 total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2394
2395 return total_space;
2396 }
2397
2398 uint64_t
vm_swap_get_used_space(void)2399 vm_swap_get_used_space(void)
2400 {
2401 uint64_t used_space = 0;
2402
2403 used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2404
2405 return used_space;
2406 }
2407
/*
 * Allocated swap space that is not in use (total minus used).
 */
uint64_t
vm_swap_get_free_space(void)
{
	const uint64_t total_space = vm_swap_get_total_space();
	const uint64_t used_space = vm_swap_get_used_space();

	return total_space - used_space;
}
2413
2414 uint64_t
vm_swap_get_max_configured_space(void)2415 vm_swap_get_max_configured_space(void)
2416 {
2417 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2418 return num_swap_files * MAX_SWAP_FILE_SIZE;
2419 }
2420
2421 bool
vm_swap_low_on_space(void)2422 vm_swap_low_on_space(void)
2423 {
2424 if (vm_num_swap_files == 0 &&
2425 (!vm_swapfile_can_be_created || !SWAPPER_NEEDS_TO_UNTHROTTLE())) {
2426 /* We haven't started creating swap files yet */
2427 return false;
2428 }
2429
2430 if (vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used <
2431 (unsigned int)vm_swapfile_hiwater_segs / 8) {
2432 /*
2433 * We're running low on swapfile segments
2434 */
2435 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2436 /*
2437 * We've recently failed to create a new swapfile, likely due to disk
2438 * space exhaustion
2439 */
2440 return true;
2441 }
2442
2443 if (vm_num_swap_files == vm_num_swap_files_config) {
2444 /* We've reached the swapfile limit */
2445 return true;
2446 }
2447 }
2448 return false;
2449 }
2450
2451 bool
vm_swap_out_of_space(void)2452 vm_swap_out_of_space(void)
2453 {
2454 if (vm_num_swap_files == 0 &&
2455 (!vm_swapfile_can_be_created || !SWAPPER_NEEDS_TO_UNTHROTTLE())) {
2456 /* We haven't started creating swap files yet */
2457 return false;
2458 }
2459
2460 if (vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used <
2461 VM_SWAPOUT_LIMIT_MAX) {
2462 /*
2463 * We have run out of swapfile segments
2464 */
2465 if (vm_num_swap_files == vm_num_swap_files_config) {
2466 /* And we can't create any more swapfiles */
2467 return true;
2468 }
2469 }
2470
2471 return false;
2472 }
2473
2474 boolean_t
vm_swap_files_pinned(void)2475 vm_swap_files_pinned(void)
2476 {
2477 boolean_t result;
2478
2479 if (vm_swappin_enabled == FALSE) {
2480 return TRUE;
2481 }
2482
2483 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2484
2485 return result;
2486 }
2487
2488 #if CONFIG_FREEZE
2489 boolean_t
vm_swap_max_budget(uint64_t * freeze_daily_budget)2490 vm_swap_max_budget(uint64_t *freeze_daily_budget)
2491 {
2492 boolean_t use_device_value = FALSE;
2493 struct swapfile *swf = NULL;
2494
2495 if (vm_num_swap_files) {
2496 lck_mtx_lock(&vm_swap_data_lock);
2497
2498 swf = (struct swapfile*) queue_first(&swf_global_queue);
2499
2500 if (swf) {
2501 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2502 if (swf->swp_flags == SWAP_READY) {
2503 assert(swf->swp_vp);
2504
2505 if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
2506 use_device_value = TRUE;
2507 }
2508 break;
2509 }
2510 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2511 }
2512 }
2513
2514 lck_mtx_unlock(&vm_swap_data_lock);
2515 } else {
2516 /*
2517 * This block is used for the initial budget value before any swap files
2518 * are created. We create a temp swap file to get the budget.
2519 */
2520
2521 struct vnode *temp_vp = NULL;
2522
2523 vm_swapfile_open(swapfilename, &temp_vp);
2524
2525 if (temp_vp) {
2526 if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
2527 use_device_value = TRUE;
2528 }
2529
2530 vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
2531 temp_vp = NULL;
2532 } else {
2533 *freeze_daily_budget = 0;
2534 }
2535 }
2536
2537 return use_device_value;
2538 }
2539 #endif /* CONFIG_FREEZE */
2540
2541 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2542 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2543 {
2544 lck_mtx_lock(&vm_swap_data_lock);
2545
2546 *alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2547 *used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2548
2549 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2550 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2551
2552 lck_mtx_unlock(&vm_swap_data_lock);
2553 }
2554