1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store_internal.h"
30 #include <vm/vm_pageout_xnu.h>
31 #include <vm/vm_protos_internal.h>
32 #include <vm/vm_kern_xnu.h>
33 #include <vm/vm_map_xnu.h>
34 #include <vm/vm_compressor_internal.h>
35 #include <vm/vm_iokit.h>
36 #include <vm/vm_map_internal.h>
37
38 #include <IOKit/IOHibernatePrivate.h>
39 #include <kern/policy_internal.h>
40 #include <sys/kern_memorystatus_xnu.h>
41
LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
/* Mutex taken by the routines in this file around updates to the swap state below. */
LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);

#if defined(XNU_TARGET_OS_OSX)
/*
 * launchd explicitly turns ON swap later during boot on macOS devices.
 */
boolean_t       compressor_store_stop_compaction = TRUE;
#else
boolean_t       compressor_store_stop_compaction = FALSE;
#endif

/*
 * The addresses of these two variables are used as wait events: the
 * swapfile-create thread blocks on &vm_swapfile_create_needed and the
 * swapfile-gc thread blocks on &vm_swapfile_gc_needed.
 */
boolean_t       vm_swapfile_create_needed = FALSE;
boolean_t       vm_swapfile_gc_needed = FALSE;

/* I/O throttle level last applied to the swapout thread; -1 until vm_swapout_thread_throttle_adjust() assigns one. */
int             vm_swapper_throttle = -1;
/* thread_id of the "VM_swapout" thread, recorded by vm_compressor_swap_init(). */
uint64_t        vm_swapout_thread_id;

uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
uint64_t        vm_swap_get_failures = 0; /* Fatal */
uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
/* Upper bound on the number of swap files; computed by vm_compressor_swap_init_swap_file_limit(). */
int             vm_num_swap_files_config = 0;
int             vm_num_swap_files = 0;
int             vm_num_pinned_swap_files = 0;
uint64_t        vm_swap_volume_capacity = 0;
int             vm_swapout_thread_processed_segments = 0;
int             vm_swapout_thread_awakened = 0;
bool            vm_swapout_thread_running = FALSE;
_Atomic bool    vm_swapout_wake_pending = false;
/* Bumped each time vm_swapfile_create_thread() is (re)entered. */
int             vm_swapfile_create_thread_awakened = 0;
/* 1 while vm_swapfile_create_thread() is working, 0 just before it blocks. */
int             vm_swapfile_create_thread_running = 0;
/* Same pair of counters/flags for vm_swapfile_gc_thread(). */
int             vm_swapfile_gc_thread_awakened = 0;
int             vm_swapfile_gc_thread_running = 0;

/* Compared against the requested size by VM_SWAP_SHOULD_PIN() on macOS. */
int64_t         vm_swappin_avail = 0;
boolean_t       vm_swappin_enabled = FALSE;
unsigned int    vm_swapfile_total_segs_alloced = 0;
unsigned int    vm_swapfile_total_segs_alloced_max = 0;
unsigned int    vm_swapfile_total_segs_used = 0;
unsigned int    vm_swapfile_total_segs_used_max = 0;

/* Base path of the swap files; a numeric index is appended to form each file name. */
char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
84
85 extern vm_map_t compressor_map;
86 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
87
88 #define SWAP_READY 0x1 /* Swap file is ready to be used */
89 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
90 #define SWAP_WANTED 0x4 /* Swap file has waiters */
91 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
92 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
93
94
/*
 * Bookkeeping record for one swap file. Records are linked onto
 * swf_global_queue through swp_queue and are located by swp_index
 * (see vm_swapfile_for_handle()).
 */
struct swapfile {
	queue_head_t            swp_queue;      /* list of swap files */
	char                    *swp_path;      /* saved pathname of swap file */
	struct vnode            *swp_vp;        /* backing vnode */
	uint64_t                swp_size;       /* size of this swap file */
	uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
	unsigned int            swp_pathlen;    /* length of pathname */
	unsigned int            swp_nsegs;      /* #segments we can use */
	unsigned int            swp_nseginuse;  /* #segments in use */
	unsigned int            swp_index;      /* index of this swap file */
	unsigned int            swp_flags;      /* state of swap file (presumably a mask of the SWAP_* bits above -- confirm) */
	unsigned int            swp_free_hint;  /* offset of 1st free chunk */
	unsigned int            swp_io_count;   /* count of outstanding I/Os */
	c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list        *swp_delayed_trim_list_head;    /* head of the list of delayed trims for this file */
	unsigned int            swp_delayed_trim_count;         /* compared against VM_SWAPFILE_DELAYED_TRIM_MAX by VM_SWAP_SHOULD_TRIM() */
};
113
/* Queue of every struct swapfile, linked through swp_queue. */
queue_head_t    swf_global_queue;
boolean_t       swp_trim_supported = FALSE;

extern uint64_t         dont_trim_until_ts;
/* Timestamp sampled by vm_swapfile_create_thread() before its most recent failed vm_swap_create_file(). */
uint64_t                vm_swapfile_last_failed_to_create_ts = 0;
/* Timestamp sampled by vm_swapfile_create_thread() before its most recent successful vm_swap_create_file(). */
uint64_t                vm_swapfile_last_successful_create_ts = 0;
int                     vm_swapfile_can_be_created = FALSE;
boolean_t               delayed_trim_handling_in_progress = FALSE;

/*
 * Set and cleared by hibernate_pin_swap(). When the swapfile create and gc
 * threads observe it TRUE they stop working and issue a wakeup on its address.
 */
boolean_t               hibernate_in_progress_with_pinned_swap = FALSE;

static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);
static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);

extern int vnode_getwithref(struct vnode* vp);

/* One-shot requests set by vm_swap_consider_defragmenting() and cleared by vm_swapfile_gc_thread(). */
boolean_t       vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
138
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5
#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/*
 * We reserve compressor pool VA at boot for the max # of swap files. If someone
 * has enabled app swap but we're not an arm large memory device we can't hog
 * all of the VA so we only go up to 4GB.
 */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))

/* Number of delayed trims a swap file accumulates before VM_SWAP_SHOULD_TRIM() fires. */
#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

/* 1 when a defrag was forced or sparse swapped-out segments exceed 1/16th of the used swap segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
/* Pinning is never requested on this configuration. */
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
/* 1 when 'swf' has batched up enough delayed trims; the argument is parenthesized so any pointer expression is safe. */
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
/* Number of delayed trims a swap file accumulates before VM_SWAP_SHOULD_TRIM() fires. */
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

/* 1 when a defrag was forced or sparse swapped-out segments exceed 1/4 of the used swap segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
/* True when the remaining pinnable space (vm_swappin_avail) covers '_size' bytes. */
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
/* 1 when 'swf' has batched up enough delayed trims; the argument is parenthesized so any pointer expression is safe. */
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */
178
/* 1 when a reclaim was forced, or the number of allocated-but-unused swap segments has reached swapfile_reclaim_threshold_segs. */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
/* 1 when no reclaim is being forced and the allocated-but-unused segment count has dropped to swapfile_reclam_minimum_segs or below. */
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)

/* 1 when any swapout queue (early, regular or late) is non-empty and the swapper is running at THROTTLE_LEVEL_COMPRESSOR_TIER0. */
#define VM_SWAP_BUSY()  (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
183
184
185 #if CHECKSUM_THE_SWAP
186 extern unsigned int hash_string(char *cp, int len);
187 #endif
188
189 #if RECORD_THE_COMPRESSED_DATA
190 boolean_t c_compressed_record_init_done = FALSE; /* was the record file opened? */
191 int c_compressed_record_write_error = 0;
192 struct vnode *c_compressed_record_vp = NULL; /* the file opened for record write */
193 uint64_t c_compressed_record_file_offset = 0; /* next write offset */
194 void c_compressed_record_init(void);
195 void c_compressed_record_write(char *, int);
196 #endif
197
198 extern void vm_pageout_io_throttle(void);
199
200 static struct swapfile *vm_swapfile_for_handle(uint64_t);
201
202 /*
203 * Called with the vm_swap_data_lock held.
204 */
205
206 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)207 vm_swapfile_for_handle(uint64_t f_offset)
208 {
209 uint64_t file_offset = 0;
210 unsigned int swapfile_index = 0;
211 struct swapfile* swf = NULL;
212
213 file_offset = (f_offset & SWAP_SLOT_MASK);
214 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
215
216 swf = (struct swapfile*) queue_first(&swf_global_queue);
217
218 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
219 if (swapfile_index == swf->swp_index) {
220 break;
221 }
222
223 swf = (struct swapfile*) queue_next(&swf->swp_queue);
224 }
225
226 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
227 swf = NULL;
228 }
229
230 return swf;
231 }
232
233 #if ENCRYPTED_SWAP
234
235 #include <libkern/crypto/aesxts.h>
236
extern int cc_rand_generate(void *, size_t);     /* from <libkern/crypto/rand.h> */

/* Set to TRUE by swap_crypt_initialize() once the keys and XTS context are set up. */
boolean_t       swap_crypt_initialized;
void            swap_crypt_initialize(void);

/* XTS context shared by vm_swap_encrypt() and vm_swap_decrypt(). */
symmetric_xts   xts_modectx;
uint32_t        swap_crypt_key1[8];             /* big enough for a 256 bit random key */
uint32_t        swap_crypt_key2[8];             /* big enough for a 256 bit random key */

#if DEVELOPMENT || DEBUG
/* State and page-sized buffers for the self-test run at the end of swap_crypt_initialize(). */
boolean_t       swap_crypt_xts_tested = FALSE;
unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
#endif /* DEVELOPMENT || DEBUG */

/* Running page counts, bumped by vm_swap_encrypt() and vm_swap_decrypt() respectively. */
unsigned long   vm_page_encrypt_counter;
unsigned long   vm_page_decrypt_counter;
255
256
257 void
swap_crypt_initialize(void)258 swap_crypt_initialize(void)
259 {
260 uint8_t *enckey1, *enckey2;
261 int keylen1, keylen2;
262 int error;
263
264 assert(swap_crypt_initialized == FALSE);
265
266 keylen1 = sizeof(swap_crypt_key1);
267 enckey1 = (uint8_t *)&swap_crypt_key1;
268 keylen2 = sizeof(swap_crypt_key2);
269 enckey2 = (uint8_t *)&swap_crypt_key2;
270
271 error = cc_rand_generate((void *)enckey1, keylen1);
272 assert(!error);
273
274 error = cc_rand_generate((void *)enckey2, keylen2);
275 assert(!error);
276
277 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
278 assert(!error);
279
280 swap_crypt_initialized = TRUE;
281
282 #if DEVELOPMENT || DEBUG
283 uint8_t *encptr;
284 uint8_t *decptr;
285 uint8_t *refptr;
286 uint8_t *iv;
287 uint64_t ivnum[2];
288 int size = 0;
289 int i = 0;
290 int rc = 0;
291
292 assert(swap_crypt_xts_tested == FALSE);
293
294 /*
295 * Validate the encryption algorithms.
296 *
297 * First initialize the test data.
298 */
299 for (i = 0; i < 4096; i++) {
300 swap_crypt_test_page_ref[i] = (char) i;
301 }
302 ivnum[0] = (uint64_t)0xaa;
303 ivnum[1] = 0;
304 iv = (uint8_t *)ivnum;
305
306 refptr = (uint8_t *)swap_crypt_test_page_ref;
307 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
308 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
309 size = 4096;
310
311 /* encrypt */
312 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
313 assert(!rc);
314
315 /* compare result with original - should NOT match */
316 for (i = 0; i < 4096; i++) {
317 if (swap_crypt_test_page_encrypt[i] !=
318 swap_crypt_test_page_ref[i]) {
319 break;
320 }
321 }
322 assert(i != 4096);
323
324 /* decrypt */
325 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
326 assert(!rc);
327
328 /* compare result with original */
329 for (i = 0; i < 4096; i++) {
330 if (swap_crypt_test_page_decrypt[i] !=
331 swap_crypt_test_page_ref[i]) {
332 panic("encryption test failed");
333 }
334 }
335 /* encrypt in place */
336 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
337 assert(!rc);
338
339 /* decrypt in place */
340 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
341 assert(!rc);
342
343 for (i = 0; i < 4096; i++) {
344 if (swap_crypt_test_page_decrypt[i] !=
345 swap_crypt_test_page_ref[i]) {
346 panic("in place encryption test failed");
347 }
348 }
349 swap_crypt_xts_tested = TRUE;
350 #endif /* DEVELOPMENT || DEBUG */
351 }
352
353
354 void
vm_swap_encrypt(c_segment_t c_seg)355 vm_swap_encrypt(c_segment_t c_seg)
356 {
357 uint8_t *ptr;
358 uint8_t *iv;
359 uint64_t ivnum[2];
360 int size = 0;
361 int rc = 0;
362
363 if (swap_crypt_initialized == FALSE) {
364 swap_crypt_initialize();
365 }
366
367 #if DEVELOPMENT || DEBUG
368 C_SEG_MAKE_WRITEABLE(c_seg);
369 #endif
370 ptr = (uint8_t *)c_seg->c_store.c_buffer;
371 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
372
373 ivnum[0] = (uint64_t)c_seg;
374 ivnum[1] = 0;
375 iv = (uint8_t *)ivnum;
376
377 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
378 assert(!rc);
379
380 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
381
382 #if DEVELOPMENT || DEBUG
383 C_SEG_WRITE_PROTECT(c_seg);
384 #endif
385 }
386
387 void
vm_swap_decrypt(c_segment_t c_seg)388 vm_swap_decrypt(c_segment_t c_seg)
389 {
390 uint8_t *ptr;
391 uint8_t *iv;
392 uint64_t ivnum[2];
393 int size = 0;
394 int rc = 0;
395
396 assert(swap_crypt_initialized);
397
398 #if DEVELOPMENT || DEBUG
399 C_SEG_MAKE_WRITEABLE(c_seg);
400 #endif
401 ptr = (uint8_t *)c_seg->c_store.c_buffer;
402 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
403
404 ivnum[0] = (uint64_t)c_seg;
405 ivnum[1] = 0;
406 iv = (uint8_t *)ivnum;
407
408 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
409 assert(!rc);
410
411 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
412
413 #if DEVELOPMENT || DEBUG
414 C_SEG_WRITE_PROTECT(c_seg);
415 #endif
416 }
417 #endif /* ENCRYPTED_SWAP */
418
/*
 * Swap sizing values, all assigned in vm_compressor_swap_init():
 * compressed_swap_chunk_size is c_seg_bufsize, and the other three are
 * segment counts derived from MIN_SWAP_FILE_SIZE / MAX_SWAP_FILE_SIZE
 * divided by that chunk size.
 */
uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
extern bool memorystatus_swap_all_apps;
421
422 void
vm_compressor_swap_init_swap_file_limit(void)423 vm_compressor_swap_init_swap_file_limit(void)
424 {
425 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
426 #if CONFIG_JETSAM
427 if (memorystatus_swap_all_apps) {
428 if (vm_swap_volume_capacity == 0) {
429 /*
430 * Early in boot we don't know the swap volume capacity.
431 * That's fine. Reserve space for the maximum config
432 * and we'll lower this later in boot once we have the capacity.
433 */
434 vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
435 } else {
436 static uint64_t kFixedPointFactor = 100;
437 /*
438 * Scale the max number of swap files linearly.
439 * But we can never go above VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM.
440 */
441 vm_num_swap_files_config = vm_swap_volume_capacity * kFixedPointFactor / VM_SWAP_MIN_VOLUME_CAPACITY
442 * VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / kFixedPointFactor;
443 vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
444 vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
445 }
446 }
447 #endif /* CONFIG_JETSAM */
448 #if DEVELOPMENT || DEBUG
449 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
450 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
451 if (parsed_vm_max_num_swap_files > 0) {
452 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
453 } else {
454 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
455 }
456 }
457 #endif
458 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
459 }
460
461 int vm_swap_enabled = 0;
462 void
vm_compressor_swap_init(void)463 vm_compressor_swap_init(void)
464 {
465 thread_t thread = NULL;
466
467 queue_init(&swf_global_queue);
468
469 #if !XNU_TARGET_OS_OSX
470 /*
471 * dummy value until the swap file gets created
472 * when we drive the first c_segment_t to the
473 * swapout queue... at that time we will
474 * know the true size we have to work with
475 */
476 c_overage_swapped_limit = 16;
477 #endif /* !XNU_TARGET_OS_OSX */
478
479 compressed_swap_chunk_size = c_seg_bufsize;
480 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
481 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
482 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
483
484 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
485 BASEPRI_VM, &thread) != KERN_SUCCESS) {
486 panic("vm_swapout_thread: create failed");
487 }
488 thread_set_thread_name(thread, "VM_swapout");
489 vm_swapout_thread_id = thread->thread_id;
490 thread_deallocate(thread);
491
492 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
493 BASEPRI_VM, &thread) != KERN_SUCCESS) {
494 panic("vm_swapfile_create_thread: create failed");
495 }
496 thread_set_thread_name(thread, "VM_swapfile_create");
497 thread_deallocate(thread);
498
499 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
500 BASEPRI_VM, &thread) != KERN_SUCCESS) {
501 panic("vm_swapfile_gc_thread: create failed");
502 }
503 thread_set_thread_name(thread, "VM_swapfile_gc");
504 /*
505 * Swapfile garbage collection will need to allocate memory
506 * to complete its swap reclaim and in-memory compaction.
507 * So allow it to dip into the reserved VM page pool.
508 */
509 thread_lock(thread);
510 thread->options |= TH_OPT_VMPRIV;
511 thread_unlock(thread);
512 thread_deallocate(thread);
513 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
514 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
515 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
516 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
517
518 vm_swap_enabled = 1;
519 printf("VM Swap Subsystem is ON\n");
520 }
521
522
523 #if RECORD_THE_COMPRESSED_DATA
524
525 void
c_compressed_record_init()526 c_compressed_record_init()
527 {
528 if (c_compressed_record_init_done == FALSE) {
529 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
530 c_compressed_record_init_done = TRUE;
531 }
532 }
533
534 void
c_compressed_record_write(char * buf,int size)535 c_compressed_record_write(char *buf, int size)
536 {
537 if (c_compressed_record_write_error == 0) {
538 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
539 c_compressed_record_file_offset += size;
540 }
541 }
542 #endif
543
544
545 int compaction_swapper_inited = 0;
546
547 void
vm_compaction_swapper_do_init(void)548 vm_compaction_swapper_do_init(void)
549 {
550 struct vnode *vp;
551 char *pathname;
552 int namelen;
553
554 if (compaction_swapper_inited) {
555 return;
556 }
557
558 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
559 compaction_swapper_inited = 1;
560 return;
561 }
562 lck_mtx_lock(&vm_swap_data_lock);
563
564 if (!compaction_swapper_inited) {
565 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
566 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
567 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
568
569 vm_swapfile_open(pathname, &vp);
570
571 if (vp) {
572 if (vnode_pager_isSSD(vp) == FALSE) {
573 /*
574 * swap files live on an HDD, so let's make sure to start swapping
575 * much earlier since we're not worried about SSD write-wear and
576 * we have so little write bandwidth to work with
577 * these values were derived expermentially by running the performance
578 * teams stock test for evaluating HDD performance against various
579 * combinations and looking and comparing overall results.
580 * Note that the > relationship between these 4 values must be maintained
581 */
582 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
583 vm_compressor_minorcompact_threshold_divisor = 15;
584 }
585 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
586 vm_compressor_majorcompact_threshold_divisor = 18;
587 }
588 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
589 vm_compressor_unthrottle_threshold_divisor = 24;
590 }
591 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
592 vm_compressor_catchup_threshold_divisor = 30;
593 }
594 }
595 #if XNU_TARGET_OS_OSX
596 vnode_setswapmount(vp);
597 vm_swappin_avail = vnode_getswappin_avail(vp);
598
599 if (vm_swappin_avail) {
600 vm_swappin_enabled = TRUE;
601 }
602 #endif /* XNU_TARGET_OS_OSX */
603 vm_swapfile_close((uint64_t)pathname, vp);
604 }
605 kfree_data(pathname, namelen);
606
607 compaction_swapper_inited = 1;
608 }
609 lck_mtx_unlock(&vm_swap_data_lock);
610 }
611
612
613 void
vm_swap_consider_defragmenting(int flags)614 vm_swap_consider_defragmenting(int flags)
615 {
616 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
617 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
618
619 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
620 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
621 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
622 lck_mtx_lock(&vm_swap_data_lock);
623
624 if (force_defrag) {
625 vm_swap_force_defrag = TRUE;
626 }
627
628 if (force_reclaim) {
629 vm_swap_force_reclaim = TRUE;
630 }
631
632 if (!vm_swapfile_gc_thread_running) {
633 thread_wakeup((event_t) &vm_swapfile_gc_needed);
634 }
635
636 lck_mtx_unlock(&vm_swap_data_lock);
637 }
638 }
639 }
640
641
/* Statistics kept by vm_swap_defragment(). */
int vm_swap_defragment_yielded = 0;     /* passes cut short because compaction was stopped or the swapper was busy */
int vm_swap_defragment_swapin = 0;      /* segments handed to c_seg_swapin() */
int vm_swap_defragment_free = 0;        /* empty segments freed outright */
int vm_swap_defragment_busy = 0;        /* times a busy segment had to be waited for */

#if CONFIG_FREEZE
extern int32_t c_segment_pages_compressed_incore;
extern int32_t c_segment_pages_compressed_incore_late_swapout;
extern uint32_t c_segment_pages_compressed_nearing_limit;
extern uint32_t c_segment_count;
extern uint32_t c_segments_nearing_limit;

extern bool freezer_incore_cseg_acct;
#endif /* CONFIG_FREEZE */

/*
 * Drain the list of sparsely used swapped-out segments
 * (c_swappedout_sparse_list_head).
 *
 * Each pass takes the segment at the head of the list: a busy segment is
 * waited for, a segment with no bytes in use is freed, and any other
 * segment is swapped back in with c_seg_swapin(). The loop ends when the
 * list is empty, or early when compaction has been stopped or the swapper
 * is busy (VM_SWAP_BUSY()).
 */
static void
vm_swap_defragment()
{
	c_segment_t     c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			/* start over from the head of the list with the list lock held again */
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

#if CONFIG_FREEZE
			if (freezer_incore_cseg_acct) {
				/*
				 * TODO(jason): These two are tricky because they're pre-emptive jetsams.
				 * The system is not unhealthy, but we know that it's about to become unhealthy once
				 * we do this swapin.
				 * So we're waking up the memorystatus thread to make space
				 * (hopefully) before this segment comes in.
				 *
				 * I think the compressor_backing_store needs to keep track of
				 * two new globals that will track the number of segments
				 * being swapped in due to defrag and the number of slots used
				 * in those segments.
				 * Then the health check below can be called from the memorystatus
				 * thread.
				 */
				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}

				/* segments that are neither on the swapped-out nor the sparse swapped-out list */
				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}
			}
#endif /* CONFIG_FREEZE */
			/*
			 * NOTE(review): c_seg->c_lock is only dropped here when
			 * c_seg_swapin() returns 0; presumably the callee has already
			 * released it otherwise -- confirm against c_seg_swapin().
			 */
			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
				lck_mtx_unlock_always(&c_seg->c_lock);
				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
			}

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
760
761 TUNABLE(uint64_t, vm_swapfile_creation_delay_ns, "vm_swapfile_creation_delay_ns", 15 * NSEC_PER_SEC);
762
763 static inline bool
vm_swapfile_should_create(uint64_t now)764 vm_swapfile_should_create(uint64_t now)
765 {
766 uint64_t delta_failed_creation_ns;
767 absolutetime_to_nanoseconds(now - vm_swapfile_last_failed_to_create_ts, &delta_failed_creation_ns);
768
769 return (vm_num_swap_files < vm_num_swap_files_config) &&
770 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) &&
771 (delta_failed_creation_ns > vm_swapfile_creation_delay_ns);
772 }
773
/* true once vm_swapfile_create_thread() has done its first-entry setup. */
bool vm_swapfile_create_thread_inited = false;

/*
 * Body of the "VM_swapfile_create" kernel thread.
 *
 * The function blocks with itself as the continuation, so every wakeup on
 * &vm_swapfile_create_needed re-enters it from the top. Each run handles
 * delayed trims and keeps creating swap files for as long as
 * vm_swapfile_should_create() says so, stopping when a hibernation with
 * pinned swap is in progress or compaction has been stopped.
 */
static void
vm_swapfile_create_thread(void)
{
	uint64_t        now;

	/* setup performed only on the first entry */
	if (!vm_swapfile_create_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;

		vm_swapfile_create_thread_inited = true;
	}

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	/* every 'break' below leaves the loop with vm_swap_data_lock held */
	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (compressor_store_stop_compaction == TRUE) {
			break;
		}

		now = mach_absolute_time();

		if (!vm_swapfile_should_create(now)) {
			break;
		}

		/* the file is created without the lock held */
		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			/* remembered so vm_swapfile_should_create() can hold off the next attempt */
			vm_swapfile_last_failed_to_create_ts = now;
			HIBLOG("low swap: failed to create swapfile\n");
		} else {
			vm_swapfile_last_successful_create_ts = now;
		}
	}
	vm_swapfile_create_thread_running = 0;

	/* let anyone waiting for this thread to go idle know that it has */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	/* register for the next wakeup before dropping the lock */
	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}
845
846
847 #if HIBERNATION
848
849 kern_return_t
hibernate_pin_swap(boolean_t start)850 hibernate_pin_swap(boolean_t start)
851 {
852 vm_compaction_swapper_do_init();
853
854 if (start == FALSE) {
855 lck_mtx_lock(&vm_swap_data_lock);
856 hibernate_in_progress_with_pinned_swap = FALSE;
857 lck_mtx_unlock(&vm_swap_data_lock);
858
859 return KERN_SUCCESS;
860 }
861 if (vm_swappin_enabled == FALSE) {
862 return KERN_SUCCESS;
863 }
864
865 lck_mtx_lock(&vm_swap_data_lock);
866
867 hibernate_in_progress_with_pinned_swap = TRUE;
868
869 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
870 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
871
872 lck_mtx_unlock(&vm_swap_data_lock);
873
874 thread_block(THREAD_CONTINUE_NULL);
875
876 lck_mtx_lock(&vm_swap_data_lock);
877 }
878 if (vm_num_swap_files > vm_num_pinned_swap_files) {
879 hibernate_in_progress_with_pinned_swap = FALSE;
880 lck_mtx_unlock(&vm_swap_data_lock);
881
882 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
883 vm_num_swap_files, vm_num_pinned_swap_files);
884 return KERN_FAILURE;
885 }
886 lck_mtx_unlock(&vm_swap_data_lock);
887
888 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
889 if (vm_swap_create_file() == FALSE) {
890 break;
891 }
892 }
893 return KERN_SUCCESS;
894 }
895 #endif
/* true once vm_swapfile_gc_thread() has done its first-entry setup. */
bool vm_swapfile_gc_thread_inited = false;
/*
 * Body of the "VM_swapfile_gc" kernel thread.
 *
 * The function blocks with itself as the continuation, so every wakeup on
 * &vm_swapfile_gc_needed re-enters it from the top. Each run repeatedly
 * checks whether defragmentation and/or reclaim is needed and performs
 * them, stopping when neither is needed, when the swapper is busy, when
 * compaction has been stopped, or when a hibernation with pinned swap is
 * in progress.
 */
static void
vm_swapfile_gc_thread(void)
{
	boolean_t       need_defragment;
	boolean_t       need_reclaim;

	/* setup performed only on the first entry */
	if (!vm_swapfile_gc_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		vm_swapfile_gc_thread_inited = true;
	}

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	/* every 'break' below leaves the loop with vm_swap_data_lock held */
	while (TRUE) {
		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
			break;
		}

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
			need_defragment = TRUE;
		}

		/* a reclaim pass is always preceded by a defragment pass */
		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE) {
			break;
		}

		/* the force requests have been sampled by the macros above; consume them */
		vm_swap_force_defrag = FALSE;
		vm_swap_force_reclaim = FALSE;

		/* the actual work runs without the lock held */
		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE) {
			vm_swap_defragment();
		}
		if (need_reclaim == TRUE) {
			vm_swap_reclaim();
		}
	}
	vm_swapfile_gc_thread_running = 0;

	/* let anyone waiting for this thread to go idle know that it has */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	/* register for the next wakeup before dropping the lock */
	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}
969
970
971
/*
 * Values assigned to vm_swapout_limit by
 * vm_swapout_thread_throttle_adjust(), one per throttle state.
 */
#define VM_SWAPOUT_LIMIT_T2P    4
#define VM_SWAPOUT_LIMIT_T1P    4
#define VM_SWAPOUT_LIMIT_T0P    6
#define VM_SWAPOUT_LIMIT_T0     8
#define VM_SWAPOUT_LIMIT_MAX    8

/*
 * States of the swapout throttle state machine driven by
 * vm_swapout_thread_throttle_adjust(). It starts in VM_SWAPOUT_START,
 * which always moves to VM_SWAPOUT_T2_PASSIVE.
 */
#define VM_SWAPOUT_START        0
#define VM_SWAPOUT_T2_PASSIVE   1
#define VM_SWAPOUT_T1_PASSIVE   2
#define VM_SWAPOUT_T0_PASSIVE   3
#define VM_SWAPOUT_T0           4

int vm_swapout_state = VM_SWAPOUT_START;        /* current state of the throttle state machine */
int vm_swapout_limit = 1;                       /* one of the VM_SWAPOUT_LIMIT_* values once the state machine has run */

/* Counters bumped by vm_swapout_thread_throttle_adjust() when it switches the swapper to the corresponding tier. */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
991
992
993 static void
vm_swapout_thread_throttle_adjust(void)994 vm_swapout_thread_throttle_adjust(void)
995 {
996 switch (vm_swapout_state) {
997 case VM_SWAPOUT_START:
998
999 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1000 vm_swapper_entered_T2P++;
1001
1002 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1003 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1004 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1005 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1006 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1007 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1008
1009 break;
1010
1011 case VM_SWAPOUT_T2_PASSIVE:
1012
1013 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1014 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1015 vm_swapper_entered_T0P++;
1016
1017 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1018 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1019 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1020 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1021 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1022 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1023
1024 break;
1025 }
1026 if (swapout_target_age || hibernate_flushing == TRUE) {
1027 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
1028 vm_swapper_entered_T1P++;
1029
1030 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1031 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1032 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1033 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1034 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
1035 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
1036 }
1037 break;
1038
1039 case VM_SWAPOUT_T1_PASSIVE:
1040
1041 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1042 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1043 vm_swapper_entered_T0P++;
1044
1045 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1046 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1047 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1048 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1049 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1050 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1051
1052 break;
1053 }
1054 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
1055 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1056 vm_swapper_entered_T2P++;
1057
1058 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1059 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1060 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1061 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1062 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1063 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1064 }
1065 break;
1066
1067 case VM_SWAPOUT_T0_PASSIVE:
1068
1069 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1070 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1071 vm_swapper_entered_T2P++;
1072
1073 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1074 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1075 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1076 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1077 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1078 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1079
1080 break;
1081 }
1082 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1083 vm_swapper_entered_T0++;
1084
1085 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1086 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1087 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1088 vm_swapout_state = VM_SWAPOUT_T0;
1089 }
1090 break;
1091
1092 case VM_SWAPOUT_T0:
1093
1094 if (SWAPPER_HAS_CAUGHTUP()) {
1095 vm_swapper_entered_T0P++;
1096
1097 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1098 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1099 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1100 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1101 }
1102 break;
1103 }
1104 }
1105
/* number of zero-sized segments vm_swapout_thread() freed instead of swapping */
int vm_swapout_found_empty = 0;

/* pool of I/O completion contexts, one per possible in-flight swapout */
struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];

/* number of contexts in vm_swapout_ctx[] with swp_io_busy set */
int vm_swapout_soc_busy = 0;
/* number of contexts whose I/O has completed but has not been processed yet */
int vm_swapout_soc_done = 0;
1112
1113
1114 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1115 vm_swapout_find_free_soc(void)
1116 {
1117 int i;
1118
1119 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1120 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1121 return &vm_swapout_ctx[i];
1122 }
1123 }
1124 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1125
1126 return NULL;
1127 }
1128
1129 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1130 vm_swapout_find_done_soc(void)
1131 {
1132 int i;
1133
1134 if (vm_swapout_soc_done) {
1135 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1136 if (vm_swapout_ctx[i].swp_io_done) {
1137 return &vm_swapout_ctx[i];
1138 }
1139 }
1140 }
1141 return NULL;
1142 }
1143
1144 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1145 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1146 {
1147 kern_return_t kr;
1148
1149 if (soc->swp_io_error) {
1150 kr = KERN_FAILURE;
1151 } else {
1152 kr = KERN_SUCCESS;
1153 }
1154
1155 lck_mtx_unlock_always(c_list_lock);
1156
1157 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1158 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1159
1160 lck_mtx_lock_spin_always(c_list_lock);
1161
1162 soc->swp_io_done = 0;
1163 soc->swp_io_busy = 0;
1164
1165 vm_swapout_soc_busy--;
1166 vm_swapout_soc_done--;
1167 }
1168
1169 bool vm_swapout_thread_inited = false;
1170 extern uint32_t c_donate_swapout_count;
1171 #if CONFIG_JETSAM
1172 bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
1173 /*
1174 * swapout_sleep_threshold sets the percentage of the swapout threshold at which
1175 * the swap thread will stop processing the swapout queue.
1176 * By default this is 90 which means we will swap until the
1177 * swapout queue size is at 90% of the threshold to wake the swap thread.
 * By definition the queue length must be >= 100% of the threshold when the
1179 * swap thread is woken up. On development builds this can be adjusted with
1180 * the vm.swapout_sleep_threshold sysctl.
1181 */
1182 uint32_t swapout_sleep_threshold = 90;
1183 #endif /* CONFIG_JETSAM */
1184 static bool
should_process_swapout_queue(const queue_head_t * swapout_list_head)1185 should_process_swapout_queue(const queue_head_t *swapout_list_head)
1186 {
1187 bool process_queue = !queue_empty(swapout_list_head) &&
1188 vm_swapout_soc_busy < vm_swapout_limit &&
1189 !compressor_store_stop_compaction;
1190 #if CONFIG_JETSAM
1191 if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
1192 process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
1193 }
1194 #endif /* CONFIG_JETSAM */
1195 return process_queue;
1196 }
1197
/*
 * Body of the swapout thread.
 *
 * Pulls compressed segments off the swapout queues, starts a swap-file
 * write for each one via vm_swap_put(), and processes completed writes
 * via vm_swapout_complete_soc(). When there is nothing more to do it
 * waits on &vm_swapout_thread with itself as the continuation, so the
 * function never returns to its caller.
 */
void
vm_swapout_thread(void)
{
	uint32_t        size = 0;
	c_segment_t     c_seg = NULL;
	kern_return_t   kr = KERN_SUCCESS;
	struct swapout_io_completion *soc;
	queue_head_t    *swapout_list_head;
	bool            queues_empty = false;

	/* one-time setup, done the first time the thread runs */
	if (!vm_swapout_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;
		vm_swapout_thread_inited = true;
	}

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	/* always start with the early queue */
	swapout_list_head = &c_early_swapout_list_head;
	vm_swapout_thread_running = TRUE;
	os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
again:
	/* c_list_lock is held at the top of every iteration */
	while (should_process_swapout_queue(swapout_list_head)) {
		c_seg = (c_segment_t)queue_first(swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			/*
			 * Someone else owns the segment: wait for it and retry.
			 * NOTE(review): c_seg_wait_on_busy presumably drops
			 * c_seg->c_lock, since only c_list_lock is retaken here.
			 */
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		/* number of bytes to write, rounded up to whole pages */
		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			/*
			 * c_seg_free_locked will drop the c_list_lock and
			 * the c_seg->c_lock.
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);
			c_seg = NULL;

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		/* claim the segment and move it to the swap I/O queue */
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

		lck_mtx_unlock_always(c_list_lock);
		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		/*
		 * should_process_swapout_queue() only lets us get here when
		 * vm_swapout_soc_busy < vm_swapout_limit, so a free context
		 * is expected to exist.
		 */
		soc = vm_swapout_find_free_soc();
		assert(soc);

		soc->swp_upl_ctx.io_context = (void *)soc;
		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
		soc->swp_upl_ctx.io_error = 0;

		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

		if (kr != KERN_SUCCESS) {
			/*
			 * The put failed. If the completion callback already
			 * marked the context done, undo that accounting before
			 * finishing the segment here.
			 */
			if (soc->swp_io_done) {
				lck_mtx_lock_spin_always(c_list_lock);

				soc->swp_io_done = 0;
				vm_swapout_soc_done--;

				lck_mtx_unlock_always(c_list_lock);
			}
			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
		} else {
			/* write is in flight; the context stays busy until completion */
			soc->swp_io_busy = 1;
			vm_swapout_soc_busy++;
		}

c_seg_is_empty:
		/* no locks are held here */
		if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
		}

		lck_mtx_lock_spin_always(c_list_lock);

		/* reap every write that has completed so far */
		while ((soc = vm_swapout_find_done_soc())) {
			vm_swapout_complete_soc(soc);
		}
		lck_mtx_unlock_always(c_list_lock);

		vm_swapout_thread_throttle_adjust();

		lck_mtx_lock_spin_always(c_list_lock);
	}
	while ((soc = vm_swapout_find_done_soc())) {
		vm_swapout_complete_soc(soc);
	}
	lck_mtx_unlock_always(c_list_lock);

	vm_pageout_io_throttle();

	lck_mtx_lock_spin_always(c_list_lock);

	/*
	 * Recheck if we have some c_segs to wakeup
	 * post throttle. And, check to see if we
	 * have any more swapouts needed.
	 */
	if (vm_swapout_soc_done) {
		goto again;
	}

#if XNU_TARGET_OS_OSX
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#else /* XNU_TARGET_OS_OSX */
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#endif /* XNU_TARGET_OS_OSX */

	if (!queues_empty) {
		/* pick the next queue to drain: early first, then the others */
		swapout_list_head = NULL;
		if (!queue_empty(&c_early_swapout_list_head)) {
			swapout_list_head = &c_early_swapout_list_head;
		} else {
#if XNU_TARGET_OS_OSX
			/*
			 * On macOS we _always_ process all swapout queues.
			 */
			if (!queue_empty(&c_regular_swapout_list_head)) {
				swapout_list_head = &c_regular_swapout_list_head;
			} else {
				swapout_list_head = &c_late_swapout_list_head;
			}
#else /* XNU_TARGET_OS_OSX */
			/*
			 * On non-macOS swap-capable platforms, we might want to
			 * process just the early queue (Freezer) or process both
			 * early and late queues (app swap). We processed the early
			 * queue up above. The late Q will only be processed if the
			 * checks in should_process_swapout_queue give the go-ahead.
			 */
			swapout_list_head = &c_late_swapout_list_head;
#endif /* XNU_TARGET_OS_OSX */
		}
		if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
			goto again;
		}
	}

	/*
	 * Nothing left to do: register the wait and clear the running flag
	 * while still holding c_list_lock, then block with this function as
	 * the continuation.
	 */
	assert_wait((event_t)&vm_swapout_thread, THREAD_UNINT);

	vm_swapout_thread_running = FALSE;

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
1379
1380
1381 void
vm_swapout_iodone(void * io_context,int error)1382 vm_swapout_iodone(void *io_context, int error)
1383 {
1384 struct swapout_io_completion *soc;
1385
1386 soc = (struct swapout_io_completion *)io_context;
1387
1388 lck_mtx_lock_spin_always(c_list_lock);
1389
1390 soc->swp_io_done = 1;
1391 soc->swp_io_error = error;
1392 vm_swapout_soc_done++;
1393
1394 if (!vm_swapout_thread_running) {
1395 thread_wakeup((event_t)&vm_swapout_thread);
1396 }
1397
1398 lck_mtx_unlock_always(c_list_lock);
1399 }
1400
1401
/*
 * Complete the swapout of a compressed segment once its write has
 * succeeded or failed.
 *
 * c_seg    - segment that was being swapped out; must be marked busy and
 *            busy-swapping (asserted below)
 * f_offset - swap handle for the segment; stored in the segment on success
 * size     - number of bytes that were written (page rounded by the caller)
 * kr       - KERN_SUCCESS if the write succeeded, anything else is a failure
 *
 * On success the in-memory buffer is depopulated and the segment moves to
 * a swapped-out queue. On failure the segment is put back on an in-core
 * queue. Either way the busy state is cleared and waiters are woken.
 */
static void
vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
{
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	if (kr == KERN_SUCCESS) {
		/* the data is on disk now; give the buffer's pages back */
		kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
		    KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
	}
#if ENCRYPTED_SWAP
	else {
		/* the segment stays in core: undo the encryption done before the write */
		vm_swap_decrypt(c_seg);
	}
#endif /* ENCRYPTED_SWAP */
	/* lock order: c_list_lock first, then the segment lock */
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	if (kr == KERN_SUCCESS) {
		int             new_state = C_ON_SWAPPEDOUT_Q;
		boolean_t       insert_head = FALSE;

		if (hibernate_flushing == TRUE) {
			/*
			 * segments whose generation id falls in the "to warm"
			 * range go to the head of the swapped-out queue
			 */
			if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
			    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
				insert_head = TRUE;
			}
		} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
			new_state = C_ON_SWAPPEDOUTSPARSE_Q;
		}

		c_seg_switch_state(c_seg, new_state, insert_head);

		c_seg->c_store.c_swap_handle = f_offset;

		counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);

		c_seg->c_swappedin = false;

		if (c_seg->c_bytes_used) {
			OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		}

#if CONFIG_FREEZE
		/*
		 * Successful swapout. Decrement the in-core compressed pages count.
		 */
		OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
		if (c_seg->c_has_donated_pages) {
			OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore_late_swapout);
		}
#endif /* CONFIG_FREEZE */
	} else {
		/* swapout failed: the segment goes back onto an in-core queue */
		if (c_seg->c_overage_swap == TRUE) {
			c_seg->c_overage_swap = FALSE;
			c_overage_swapped_count--;
		}

#if CONFIG_FREEZE
		if (c_seg->c_has_freezer_pages) {
			if (c_seg->c_task_owner) {
				c_seg_update_task_owner(c_seg, NULL);
			}
			/*
			 * We failed to swapout a frozen cseg. We need
			 * to put it back in the queues, specifically the
			 * AGE_Q. So clear the donated bit otherwise it'll
			 * land on the swapped_in Q.
			 */
			c_seg->c_has_donated_pages = 0;
			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
		} else
#endif /* CONFIG_FREEZE */
		{
			if (c_seg->c_has_donated_pages) {
				c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
			} else {
				c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
			}
		}

		/* at least a page of unused space: queue it for delayed compaction */
		if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
			c_seg_need_delayed_compaction(c_seg, TRUE);
		}
	}
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);

	c_seg->c_busy_swapping = 0;
	lck_mtx_unlock_always(c_list_lock);

	/* clear the busy state and wake waiters while still holding the segment lock */
	C_SEG_WAKEUP_DONE(c_seg);
	lck_mtx_unlock_always(&c_seg->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
1498
1499
/*
 * Create (or re-use) one swap file and make it available for swapouts.
 *
 * Re-uses a swapfile structure marked SWAP_REUSE if one is on
 * swf_global_queue, otherwise allocates a new one. The file is opened
 * and preallocated, starting at MAX_SWAP_FILE_SIZE and halving the size
 * on each failure down to MIN_SWAP_FILE_SIZE.
 *
 * Returns TRUE if a swap file was created and marked SWAP_READY, FALSE
 * if the file could not be opened or preallocated at any size.
 */
boolean_t
vm_swap_create_file()
{
	uint64_t        size = 0;
	int             namelen = 0;
	boolean_t       swap_file_created = FALSE;
	boolean_t       swap_file_reuse = FALSE;
	boolean_t       swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		/* note: exact match, not a bit test */
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {
		/* room for the base name, the numeric suffix and the terminating NUL */
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
		/* the index is one greater than the suffix used in the path below */
		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		/* open failed: only free the structure if we allocated it above */
		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	/* try progressively smaller sizes until preallocation succeeds */
	while (size >= MIN_SWAP_FILE_SIZE) {
		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			/* one bit per segment, at least one byte */
			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
			    Z_WAITOK | Z_ZERO);

			/* per-segment back pointers to the c_segment stored in each slot */
			swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
			    Z_WAITOK | Z_ZERO);

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
				swp_trim_supported = TRUE;
			}

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			/* a re-used structure is already on the global queue */
			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;
			if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
				vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
			}

			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}

			lck_mtx_unlock(&vm_swap_data_lock);

			/* wake threads waiting for a new swap file (see vm_swap_put) */
			thread_wakeup((event_t) &vm_num_swap_files);
#if !XNU_TARGET_OS_OSX
			/* size the overage limit from the first swap file created */
			if (vm_num_swap_files == 1) {
				c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;

				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					c_overage_swapped_limit /= 2;
				}
			}
#endif /* !XNU_TARGET_OS_OSX */
			break;
		} else {
			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {
		/* preallocation failed at every size: close the file and clean up */
		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
	}
	return swap_file_created;
}
1644
1645 extern void vnode_put(struct vnode* vp);
1646 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1647 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1648 {
1649 struct swapfile *swf = NULL;
1650 uint64_t file_offset = 0;
1651 int retval = 0;
1652
1653 assert(c_seg->c_store.c_buffer);
1654
1655 lck_mtx_lock(&vm_swap_data_lock);
1656
1657 swf = vm_swapfile_for_handle(f_offset);
1658
1659 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1660 vm_swap_get_failures++;
1661 retval = 1;
1662 goto done;
1663 }
1664 swf->swp_io_count++;
1665
1666 lck_mtx_unlock(&vm_swap_data_lock);
1667
1668 #if DEVELOPMENT || DEBUG
1669 C_SEG_MAKE_WRITEABLE(c_seg);
1670 #endif
1671 file_offset = (f_offset & SWAP_SLOT_MASK);
1672
1673 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1674 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1675 } else {
1676 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1677 vnode_put(swf->swp_vp);
1678 }
1679
1680 #if DEVELOPMENT || DEBUG
1681 C_SEG_WRITE_PROTECT(c_seg);
1682 #endif
1683 if (retval == 0) {
1684 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1685 } else {
1686 vm_swap_get_failures++;
1687 }
1688
1689 /*
1690 * Free this slot in the swap structure.
1691 */
1692 vm_swap_free(f_offset);
1693
1694 lck_mtx_lock(&vm_swap_data_lock);
1695 swf->swp_io_count--;
1696
1697 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1698 swf->swp_flags &= ~SWAP_WANTED;
1699 thread_wakeup((event_t) &swf->swp_flags);
1700 }
1701 done:
1702 lck_mtx_unlock(&vm_swap_data_lock);
1703
1704 if (retval == 0) {
1705 return KERN_SUCCESS;
1706 } else {
1707 return KERN_FAILURE;
1708 }
1709 }
1710
/*
 * Allocate a free segment slot in a swap file and write a compressed
 * segment's buffer to it.
 *
 * addr     - address of the buffer to write
 * f_offset - out: swap handle (swap file index and offset within the file)
 * size     - number of bytes to write
 * c_seg    - segment being swapped out; recorded in the slot's swp_csegs entry
 * soc      - optional completion context; when non-NULL its embedded
 *            swp_upl_ctx is handed to vm_swapfile_io() and, if the I/O call
 *            reports no error, completion is left to the caller
 *
 * Returns KERN_SUCCESS if the write was issued (or finished, when soc is
 * NULL) without error, KERN_FAILURE if the arguments are invalid,
 * compaction has been stopped, no slot could be found, or the I/O failed.
 */
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
{
	unsigned int    segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t        file_offset = 0;
	uint64_t        swapfile_index = 0;
	unsigned int    byte_for_segidx = 0;
	unsigned int    offset_within_byte = 0;
	boolean_t       swf_eligible = FALSE;
	boolean_t       waiting = FALSE;
	boolean_t       retried = FALSE;
	int             error = 0;
	uint64_t        now;
	void            *upl_ctx = NULL;
	boolean_t       drop_iocount = FALSE;

	if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	/* look through every swap file for a free segment slot */
	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		/* start the scan at the file's lowest-known free slot hint */
		segidx = swf->swp_free_hint;

		swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {
			while (segidx < swf->swp_nsegs) {
				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				/* slot already in use: try the next one */
				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}

				/* claim the slot */
				(swf->swp_bitmap)[byte_for_segidx] |= (uint8_t)(1 << offset_within_byte);

				file_offset = segidx * compressed_swap_chunk_size;
				swf->swp_nseginuse++;
				/* dropped again in vm_swap_put_finish() */
				swf->swp_io_count++;
				swf->swp_csegs[segidx] = c_seg;

				swapfile_index = swf->swp_index;
				vm_swapfile_total_segs_used++;
				if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
					vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
				}

				now = mach_absolute_time();

				/* kick off creation of another swap file if one is warranted */
				if (vm_swapfile_should_create(now) && !vm_swapfile_create_thread_running) {
					thread_wakeup((event_t) &vm_swapfile_create_needed);
				}

				lck_mtx_unlock(&vm_swap_data_lock);

				goto issue_io;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	now = mach_absolute_time();

	if (vm_swapfile_should_create(now)) {
		if (!vm_swapfile_create_thread_running) {
			thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
	} else {
		if (hibernate_flushing) {
			hibernate_no_swapspace = TRUE;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * NOTE(review): 'waiting' is not reset before 'goto retry', so on the
	 * retry pass thread_block() can be reached without a fresh
	 * assert_wait_timeout() if vm_swapfile_should_create() is then false
	 * -- confirm this is intended.
	 */
	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		/* during a hibernation flush, look for a slot one more time */
		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures_no_swap_file++;

	return KERN_FAILURE;

issue_io:
	/* a slot has been claimed in 'swf'; vm_swap_data_lock is no longer held */
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);

	/* the handle packs the swap file index above the offset within the file */
	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if (soc) {
		soc->swp_c_seg = c_seg;
		soc->swp_c_size = size;

		soc->swp_swf = swf;

		soc->swp_io_error = 0;
		soc->swp_io_done = 0;

		upl_ctx = (void *)&soc->swp_upl_ctx;
	}

	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
	} else {
		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
		/* we hold a vnode reference that vm_swap_put_finish() must drop */
		drop_iocount = TRUE;
	}

	/*
	 * Finish here on error or when no completion context was supplied;
	 * otherwise the caller finishes once the I/O completion is reported.
	 */
	if (error || upl_ctx == NULL) {
		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
	}

	return KERN_SUCCESS;
}
1854
1855 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1856 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1857 {
1858 if (drop_iocount) {
1859 vnode_put(swf->swp_vp);
1860 }
1861
1862 lck_mtx_lock(&vm_swap_data_lock);
1863
1864 swf->swp_io_count--;
1865
1866 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1867 swf->swp_flags &= ~SWAP_WANTED;
1868 thread_wakeup((event_t) &swf->swp_flags);
1869 }
1870 lck_mtx_unlock(&vm_swap_data_lock);
1871
1872 if (error) {
1873 vm_swap_free(*f_offset);
1874 vm_swap_put_failures++;
1875
1876 return KERN_FAILURE;
1877 }
1878 return KERN_SUCCESS;
1879 }
1880
1881
1882 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1883 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1884 {
1885 uint64_t file_offset = 0;
1886 unsigned int segidx = 0;
1887
1888
1889 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1890 unsigned int byte_for_segidx = 0;
1891 unsigned int offset_within_byte = 0;
1892
1893 file_offset = (f_offset & SWAP_SLOT_MASK);
1894 segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1895
1896 byte_for_segidx = segidx >> 3;
1897 offset_within_byte = segidx % 8;
1898
1899 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1900 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1901
1902 swf->swp_csegs[segidx] = NULL;
1903
1904 swf->swp_nseginuse--;
1905 vm_swapfile_total_segs_used--;
1906
1907 if (segidx < swf->swp_free_hint) {
1908 swf->swp_free_hint = segidx;
1909 }
1910 }
1911 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1912 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1913 }
1914 }
1915 }
1916
1917
/* number of slots vm_swap_free() released immediately via vm_swap_free_now() */
uint32_t vm_swap_free_now_count = 0;
/* number of slots vm_swap_free() queued on a swap file's delayed trim list */
uint32_t vm_swap_free_delayed_count = 0;
1920
1921
1922 void
vm_swap_free(uint64_t f_offset)1923 vm_swap_free(uint64_t f_offset)
1924 {
1925 struct swapfile *swf = NULL;
1926 struct trim_list *tl = NULL;
1927 uint64_t now;
1928
1929 if (swp_trim_supported == TRUE) {
1930 tl = kalloc_type(struct trim_list, Z_WAITOK);
1931 }
1932
1933 lck_mtx_lock(&vm_swap_data_lock);
1934
1935 swf = vm_swapfile_for_handle(f_offset);
1936
1937 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1938 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1939 /*
1940 * don't delay the free if the underlying disk doesn't support
1941 * trim, or we're in the midst of reclaiming this swap file since
1942 * we don't want to move segments that are technically free
1943 * but not yet handled by the delayed free mechanism
1944 */
1945 vm_swap_free_now(swf, f_offset);
1946
1947 vm_swap_free_now_count++;
1948 goto done;
1949 }
1950 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1951 tl->tl_length = compressed_swap_chunk_size;
1952
1953 tl->tl_next = swf->swp_delayed_trim_list_head;
1954 swf->swp_delayed_trim_list_head = tl;
1955 swf->swp_delayed_trim_count++;
1956 tl = NULL;
1957
1958 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1959 now = mach_absolute_time();
1960
1961 if (now > dont_trim_until_ts) {
1962 thread_wakeup((event_t) &vm_swapfile_create_needed);
1963 }
1964 }
1965 vm_swap_free_delayed_count++;
1966 }
1967 done:
1968 lck_mtx_unlock(&vm_swap_data_lock);
1969
1970 if (tl != NULL) {
1971 kfree_type(struct trim_list, tl);
1972 }
1973 }
1974
1975
1976 static void
vm_swap_wait_on_trim_handling_in_progress()1977 vm_swap_wait_on_trim_handling_in_progress()
1978 {
1979 while (delayed_trim_handling_in_progress == TRUE) {
1980 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1981 lck_mtx_unlock(&vm_swap_data_lock);
1982
1983 thread_block(THREAD_CONTINUE_NULL);
1984
1985 lck_mtx_lock(&vm_swap_data_lock);
1986 }
1987 }
1988
1989
1990 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1991 vm_swap_handle_delayed_trims(boolean_t force_now)
1992 {
1993 struct swapfile *swf = NULL;
1994
1995 /*
1996 * serialize the race between us and vm_swap_reclaim...
1997 * if vm_swap_reclaim wins it will turn off SWAP_READY
1998 * on the victim it has chosen... we can just skip over
1999 * that file since vm_swap_reclaim will first process
2000 * all of the delayed trims associated with it
2001 */
2002
2003 if (compressor_store_stop_compaction == TRUE) {
2004 return;
2005 }
2006
2007 lck_mtx_lock(&vm_swap_data_lock);
2008
2009 delayed_trim_handling_in_progress = TRUE;
2010
2011 lck_mtx_unlock(&vm_swap_data_lock);
2012
2013 /*
2014 * no need to hold the lock to walk the swf list since
2015 * vm_swap_create (the only place where we add to this list)
2016 * is run on the same thread as this function
2017 * and vm_swap_reclaim doesn't remove items from this list
2018 * instead marking them with SWAP_REUSE for future re-use
2019 */
2020 swf = (struct swapfile*) queue_first(&swf_global_queue);
2021
2022 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2023 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
2024 assert(!(swf->swp_flags & SWAP_RECLAIM));
2025 vm_swap_do_delayed_trim(swf);
2026 }
2027 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2028 }
2029 lck_mtx_lock(&vm_swap_data_lock);
2030
2031 delayed_trim_handling_in_progress = FALSE;
2032 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
2033
2034 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
2035 thread_wakeup((event_t) &vm_swapfile_gc_needed);
2036 }
2037
2038 lck_mtx_unlock(&vm_swap_data_lock);
2039 }
2040
2041 static void
vm_swap_do_delayed_trim(struct swapfile * swf)2042 vm_swap_do_delayed_trim(struct swapfile *swf)
2043 {
2044 struct trim_list *tl, *tl_head;
2045 int error;
2046
2047 if (compressor_store_stop_compaction == TRUE) {
2048 return;
2049 }
2050
2051 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
2052 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
2053 return;
2054 }
2055
2056 lck_mtx_lock(&vm_swap_data_lock);
2057
2058 tl_head = swf->swp_delayed_trim_list_head;
2059 swf->swp_delayed_trim_list_head = NULL;
2060 swf->swp_delayed_trim_count = 0;
2061
2062 lck_mtx_unlock(&vm_swap_data_lock);
2063
2064 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
2065
2066 (void) vnode_put(swf->swp_vp);
2067
2068 while ((tl = tl_head) != NULL) {
2069 unsigned int segidx = 0;
2070 unsigned int byte_for_segidx = 0;
2071 unsigned int offset_within_byte = 0;
2072
2073 lck_mtx_lock(&vm_swap_data_lock);
2074
2075 segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
2076
2077 byte_for_segidx = segidx >> 3;
2078 offset_within_byte = segidx % 8;
2079
2080 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
2081 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2082
2083 swf->swp_csegs[segidx] = NULL;
2084
2085 swf->swp_nseginuse--;
2086 vm_swapfile_total_segs_used--;
2087
2088 if (segidx < swf->swp_free_hint) {
2089 swf->swp_free_hint = segidx;
2090 }
2091 }
2092 lck_mtx_unlock(&vm_swap_data_lock);
2093
2094 tl_head = tl->tl_next;
2095
2096 kfree_type(struct trim_list, tl);
2097 }
2098 }
2099
2100
/*
 * Intentionally empty: there is nothing to flush.
 */
void
vm_swap_flush(void)
{
	return;
}
2106
/*
 * Number of times vm_swap_reclaim() broke out of its per-segment loop early
 * (stop-compaction set, VM_SWAP_SHOULD_ABORT_RECLAIM() or VM_SWAP_BUSY()).
 */
int vm_swap_reclaim_yielded = 0;
2108
2109 void
vm_swap_reclaim(void)2110 vm_swap_reclaim(void)
2111 {
2112 vm_offset_t addr = 0;
2113 unsigned int segidx = 0;
2114 uint64_t f_offset = 0;
2115 struct swapfile *swf = NULL;
2116 struct swapfile *smallest_swf = NULL;
2117 unsigned int min_nsegs = 0;
2118 unsigned int byte_for_segidx = 0;
2119 unsigned int offset_within_byte = 0;
2120 uint32_t c_size = 0;
2121
2122 c_segment_t c_seg = NULL;
2123
2124 kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
2125 KMA_NOFAIL | KMA_KOBJECT | KMA_DATA, VM_KERN_MEMORY_COMPRESSOR);
2126
2127 lck_mtx_lock(&vm_swap_data_lock);
2128
2129 /*
2130 * if we're running the swapfile list looking for
2131 * candidates with delayed trims, we need to
2132 * wait before making our decision concerning
2133 * the swapfile we want to reclaim
2134 */
2135 vm_swap_wait_on_trim_handling_in_progress();
2136
2137 /*
2138 * from here until we knock down the SWAP_READY bit,
2139 * we need to remain behind the vm_swap_data_lock...
2140 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
2141 * will not consider this swapfile for processing
2142 */
2143 swf = (struct swapfile*) queue_first(&swf_global_queue);
2144 min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
2145 smallest_swf = NULL;
2146
2147 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2148 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
2149 smallest_swf = swf;
2150 min_nsegs = swf->swp_nseginuse;
2151 }
2152 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2153 }
2154
2155 if (smallest_swf == NULL) {
2156 goto done;
2157 }
2158
2159 swf = smallest_swf;
2160
2161
2162 swf->swp_flags &= ~SWAP_READY;
2163 swf->swp_flags |= SWAP_RECLAIM;
2164
2165 if (swf->swp_delayed_trim_count) {
2166 lck_mtx_unlock(&vm_swap_data_lock);
2167
2168 vm_swap_do_delayed_trim(swf);
2169
2170 lck_mtx_lock(&vm_swap_data_lock);
2171 }
2172 segidx = 0;
2173
2174 while (segidx < swf->swp_nsegs) {
2175 ReTry_for_cseg:
2176 /*
2177 * Wait for outgoing I/Os.
2178 */
2179 while (swf->swp_io_count) {
2180 swf->swp_flags |= SWAP_WANTED;
2181
2182 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2183 lck_mtx_unlock(&vm_swap_data_lock);
2184
2185 thread_block(THREAD_CONTINUE_NULL);
2186
2187 lck_mtx_lock(&vm_swap_data_lock);
2188 }
2189 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2190 vm_swap_reclaim_yielded++;
2191 break;
2192 }
2193
2194 byte_for_segidx = segidx >> 3;
2195 offset_within_byte = segidx % 8;
2196
2197 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2198 segidx++;
2199 continue;
2200 }
2201
2202 c_seg = swf->swp_csegs[segidx];
2203 assert(c_seg);
2204
2205 lck_mtx_lock_spin_always(&c_seg->c_lock);
2206
2207 if (c_seg->c_busy) {
2208 /*
2209 * a swapped out c_segment in the process of being freed will remain in the
2210 * busy state until after the vm_swap_free is called on it... vm_swap_free
2211 * takes the vm_swap_data_lock, so can't change the swap state until after
2212 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2213 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2214 * at that point, we re-look up the swap state which will now indicate that
2215 * this c_segment no longer exists.
2216 */
2217 c_seg->c_wanted = 1;
2218
2219 assert_wait((event_t) (c_seg), THREAD_UNINT);
2220 lck_mtx_unlock_always(&c_seg->c_lock);
2221
2222 lck_mtx_unlock(&vm_swap_data_lock);
2223
2224 thread_block(THREAD_CONTINUE_NULL);
2225
2226 lck_mtx_lock(&vm_swap_data_lock);
2227
2228 goto ReTry_for_cseg;
2229 }
2230 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2231
2232 f_offset = segidx * compressed_swap_chunk_size;
2233
2234 assert(c_seg == swf->swp_csegs[segidx]);
2235 swf->swp_csegs[segidx] = NULL;
2236 swf->swp_nseginuse--;
2237
2238 vm_swapfile_total_segs_used--;
2239
2240 lck_mtx_unlock(&vm_swap_data_lock);
2241
2242 assert(C_SEG_IS_ONDISK(c_seg));
2243
2244 C_SEG_BUSY(c_seg);
2245 c_seg->c_busy_swapping = 1;
2246 #if !CHECKSUM_THE_SWAP
2247 c_seg_trim_tail(c_seg);
2248 #endif
2249 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2250
2251 assert(c_size <= c_seg_bufsize && c_size);
2252
2253 lck_mtx_unlock_always(&c_seg->c_lock);
2254
2255 if (vnode_getwithref(swf->swp_vp)) {
2256 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2257 vm_swap_get_failures++;
2258 goto swap_io_failed;
2259 } else {
2260 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2261 /*
2262 * reading the data back in failed, so convert c_seg
2263 * to a swapped in c_segment that contains no data
2264 */
2265 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2266 /*
2267 * returns with c_busy_swapping cleared
2268 */
2269 vnode_put(swf->swp_vp);
2270 vm_swap_get_failures++;
2271 goto swap_io_failed;
2272 }
2273 vnode_put(swf->swp_vp);
2274 }
2275
2276 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2277 vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2278
2279 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2280 vm_offset_t c_buffer;
2281
2282 /*
2283 * the put failed, so convert c_seg to a fully swapped in c_segment
2284 * with valid data
2285 */
2286 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2287
2288 kernel_memory_populate(c_buffer, c_size,
2289 KMA_NOFAIL | KMA_COMPRESSOR,
2290 VM_KERN_MEMORY_COMPRESSOR);
2291
2292 memcpy((char *)c_buffer, (char *)addr, c_size);
2293
2294 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2295 #if ENCRYPTED_SWAP
2296 vm_swap_decrypt(c_seg);
2297 #endif /* ENCRYPTED_SWAP */
2298 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2299 /*
2300 * returns with c_busy_swapping cleared
2301 */
2302 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2303
2304 goto swap_io_failed;
2305 }
2306 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2307
2308 lck_mtx_lock_spin_always(&c_seg->c_lock);
2309
2310 c_seg->c_swappedin = false;
2311
2312 assert(C_SEG_IS_ONDISK(c_seg));
2313 /*
2314 * The c_seg will now know about the new location on disk.
2315 */
2316 c_seg->c_store.c_swap_handle = f_offset;
2317
2318 assert(c_seg->c_busy_swapping);
2319 c_seg->c_busy_swapping = 0;
2320 swap_io_failed:
2321 assert(c_seg->c_busy);
2322 C_SEG_WAKEUP_DONE(c_seg);
2323
2324 lck_mtx_unlock_always(&c_seg->c_lock);
2325 lck_mtx_lock(&vm_swap_data_lock);
2326 }
2327
2328 if (swf->swp_nseginuse) {
2329 swf->swp_flags &= ~SWAP_RECLAIM;
2330 swf->swp_flags |= SWAP_READY;
2331
2332 goto done;
2333 }
2334 /*
2335 * We don't remove this inactive swf from the queue.
2336 * That way, we can re-use it when needed again and
2337 * preserve the namespace. The delayed_trim processing
2338 * is also dependent on us not removing swfs from the queue.
2339 */
2340 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2341
2342 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2343
2344 lck_mtx_unlock(&vm_swap_data_lock);
2345
2346 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2347
2348 kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2349 kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2350
2351 lck_mtx_lock(&vm_swap_data_lock);
2352
2353 if (swf->swp_flags & SWAP_PINNED) {
2354 vm_num_pinned_swap_files--;
2355 vm_swappin_avail += swf->swp_size;
2356 }
2357
2358 swf->swp_vp = NULL;
2359 swf->swp_size = 0;
2360 swf->swp_free_hint = 0;
2361 swf->swp_nsegs = 0;
2362 swf->swp_flags = SWAP_REUSE;
2363
2364 vm_num_swap_files--;
2365
2366 done:
2367 thread_wakeup((event_t) &swf->swp_flags);
2368 lck_mtx_unlock(&vm_swap_data_lock);
2369
2370 kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2371 }
2372
2373
2374 uint64_t
vm_swap_get_total_space(void)2375 vm_swap_get_total_space(void)
2376 {
2377 uint64_t total_space = 0;
2378
2379 total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2380
2381 return total_space;
2382 }
2383
2384 uint64_t
vm_swap_get_used_space(void)2385 vm_swap_get_used_space(void)
2386 {
2387 uint64_t used_space = 0;
2388
2389 used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2390
2391 return used_space;
2392 }
2393
2394 uint64_t
vm_swap_get_free_space(void)2395 vm_swap_get_free_space(void)
2396 {
2397 return vm_swap_get_total_space() - vm_swap_get_used_space();
2398 }
2399
2400 uint64_t
vm_swap_get_max_configured_space(void)2401 vm_swap_get_max_configured_space(void)
2402 {
2403 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2404 return num_swap_files * MAX_SWAP_FILE_SIZE;
2405 }
2406
2407 bool
vm_swap_low_on_space(void)2408 vm_swap_low_on_space(void)
2409 {
2410 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2411 return false;
2412 }
2413
2414 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2415 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2416 return false;
2417 }
2418
2419 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2420 return true;
2421 }
2422 }
2423 return false;
2424 }
2425
2426 bool
vm_swap_out_of_space(void)2427 vm_swap_out_of_space(void)
2428 {
2429 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2430 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2431 /*
2432 * Last swapfile and we have only space for the
2433 * last few swapouts.
2434 */
2435 return true;
2436 }
2437
2438 return false;
2439 }
2440
2441 boolean_t
vm_swap_files_pinned(void)2442 vm_swap_files_pinned(void)
2443 {
2444 boolean_t result;
2445
2446 if (vm_swappin_enabled == FALSE) {
2447 return TRUE;
2448 }
2449
2450 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2451
2452 return result;
2453 }
2454
#if CONFIG_FREEZE
/*
 * Ask the swap volume for its freeze budget.
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget().
 *
 * Returns TRUE when vm_swap_vol_get_budget() returned 0 for the vnode
 * that was queried, FALSE otherwise.
 *
 * With swap files present, the first SWAP_READY swapfile on the queue is
 * queried; if none is ready, *freeze_daily_budget is left untouched.
 * With no swap files, a temporary swapfile is opened just for the query;
 * if that open yields no vnode, *freeze_daily_budget is set to 0.
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       use_device_value = FALSE;
	struct swapfile *swf = NULL;

	if (vm_num_swap_files) {
		lck_mtx_lock(&vm_swap_data_lock);

		swf = (struct swapfile*) queue_first(&swf_global_queue);

		if (swf) {
			while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
				/*
				 * Test the SWAP_READY bit rather than comparing the whole
				 * flags word: a ready swapfile may carry other bits as well
				 * (e.g. SWAP_PINNED) and is just as usable for the query.
				 */
				if (swf->swp_flags & SWAP_READY) {
					assert(swf->swp_vp);

					if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
						use_device_value = TRUE;
					}
					break;
				}
				swf = (struct swapfile*) queue_next(&swf->swp_queue);
			}
		}

		lck_mtx_unlock(&vm_swap_data_lock);
	} else {
		/*
		 * This block is used for the initial budget value before any swap files
		 * are created. We create a temp swap file to get the budget.
		 */

		struct vnode *temp_vp = NULL;

		vm_swapfile_open(swapfilename, &temp_vp);

		if (temp_vp) {
			if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
				use_device_value = TRUE;
			}

			vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
			temp_vp = NULL;
		} else {
			*freeze_daily_budget = 0;
		}
	}

	return use_device_value;
}
#endif /* CONFIG_FREEZE */
2507
2508 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2509 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2510 {
2511 lck_mtx_lock(&vm_swap_data_lock);
2512
2513 *alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2514 *used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2515
2516 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2517 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2518
2519 lck_mtx_unlock(&vm_swap_data_lock);
2520 }
2521