1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
/*
 * Lock group and mutex for the swap bookkeeping in this file.
 * vm_swap_data_lock is held around updates to the force-defrag/reclaim and
 * hibernate flags and is required by vm_swapfile_for_handle().
 */
LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);

#if defined(XNU_TARGET_OS_OSX)
/*
 * launchd explicitly turns ON swap later during boot on macOS devices.
 */
boolean_t       compressor_store_stop_compaction = TRUE;
#else
boolean_t       compressor_store_stop_compaction = FALSE;
#endif

/*
 * The addresses of these two variables are the wait events that the
 * swapfile-create and swapfile-gc threads sleep on.
 */
boolean_t       vm_swapfile_create_needed = FALSE;
boolean_t       vm_swapfile_gc_needed = FALSE;

/* I/O throttle tier last applied by vm_swapout_thread_throttle_adjust(); -1 initially. */
int             vm_swapper_throttle = -1;
/* thread_id of the "VM_swapout" thread, recorded by vm_compressor_swap_init(). */
uint64_t        vm_swapout_thread_id;

uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
uint64_t        vm_swap_get_failures = 0; /* Fatal */
uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
/* Maximum number of swap files; set by vm_compressor_swap_init_swap_file_limit(). */
int             vm_num_swap_files_config = 0;
int             vm_num_swap_files = 0;
int             vm_num_pinned_swap_files = 0;
/* 0 until the swap volume capacity is known (see vm_compressor_swap_init_swap_file_limit()). */
uint64_t        vm_swap_volume_capacity = 0;
int             vm_swapout_thread_processed_segments = 0;
int             vm_swapout_thread_awakened = 0;
bool            vm_swapout_thread_running = FALSE;
_Atomic bool    vm_swapout_wake_pending = false;
/* Bumped / set to 1 on every entry of the create thread; 'running' is cleared before it parks. */
int             vm_swapfile_create_thread_awakened = 0;
int             vm_swapfile_create_thread_running = 0;
/* Same pair for the gc thread. */
int             vm_swapfile_gc_thread_awakened = 0;
int             vm_swapfile_gc_thread_running = 0;

/*
 * On macOS, vm_swappin_avail is loaded from vnode_getswappin_avail() during
 * vm_compaction_swapper_do_init() and vm_swappin_enabled is set when it is
 * non-zero.
 */
int64_t         vm_swappin_avail = 0;
boolean_t       vm_swappin_enabled = FALSE;
unsigned int    vm_swapfile_total_segs_alloced = 0;
unsigned int    vm_swapfile_total_segs_alloced_max = 0;
unsigned int    vm_swapfile_total_segs_used = 0;
unsigned int    vm_swapfile_total_segs_used_max = 0;

/* Path prefix of the swap files; a numeric index is appended to form each file name. */
char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
79
80 extern vm_map_t compressor_map;
81 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
82
83 #define SWAP_READY 0x1 /* Swap file is ready to be used */
84 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
85 #define SWAP_WANTED 0x4 /* Swap file has waiters */
86 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
87 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
88
89
/*
 * Per-swap-file bookkeeping record. Records are linked on swf_global_queue.
 *
 * swp_queue must remain the first member: the lookup code in this file casts
 * directly between 'struct swapfile *' and queue entries.
 */
struct swapfile {
	queue_head_t            swp_queue;      /* list of swap files */
	char                    *swp_path;      /* saved pathname of swap file */
	struct vnode            *swp_vp;        /* backing vnode */
	uint64_t                swp_size;       /* size of this swap file */
	uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
	unsigned int            swp_pathlen;    /* length of pathname */
	unsigned int            swp_nsegs;      /* #segments we can use */
	unsigned int            swp_nseginuse;  /* #segments in use */
	unsigned int            swp_index;      /* index of this swap file */
	unsigned int            swp_flags;      /* state of swap file (presumably a mask of the SWAP_* bits - confirm) */
	unsigned int            swp_free_hint;  /* offset of 1st free chunk */
	unsigned int            swp_io_count;   /* count of outstanding I/Os */
	c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */

	/* NOTE(review): appears to be the head of the pending delayed-trim list - confirm against the trim code */
	struct trim_list        *swp_delayed_trim_list_head;
	/* compared against VM_SWAPFILE_DELAYED_TRIM_MAX by VM_SWAP_SHOULD_TRIM() */
	unsigned int            swp_delayed_trim_count;
};
108
109 queue_head_t swf_global_queue;
110 boolean_t swp_trim_supported = FALSE;
111
112 extern clock_sec_t dont_trim_until_ts;
113 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
114 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
115 int vm_swapfile_can_be_created = FALSE;
116 boolean_t delayed_trim_handling_in_progress = FALSE;
117
118 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
119
120 static void vm_swapout_thread_throttle_adjust(void);
121 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
122 void vm_swapout_thread(void);
123 static void vm_swapfile_create_thread(void);
124 static void vm_swapfile_gc_thread(void);
125 static void vm_swap_defragment(void);
126 static void vm_swap_handle_delayed_trims(boolean_t);
127 static void vm_swap_do_delayed_trim(struct swapfile *);
128 static void vm_swap_wait_on_trim_handling_in_progress(void);
129 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
130
131 extern int vnode_getwithref(struct vnode* vp);
132
133 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
134
/*
 * Swap tuning constants and decision predicates.
 *
 * Every predicate evaluates to 1 or 0 (VM_SWAP_SHOULD_PIN to a boolean
 * expression) and reads file-scope counters directly, so the caller is
 * responsible for whatever locking those counters need.
 *
 * Macro parameters are parenthesized in the expansions so that arguments
 * such as 'cond ? a : b' or '&swapfile' expand as the caller intends.
 */
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5
#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / (MAX_SWAP_FILE_SIZE))
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / (MAX_SWAP_FILE_SIZE))
#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/*
 * We reserve compressor pool VA at boot for the max # of swap files. If someone
 * has enabled app swap but we're not an arm large memory device we can't hog
 * all of the VA so we only go up to 4GB.
 */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / (MAX_SWAP_FILE_SIZE))
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / (MAX_SWAP_FILE_SIZE))
#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
/* Minimum number of seconds after a failed swap-file creation before VM_SWAP_SHOULD_CREATE() allows another attempt. */
#define VM_SWAPFILE_DELAYED_CREATE      15

#define VM_SWAP_BUSY()  (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
184
185
186 #if CHECKSUM_THE_SWAP
187 extern unsigned int hash_string(char *cp, int len);
188 #endif
189
190 #if RECORD_THE_COMPRESSED_DATA
191 boolean_t c_compressed_record_init_done = FALSE;
192 int c_compressed_record_write_error = 0;
193 struct vnode *c_compressed_record_vp = NULL;
194 uint64_t c_compressed_record_file_offset = 0;
195 void c_compressed_record_init(void);
196 void c_compressed_record_write(char *, int);
197 #endif
198
199 extern void vm_pageout_io_throttle(void);
200
201 static struct swapfile *vm_swapfile_for_handle(uint64_t);
202
203 /*
204 * Called with the vm_swap_data_lock held.
205 */
206
207 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)208 vm_swapfile_for_handle(uint64_t f_offset)
209 {
210 uint64_t file_offset = 0;
211 unsigned int swapfile_index = 0;
212 struct swapfile* swf = NULL;
213
214 file_offset = (f_offset & SWAP_SLOT_MASK);
215 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
216
217 swf = (struct swapfile*) queue_first(&swf_global_queue);
218
219 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
220 if (swapfile_index == swf->swp_index) {
221 break;
222 }
223
224 swf = (struct swapfile*) queue_next(&swf->swp_queue);
225 }
226
227 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
228 swf = NULL;
229 }
230
231 return swf;
232 }
233
234 #if ENCRYPTED_SWAP
235
236 #include <libkern/crypto/aesxts.h>
237
extern int cc_rand_generate(void *, size_t);     /* from <libkern/crypto/rand.h> */

/* Set to TRUE at the end of the key setup in swap_crypt_initialize(). */
boolean_t       swap_crypt_initialized;
void swap_crypt_initialize(void);

/* XTS context shared by vm_swap_encrypt() and vm_swap_decrypt(). */
symmetric_xts   xts_modectx;
uint32_t        swap_crypt_key1[8];   /* big enough for a 256 bit random key */
uint32_t        swap_crypt_key2[8];   /* big enough for a 256 bit random key */

#if DEVELOPMENT || DEBUG
/*
 * Scratch pages for the encrypt/decrypt round-trip self test that
 * swap_crypt_initialize() runs on DEVELOPMENT/DEBUG kernels.
 */
boolean_t       swap_crypt_xts_tested = FALSE;
unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
#endif /* DEVELOPMENT || DEBUG */

/* Running totals, in pages, of data passed through vm_swap_encrypt() / vm_swap_decrypt(). */
unsigned long   vm_page_encrypt_counter;
unsigned long   vm_page_decrypt_counter;
256
257
258 void
swap_crypt_initialize(void)259 swap_crypt_initialize(void)
260 {
261 uint8_t *enckey1, *enckey2;
262 int keylen1, keylen2;
263 int error;
264
265 assert(swap_crypt_initialized == FALSE);
266
267 keylen1 = sizeof(swap_crypt_key1);
268 enckey1 = (uint8_t *)&swap_crypt_key1;
269 keylen2 = sizeof(swap_crypt_key2);
270 enckey2 = (uint8_t *)&swap_crypt_key2;
271
272 error = cc_rand_generate((void *)enckey1, keylen1);
273 assert(!error);
274
275 error = cc_rand_generate((void *)enckey2, keylen2);
276 assert(!error);
277
278 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
279 assert(!error);
280
281 swap_crypt_initialized = TRUE;
282
283 #if DEVELOPMENT || DEBUG
284 uint8_t *encptr;
285 uint8_t *decptr;
286 uint8_t *refptr;
287 uint8_t *iv;
288 uint64_t ivnum[2];
289 int size = 0;
290 int i = 0;
291 int rc = 0;
292
293 assert(swap_crypt_xts_tested == FALSE);
294
295 /*
296 * Validate the encryption algorithms.
297 *
298 * First initialize the test data.
299 */
300 for (i = 0; i < 4096; i++) {
301 swap_crypt_test_page_ref[i] = (char) i;
302 }
303 ivnum[0] = (uint64_t)0xaa;
304 ivnum[1] = 0;
305 iv = (uint8_t *)ivnum;
306
307 refptr = (uint8_t *)swap_crypt_test_page_ref;
308 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
309 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
310 size = 4096;
311
312 /* encrypt */
313 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
314 assert(!rc);
315
316 /* compare result with original - should NOT match */
317 for (i = 0; i < 4096; i++) {
318 if (swap_crypt_test_page_encrypt[i] !=
319 swap_crypt_test_page_ref[i]) {
320 break;
321 }
322 }
323 assert(i != 4096);
324
325 /* decrypt */
326 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
327 assert(!rc);
328
329 /* compare result with original */
330 for (i = 0; i < 4096; i++) {
331 if (swap_crypt_test_page_decrypt[i] !=
332 swap_crypt_test_page_ref[i]) {
333 panic("encryption test failed");
334 }
335 }
336 /* encrypt in place */
337 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
338 assert(!rc);
339
340 /* decrypt in place */
341 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
342 assert(!rc);
343
344 for (i = 0; i < 4096; i++) {
345 if (swap_crypt_test_page_decrypt[i] !=
346 swap_crypt_test_page_ref[i]) {
347 panic("in place encryption test failed");
348 }
349 }
350 swap_crypt_xts_tested = TRUE;
351 #endif /* DEVELOPMENT || DEBUG */
352 }
353
354
355 void
vm_swap_encrypt(c_segment_t c_seg)356 vm_swap_encrypt(c_segment_t c_seg)
357 {
358 uint8_t *ptr;
359 uint8_t *iv;
360 uint64_t ivnum[2];
361 int size = 0;
362 int rc = 0;
363
364 if (swap_crypt_initialized == FALSE) {
365 swap_crypt_initialize();
366 }
367
368 #if DEVELOPMENT || DEBUG
369 C_SEG_MAKE_WRITEABLE(c_seg);
370 #endif
371 ptr = (uint8_t *)c_seg->c_store.c_buffer;
372 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
373
374 ivnum[0] = (uint64_t)c_seg;
375 ivnum[1] = 0;
376 iv = (uint8_t *)ivnum;
377
378 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
379 assert(!rc);
380
381 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
382
383 #if DEVELOPMENT || DEBUG
384 C_SEG_WRITE_PROTECT(c_seg);
385 #endif
386 }
387
388 void
vm_swap_decrypt(c_segment_t c_seg)389 vm_swap_decrypt(c_segment_t c_seg)
390 {
391 uint8_t *ptr;
392 uint8_t *iv;
393 uint64_t ivnum[2];
394 int size = 0;
395 int rc = 0;
396
397 assert(swap_crypt_initialized);
398
399 #if DEVELOPMENT || DEBUG
400 C_SEG_MAKE_WRITEABLE(c_seg);
401 #endif
402 ptr = (uint8_t *)c_seg->c_store.c_buffer;
403 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
404
405 ivnum[0] = (uint64_t)c_seg;
406 ivnum[1] = 0;
407 iv = (uint8_t *)ivnum;
408
409 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
410 assert(!rc);
411
412 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
413
414 #if DEVELOPMENT || DEBUG
415 C_SEG_WRITE_PROTECT(c_seg);
416 #endif
417 }
418 #endif /* ENCRYPTED_SWAP */
419
420 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
421 extern bool memorystatus_swap_all_apps;
422
423 void
vm_compressor_swap_init_swap_file_limit(void)424 vm_compressor_swap_init_swap_file_limit(void)
425 {
426 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
427 #if CONFIG_JETSAM
428 if (memorystatus_swap_all_apps) {
429 if (vm_swap_volume_capacity == 0) {
430 /*
431 * Early in boot we don't know the swap volume capacity.
432 * That's fine. Reserve space for the maximum config
433 * and we'll lower this later in boot once we have the capacity.
434 */
435 vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
436 } else {
437 static uint64_t kFixedPointFactor = 100;
438 /*
439 * Scale the max number of swap files linearly.
440 * But we can never go above VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM.
441 */
442 vm_num_swap_files_config = vm_swap_volume_capacity * kFixedPointFactor / VM_SWAP_MIN_VOLUME_CAPACITY
443 * VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / kFixedPointFactor;
444 vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
445 vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
446 }
447 }
448 #endif /* CONFIG_JETSAM */
449 #if DEVELOPMENT || DEBUG
450 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
451 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
452 if (parsed_vm_max_num_swap_files > 0) {
453 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
454 } else {
455 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
456 }
457 }
458 #endif
459 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
460 }
461 void
vm_compressor_swap_init(void)462 vm_compressor_swap_init(void)
463 {
464 thread_t thread = NULL;
465
466 queue_init(&swf_global_queue);
467
468 #if !XNU_TARGET_OS_OSX
469 /*
470 * dummy value until the swap file gets created
471 * when we drive the first c_segment_t to the
472 * swapout queue... at that time we will
473 * know the true size we have to work with
474 */
475 c_overage_swapped_limit = 16;
476 #endif /* !XNU_TARGET_OS_OSX */
477
478 compressed_swap_chunk_size = c_seg_bufsize;
479 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
480 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
481 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
482
483 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
484 BASEPRI_VM, &thread) != KERN_SUCCESS) {
485 panic("vm_swapout_thread: create failed");
486 }
487 thread_set_thread_name(thread, "VM_swapout");
488 vm_swapout_thread_id = thread->thread_id;
489 thread_deallocate(thread);
490
491 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
492 BASEPRI_VM, &thread) != KERN_SUCCESS) {
493 panic("vm_swapfile_create_thread: create failed");
494 }
495 thread_set_thread_name(thread, "VM_swapfile_create");
496 thread_deallocate(thread);
497
498 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
499 BASEPRI_VM, &thread) != KERN_SUCCESS) {
500 panic("vm_swapfile_gc_thread: create failed");
501 }
502 thread_set_thread_name(thread, "VM_swapfile_gc");
503 /*
504 * Swapfile garbage collection will need to allocate memory
505 * to complete its swap reclaim and in-memory compaction.
506 * So allow it to dip into the reserved VM page pool.
507 */
508 thread_lock(thread);
509 thread->options |= TH_OPT_VMPRIV;
510 thread_unlock(thread);
511 thread_deallocate(thread);
512 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
513 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
514 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
515 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
516
517 printf("VM Swap Subsystem is ON\n");
518 }
519
520
521 #if RECORD_THE_COMPRESSED_DATA
522
523 void
c_compressed_record_init()524 c_compressed_record_init()
525 {
526 if (c_compressed_record_init_done == FALSE) {
527 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
528 c_compressed_record_init_done = TRUE;
529 }
530 }
531
532 void
c_compressed_record_write(char * buf,int size)533 c_compressed_record_write(char *buf, int size)
534 {
535 if (c_compressed_record_write_error == 0) {
536 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
537 c_compressed_record_file_offset += size;
538 }
539 }
540 #endif
541
542
543 int compaction_swapper_inited = 0;
544
545 void
vm_compaction_swapper_do_init(void)546 vm_compaction_swapper_do_init(void)
547 {
548 struct vnode *vp;
549 char *pathname;
550 int namelen;
551
552 if (compaction_swapper_inited) {
553 return;
554 }
555
556 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
557 compaction_swapper_inited = 1;
558 return;
559 }
560 lck_mtx_lock(&vm_swap_data_lock);
561
562 if (!compaction_swapper_inited) {
563 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
564 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
565 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
566
567 vm_swapfile_open(pathname, &vp);
568
569 if (vp) {
570 if (vnode_pager_isSSD(vp) == FALSE) {
571 /*
572 * swap files live on an HDD, so let's make sure to start swapping
573 * much earlier since we're not worried about SSD write-wear and
574 * we have so little write bandwidth to work with
575 * these values were derived expermentially by running the performance
576 * teams stock test for evaluating HDD performance against various
577 * combinations and looking and comparing overall results.
578 * Note that the > relationship between these 4 values must be maintained
579 */
580 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
581 vm_compressor_minorcompact_threshold_divisor = 15;
582 }
583 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
584 vm_compressor_majorcompact_threshold_divisor = 18;
585 }
586 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
587 vm_compressor_unthrottle_threshold_divisor = 24;
588 }
589 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
590 vm_compressor_catchup_threshold_divisor = 30;
591 }
592 }
593 #if XNU_TARGET_OS_OSX
594 vnode_setswapmount(vp);
595 vm_swappin_avail = vnode_getswappin_avail(vp);
596
597 if (vm_swappin_avail) {
598 vm_swappin_enabled = TRUE;
599 }
600 #endif /* XNU_TARGET_OS_OSX */
601 vm_swapfile_close((uint64_t)pathname, vp);
602 }
603 kfree_data(pathname, namelen);
604
605 compaction_swapper_inited = 1;
606 }
607 lck_mtx_unlock(&vm_swap_data_lock);
608 }
609
610
611 void
vm_swap_consider_defragmenting(int flags)612 vm_swap_consider_defragmenting(int flags)
613 {
614 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
615 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
616
617 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
618 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
619 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
620 lck_mtx_lock(&vm_swap_data_lock);
621
622 if (force_defrag) {
623 vm_swap_force_defrag = TRUE;
624 }
625
626 if (force_reclaim) {
627 vm_swap_force_reclaim = TRUE;
628 }
629
630 if (!vm_swapfile_gc_thread_running) {
631 thread_wakeup((event_t) &vm_swapfile_gc_needed);
632 }
633
634 lck_mtx_unlock(&vm_swap_data_lock);
635 }
636 }
637 }
638
639
/* Statistics maintained by vm_swap_defragment(). */
int vm_swap_defragment_yielded = 0;     /* passes cut short because compaction was stopped or the swapper was busy */
int vm_swap_defragment_swapin = 0;      /* sparse segments handed to c_seg_swapin() */
int vm_swap_defragment_free = 0;        /* sparse segments with no bytes in use that were freed */
int vm_swap_defragment_busy = 0;        /* times the head segment was busy and had to be waited on */
644
645 #if CONFIG_FREEZE
646 extern int32_t c_segment_pages_compressed_incore;
647 extern int32_t c_segment_pages_compressed_incore_late_swapout;
648 extern uint32_t c_segment_pages_compressed_nearing_limit;
649 extern uint32_t c_segment_count;
650 extern uint32_t c_segments_nearing_limit;
651
652 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
653
654 extern bool freezer_incore_cseg_acct;
655 #endif /* CONFIG_FREEZE */
656
657 static void
vm_swap_defragment()658 vm_swap_defragment()
659 {
660 c_segment_t c_seg;
661
662 /*
663 * have to grab the master lock w/o holding
664 * any locks in spin mode
665 */
666 PAGE_REPLACEMENT_DISALLOWED(TRUE);
667
668 lck_mtx_lock_spin_always(c_list_lock);
669
670 while (!queue_empty(&c_swappedout_sparse_list_head)) {
671 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
672 vm_swap_defragment_yielded++;
673 break;
674 }
675 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
676
677 lck_mtx_lock_spin_always(&c_seg->c_lock);
678
679 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
680
681 if (c_seg->c_busy) {
682 lck_mtx_unlock_always(c_list_lock);
683
684 PAGE_REPLACEMENT_DISALLOWED(FALSE);
685 /*
686 * c_seg_wait_on_busy consumes c_seg->c_lock
687 */
688 c_seg_wait_on_busy(c_seg);
689
690 PAGE_REPLACEMENT_DISALLOWED(TRUE);
691
692 lck_mtx_lock_spin_always(c_list_lock);
693
694 vm_swap_defragment_busy++;
695 continue;
696 }
697 if (c_seg->c_bytes_used == 0) {
698 /*
699 * c_seg_free_locked consumes the c_list_lock
700 * and c_seg->c_lock
701 */
702 C_SEG_BUSY(c_seg);
703 c_seg_free_locked(c_seg);
704
705 vm_swap_defragment_free++;
706 } else {
707 lck_mtx_unlock_always(c_list_lock);
708
709 #if CONFIG_FREEZE
710 if (freezer_incore_cseg_acct) {
711 /*
712 * TODO(jason): These two are tricky because they're pre-emptive jetsams.
713 * The system is not unhealthy, but we know that it's about to become unhealthy once
714 * we do this swapin.
715 * So we're waking up the memorystatus thread to make space
716 * (hopefully) before this segment comes in.
717 *
718 * I think the compressor_backing_store needs to keep track of
719 * two new globals that will track the number of segments
720 * being swapped in due to defrag and the number of slots used
721 * in those segments.
722 * Then the health check below can be called from the memorystatus
723 * thread.
724 */
725 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
726 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
727 }
728
729 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
730 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
731 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
732 }
733 }
734 #endif /* CONFIG_FREEZE */
735 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
736 lck_mtx_unlock_always(&c_seg->c_lock);
737 vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
738 }
739
740 vm_swap_defragment_swapin++;
741 }
742 PAGE_REPLACEMENT_DISALLOWED(FALSE);
743
744 vm_pageout_io_throttle();
745
746 /*
747 * because write waiters have privilege over readers,
748 * dropping and immediately retaking the master lock will
749 * still allow any thread waiting to acquire the
750 * master lock exclusively an opportunity to take it
751 */
752 PAGE_REPLACEMENT_DISALLOWED(TRUE);
753
754 lck_mtx_lock_spin_always(c_list_lock);
755 }
756 lck_mtx_unlock_always(c_list_lock);
757
758 PAGE_REPLACEMENT_DISALLOWED(FALSE);
759 }
760
761
762 bool vm_swapfile_create_thread_inited = false;
763 static void
vm_swapfile_create_thread(void)764 vm_swapfile_create_thread(void)
765 {
766 clock_sec_t sec;
767 clock_nsec_t nsec;
768
769 if (!vm_swapfile_create_thread_inited) {
770 #if CONFIG_THREAD_GROUPS
771 thread_group_vm_add();
772 #endif /* CONFIG_THREAD_GROUPS */
773 current_thread()->options |= TH_OPT_VMPRIV;
774 vm_swapfile_create_thread_inited = true;
775 }
776
777 vm_swapfile_create_thread_awakened++;
778 vm_swapfile_create_thread_running = 1;
779
780 while (TRUE) {
781 /*
782 * walk through the list of swap files
783 * and do the delayed frees/trims for
784 * any swap file whose count of delayed
785 * frees is above the batch limit
786 */
787 vm_swap_handle_delayed_trims(FALSE);
788
789 lck_mtx_lock(&vm_swap_data_lock);
790
791 if (hibernate_in_progress_with_pinned_swap == TRUE) {
792 break;
793 }
794
795 if (compressor_store_stop_compaction == TRUE) {
796 break;
797 }
798
799 clock_get_system_nanotime(&sec, &nsec);
800
801 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
802 break;
803 }
804
805 lck_mtx_unlock(&vm_swap_data_lock);
806
807 if (vm_swap_create_file() == FALSE) {
808 vm_swapfile_last_failed_to_create_ts = sec;
809 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
810 } else {
811 vm_swapfile_last_successful_create_ts = sec;
812 }
813 }
814 vm_swapfile_create_thread_running = 0;
815
816 if (hibernate_in_progress_with_pinned_swap == TRUE) {
817 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
818 }
819
820 if (compressor_store_stop_compaction == TRUE) {
821 thread_wakeup((event_t)&compressor_store_stop_compaction);
822 }
823
824 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
825
826 lck_mtx_unlock(&vm_swap_data_lock);
827
828 thread_block((thread_continue_t)vm_swapfile_create_thread);
829
830 /* NOTREACHED */
831 }
832
833
834 #if HIBERNATION
835
836 kern_return_t
hibernate_pin_swap(boolean_t start)837 hibernate_pin_swap(boolean_t start)
838 {
839 vm_compaction_swapper_do_init();
840
841 if (start == FALSE) {
842 lck_mtx_lock(&vm_swap_data_lock);
843 hibernate_in_progress_with_pinned_swap = FALSE;
844 lck_mtx_unlock(&vm_swap_data_lock);
845
846 return KERN_SUCCESS;
847 }
848 if (vm_swappin_enabled == FALSE) {
849 return KERN_SUCCESS;
850 }
851
852 lck_mtx_lock(&vm_swap_data_lock);
853
854 hibernate_in_progress_with_pinned_swap = TRUE;
855
856 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
857 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
858
859 lck_mtx_unlock(&vm_swap_data_lock);
860
861 thread_block(THREAD_CONTINUE_NULL);
862
863 lck_mtx_lock(&vm_swap_data_lock);
864 }
865 if (vm_num_swap_files > vm_num_pinned_swap_files) {
866 hibernate_in_progress_with_pinned_swap = FALSE;
867 lck_mtx_unlock(&vm_swap_data_lock);
868
869 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
870 vm_num_swap_files, vm_num_pinned_swap_files);
871 return KERN_FAILURE;
872 }
873 lck_mtx_unlock(&vm_swap_data_lock);
874
875 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
876 if (vm_swap_create_file() == FALSE) {
877 break;
878 }
879 }
880 return KERN_SUCCESS;
881 }
882 #endif
883 bool vm_swapfile_gc_thread_inited = false;
884 static void
vm_swapfile_gc_thread(void)885 vm_swapfile_gc_thread(void)
886 {
887 boolean_t need_defragment;
888 boolean_t need_reclaim;
889
890 if (!vm_swapfile_gc_thread_inited) {
891 #if CONFIG_THREAD_GROUPS
892 thread_group_vm_add();
893 #endif /* CONFIG_THREAD_GROUPS */
894 vm_swapfile_gc_thread_inited = true;
895 }
896
897 vm_swapfile_gc_thread_awakened++;
898 vm_swapfile_gc_thread_running = 1;
899
900 while (TRUE) {
901 lck_mtx_lock(&vm_swap_data_lock);
902
903 if (hibernate_in_progress_with_pinned_swap == TRUE) {
904 break;
905 }
906
907 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
908 break;
909 }
910
911 need_defragment = FALSE;
912 need_reclaim = FALSE;
913
914 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
915 need_defragment = TRUE;
916 }
917
918 if (VM_SWAP_SHOULD_RECLAIM()) {
919 need_defragment = TRUE;
920 need_reclaim = TRUE;
921 }
922 if (need_defragment == FALSE && need_reclaim == FALSE) {
923 break;
924 }
925
926 vm_swap_force_defrag = FALSE;
927 vm_swap_force_reclaim = FALSE;
928
929 lck_mtx_unlock(&vm_swap_data_lock);
930
931 if (need_defragment == TRUE) {
932 vm_swap_defragment();
933 }
934 if (need_reclaim == TRUE) {
935 vm_swap_reclaim();
936 }
937 }
938 vm_swapfile_gc_thread_running = 0;
939
940 if (hibernate_in_progress_with_pinned_swap == TRUE) {
941 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
942 }
943
944 if (compressor_store_stop_compaction == TRUE) {
945 thread_wakeup((event_t)&compressor_store_stop_compaction);
946 }
947
948 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
949
950 lck_mtx_unlock(&vm_swap_data_lock);
951
952 thread_block((thread_continue_t)vm_swapfile_gc_thread);
953
954 /* NOTREACHED */
955 }
956
957
958
959 #define VM_SWAPOUT_LIMIT_T2P 4
960 #define VM_SWAPOUT_LIMIT_T1P 4
961 #define VM_SWAPOUT_LIMIT_T0P 6
962 #define VM_SWAPOUT_LIMIT_T0 8
963 #define VM_SWAPOUT_LIMIT_MAX 8
964
965 #define VM_SWAPOUT_START 0
966 #define VM_SWAPOUT_T2_PASSIVE 1
967 #define VM_SWAPOUT_T1_PASSIVE 2
968 #define VM_SWAPOUT_T0_PASSIVE 3
969 #define VM_SWAPOUT_T0 4
970
971 int vm_swapout_state = VM_SWAPOUT_START;
972 int vm_swapout_limit = 1;
973
974 int vm_swapper_entered_T0 = 0;
975 int vm_swapper_entered_T0P = 0;
976 int vm_swapper_entered_T1P = 0;
977 int vm_swapper_entered_T2P = 0;
978
979
980 static void
vm_swapout_thread_throttle_adjust(void)981 vm_swapout_thread_throttle_adjust(void)
982 {
983 switch (vm_swapout_state) {
984 case VM_SWAPOUT_START:
985
986 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
987 vm_swapper_entered_T2P++;
988
989 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
990 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
991 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
992 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
993 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
994 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
995
996 break;
997
998 case VM_SWAPOUT_T2_PASSIVE:
999
1000 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1001 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1002 vm_swapper_entered_T0P++;
1003
1004 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1005 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1006 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1007 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1008 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1009 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1010
1011 break;
1012 }
1013 if (swapout_target_age || hibernate_flushing == TRUE) {
1014 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
1015 vm_swapper_entered_T1P++;
1016
1017 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1018 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1019 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1020 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1021 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
1022 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
1023 }
1024 break;
1025
1026 case VM_SWAPOUT_T1_PASSIVE:
1027
1028 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1029 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1030 vm_swapper_entered_T0P++;
1031
1032 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1033 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1034 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1035 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1036 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1037 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1038
1039 break;
1040 }
1041 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
1042 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1043 vm_swapper_entered_T2P++;
1044
1045 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1046 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1047 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1048 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1049 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1050 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1051 }
1052 break;
1053
1054 case VM_SWAPOUT_T0_PASSIVE:
1055
1056 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1057 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1058 vm_swapper_entered_T2P++;
1059
1060 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1061 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1062 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1063 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1064 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1065 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1066
1067 break;
1068 }
1069 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1070 vm_swapper_entered_T0++;
1071
1072 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1073 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1074 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1075 vm_swapout_state = VM_SWAPOUT_T0;
1076 }
1077 break;
1078
1079 case VM_SWAPOUT_T0:
1080
1081 if (SWAPPER_HAS_CAUGHTUP()) {
1082 vm_swapper_entered_T0P++;
1083
1084 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1085 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1086 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1087 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1088 }
1089 break;
1090 }
1091 }
1092
1093 int vm_swapout_found_empty = 0;
1094
1095 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1096
1097 int vm_swapout_soc_busy = 0;
1098 int vm_swapout_soc_done = 0;
1099
1100
1101 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1102 vm_swapout_find_free_soc(void)
1103 {
1104 int i;
1105
1106 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1107 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1108 return &vm_swapout_ctx[i];
1109 }
1110 }
1111 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1112
1113 return NULL;
1114 }
1115
1116 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1117 vm_swapout_find_done_soc(void)
1118 {
1119 int i;
1120
1121 if (vm_swapout_soc_done) {
1122 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1123 if (vm_swapout_ctx[i].swp_io_done) {
1124 return &vm_swapout_ctx[i];
1125 }
1126 }
1127 }
1128 return NULL;
1129 }
1130
1131 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1132 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1133 {
1134 kern_return_t kr;
1135
1136 if (soc->swp_io_error) {
1137 kr = KERN_FAILURE;
1138 } else {
1139 kr = KERN_SUCCESS;
1140 }
1141
1142 lck_mtx_unlock_always(c_list_lock);
1143
1144 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1145 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1146
1147 lck_mtx_lock_spin_always(c_list_lock);
1148
1149 soc->swp_io_done = 0;
1150 soc->swp_io_busy = 0;
1151
1152 vm_swapout_soc_busy--;
1153 vm_swapout_soc_done--;
1154 }
1155
1156 bool vm_swapout_thread_inited = false;
1157 extern uint32_t c_donate_swapout_count;
1158 #if CONFIG_JETSAM
1159 bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
1160 /*
1161 * swapout_sleep_threshold sets the percentage of the swapout threshold at which
1162 * the swap thread will stop processing the swapout queue.
1163 * By default this is 90 which means we will swap until the
1164 * swapout queue size is at 90% of the threshold to wake the swap thread.
1165 * By definition the queue length must be >= 100% of the threshold when the.
1166 * swap thread is woken up. On development builds this can be adjusted with
1167 * the vm.swapout_sleep_threshold sysctl.
1168 */
1169 uint32_t swapout_sleep_threshold = 90;
1170 #endif /* CONFIG_JETSAM */
1171 static bool
should_process_swapout_queue(const queue_head_t * swapout_list_head)1172 should_process_swapout_queue(const queue_head_t *swapout_list_head)
1173 {
1174 bool process_queue = !queue_empty(swapout_list_head) &&
1175 vm_swapout_soc_busy < vm_swapout_limit &&
1176 !compressor_store_stop_compaction;
1177 #if CONFIG_JETSAM
1178 if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
1179 process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
1180 }
1181 #endif /* CONFIG_JETSAM */
1182 return process_queue;
1183 }
1184
1185 void
vm_swapout_thread(void)1186 vm_swapout_thread(void)
1187 {
1188 uint32_t size = 0;
1189 c_segment_t c_seg = NULL;
1190 kern_return_t kr = KERN_SUCCESS;
1191 struct swapout_io_completion *soc;
1192 queue_head_t *swapout_list_head;
1193 bool queues_empty = false;
1194
1195 if (!vm_swapout_thread_inited) {
1196 #if CONFIG_THREAD_GROUPS
1197 thread_group_vm_add();
1198 #endif /* CONFIG_THREAD_GROUPS */
1199 current_thread()->options |= TH_OPT_VMPRIV;
1200 vm_swapout_thread_inited = true;
1201 }
1202
1203 vm_swapout_thread_awakened++;
1204
1205 lck_mtx_lock_spin_always(c_list_lock);
1206
1207 swapout_list_head = &c_early_swapout_list_head;
1208 vm_swapout_thread_running = TRUE;
1209 os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
1210 again:
1211 while (should_process_swapout_queue(swapout_list_head)) {
1212 c_seg = (c_segment_t)queue_first(swapout_list_head);
1213
1214 lck_mtx_lock_spin_always(&c_seg->c_lock);
1215
1216 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1217
1218 if (c_seg->c_busy) {
1219 lck_mtx_unlock_always(c_list_lock);
1220
1221 c_seg_wait_on_busy(c_seg);
1222
1223 lck_mtx_lock_spin_always(c_list_lock);
1224
1225 continue;
1226 }
1227 vm_swapout_thread_processed_segments++;
1228
1229 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1230
1231 if (size == 0) {
1232 assert(c_seg->c_bytes_used == 0);
1233
1234 /*
1235 * c_seg_free_locked will drop the c_list_lock and
1236 * the c_seg->c_lock.
1237 */
1238 C_SEG_BUSY(c_seg);
1239 c_seg_free_locked(c_seg);
1240 c_seg = NULL;
1241
1242 vm_swapout_found_empty++;
1243 goto c_seg_is_empty;
1244 }
1245 C_SEG_BUSY(c_seg);
1246 c_seg->c_busy_swapping = 1;
1247
1248 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1249
1250 lck_mtx_unlock_always(c_list_lock);
1251 lck_mtx_unlock_always(&c_seg->c_lock);
1252
1253 #if CHECKSUM_THE_SWAP
1254 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1255 c_seg->cseg_swap_size = size;
1256 #endif /* CHECKSUM_THE_SWAP */
1257
1258 #if ENCRYPTED_SWAP
1259 vm_swap_encrypt(c_seg);
1260 #endif /* ENCRYPTED_SWAP */
1261
1262 soc = vm_swapout_find_free_soc();
1263 assert(soc);
1264
1265 soc->swp_upl_ctx.io_context = (void *)soc;
1266 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1267 soc->swp_upl_ctx.io_error = 0;
1268
1269 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1270
1271 if (kr != KERN_SUCCESS) {
1272 if (soc->swp_io_done) {
1273 lck_mtx_lock_spin_always(c_list_lock);
1274
1275 soc->swp_io_done = 0;
1276 vm_swapout_soc_done--;
1277
1278 lck_mtx_unlock_always(c_list_lock);
1279 }
1280 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1281 } else {
1282 soc->swp_io_busy = 1;
1283 vm_swapout_soc_busy++;
1284 }
1285
1286 c_seg_is_empty:
1287 if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
1288 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1289 }
1290
1291 lck_mtx_lock_spin_always(c_list_lock);
1292
1293 while ((soc = vm_swapout_find_done_soc())) {
1294 vm_swapout_complete_soc(soc);
1295 }
1296 lck_mtx_unlock_always(c_list_lock);
1297
1298 vm_swapout_thread_throttle_adjust();
1299
1300 lck_mtx_lock_spin_always(c_list_lock);
1301 }
1302 while ((soc = vm_swapout_find_done_soc())) {
1303 vm_swapout_complete_soc(soc);
1304 }
1305 lck_mtx_unlock_always(c_list_lock);
1306
1307 vm_pageout_io_throttle();
1308
1309 lck_mtx_lock_spin_always(c_list_lock);
1310
1311 /*
1312 * Recheck if we have some c_segs to wakeup
1313 * post throttle. And, check to see if we
1314 * have any more swapouts needed.
1315 */
1316 if (vm_swapout_soc_done) {
1317 goto again;
1318 }
1319
1320 #if XNU_TARGET_OS_OSX
1321 queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1322 #else /* XNU_TARGET_OS_OSX */
1323 queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1324 #endif /* XNU_TARGET_OS_OSX */
1325
1326 if (!queues_empty) {
1327 swapout_list_head = NULL;
1328 if (!queue_empty(&c_early_swapout_list_head)) {
1329 swapout_list_head = &c_early_swapout_list_head;
1330 } else {
1331 #if XNU_TARGET_OS_OSX
1332 /*
1333 * On macOS we _always_ processs all swapout queues.
1334 */
1335 if (!queue_empty(&c_regular_swapout_list_head)) {
1336 swapout_list_head = &c_regular_swapout_list_head;
1337 } else {
1338 swapout_list_head = &c_late_swapout_list_head;
1339 }
1340 #else /* XNU_TARGET_OS_OSX */
1341 /*
1342 * On non-macOS swap-capable platforms, we might want to
1343 * processs just the early queue (Freezer) or process both
1344 * early and late queues (app swap). We processed the early
1345 * queue up above. The late Q will only be processed if the
1346 * checks in should_process_swapout_queue give the go-ahead.
1347 */
1348 swapout_list_head = &c_late_swapout_list_head;
1349 #endif /* XNU_TARGET_OS_OSX */
1350 }
1351 if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
1352 goto again;
1353 }
1354 }
1355
1356 assert_wait((event_t)&vm_swapout_thread, THREAD_UNINT);
1357
1358 vm_swapout_thread_running = FALSE;
1359
1360 lck_mtx_unlock_always(c_list_lock);
1361
1362 thread_block((thread_continue_t)vm_swapout_thread);
1363
1364 /* NOTREACHED */
1365 }
1366
1367
1368 void
vm_swapout_iodone(void * io_context,int error)1369 vm_swapout_iodone(void *io_context, int error)
1370 {
1371 struct swapout_io_completion *soc;
1372
1373 soc = (struct swapout_io_completion *)io_context;
1374
1375 lck_mtx_lock_spin_always(c_list_lock);
1376
1377 soc->swp_io_done = 1;
1378 soc->swp_io_error = error;
1379 vm_swapout_soc_done++;
1380
1381 if (!vm_swapout_thread_running) {
1382 thread_wakeup((event_t)&vm_swapout_thread);
1383 }
1384
1385 lck_mtx_unlock_always(c_list_lock);
1386 }
1387
1388
1389 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1390 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1391 {
1392 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1393
1394 if (kr == KERN_SUCCESS) {
1395 kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
1396 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1397 }
1398 #if ENCRYPTED_SWAP
1399 else {
1400 vm_swap_decrypt(c_seg);
1401 }
1402 #endif /* ENCRYPTED_SWAP */
1403 lck_mtx_lock_spin_always(c_list_lock);
1404 lck_mtx_lock_spin_always(&c_seg->c_lock);
1405
1406 if (kr == KERN_SUCCESS) {
1407 int new_state = C_ON_SWAPPEDOUT_Q;
1408 boolean_t insert_head = FALSE;
1409
1410 if (hibernate_flushing == TRUE) {
1411 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1412 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1413 insert_head = TRUE;
1414 }
1415 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1416 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1417 }
1418
1419 c_seg_switch_state(c_seg, new_state, insert_head);
1420
1421 c_seg->c_store.c_swap_handle = f_offset;
1422
1423 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1424
1425 c_seg->c_swappedin = false;
1426
1427 if (c_seg->c_bytes_used) {
1428 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1429 }
1430
1431 #if CONFIG_FREEZE
1432 /*
1433 * Successful swapout. Decrement the in-core compressed pages count.
1434 */
1435 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1436 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1437 if (c_seg->c_has_donated_pages) {
1438 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore_late_swapout);
1439 }
1440 #endif /* CONFIG_FREEZE */
1441 } else {
1442 if (c_seg->c_overage_swap == TRUE) {
1443 c_seg->c_overage_swap = FALSE;
1444 c_overage_swapped_count--;
1445 }
1446
1447 #if CONFIG_FREEZE
1448 if (c_seg->c_has_freezer_pages) {
1449 if (c_seg->c_task_owner) {
1450 c_seg_update_task_owner(c_seg, NULL);
1451 }
1452 /*
1453 * We failed to swapout a frozen cseg. We need
1454 * to put it back in the queues, specifically the
1455 * AGE_Q. So clear the donated bit otherwise it'll
1456 * land on the swapped_in Q.
1457 */
1458 c_seg->c_has_donated_pages = 0;
1459 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1460 } else
1461 #endif /* CONFIG_FREEZE */
1462 {
1463 if (c_seg->c_has_donated_pages) {
1464 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
1465 } else {
1466 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1467 }
1468 }
1469
1470 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1471 c_seg_need_delayed_compaction(c_seg, TRUE);
1472 }
1473 }
1474 assert(c_seg->c_busy_swapping);
1475 assert(c_seg->c_busy);
1476
1477 c_seg->c_busy_swapping = 0;
1478 lck_mtx_unlock_always(c_list_lock);
1479
1480 C_SEG_WAKEUP_DONE(c_seg);
1481 lck_mtx_unlock_always(&c_seg->c_lock);
1482
1483 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1484 }
1485
1486
1487 boolean_t
vm_swap_create_file()1488 vm_swap_create_file()
1489 {
1490 uint64_t size = 0;
1491 int namelen = 0;
1492 boolean_t swap_file_created = FALSE;
1493 boolean_t swap_file_reuse = FALSE;
1494 boolean_t swap_file_pin = FALSE;
1495 struct swapfile *swf = NULL;
1496
1497 /*
1498 * make sure we've got all the info we need
1499 * to potentially pin a swap file... we could
1500 * be swapping out due to hibernation w/o ever
1501 * having run vm_pageout_scan, which is normally
1502 * the trigger to do the init
1503 */
1504 vm_compaction_swapper_do_init();
1505
1506 /*
1507 * Any swapfile structure ready for re-use?
1508 */
1509
1510 lck_mtx_lock(&vm_swap_data_lock);
1511
1512 swf = (struct swapfile*) queue_first(&swf_global_queue);
1513
1514 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1515 if (swf->swp_flags == SWAP_REUSE) {
1516 swap_file_reuse = TRUE;
1517 break;
1518 }
1519 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1520 }
1521
1522 lck_mtx_unlock(&vm_swap_data_lock);
1523
1524 if (swap_file_reuse == FALSE) {
1525 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1526
1527 swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1528 swf->swp_index = vm_num_swap_files + 1;
1529 swf->swp_pathlen = namelen;
1530 swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1531
1532 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1533 }
1534
1535 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1536
1537 if (swf->swp_vp == NULL) {
1538 if (swap_file_reuse == FALSE) {
1539 kfree_data(swf->swp_path, swf->swp_pathlen);
1540 kfree_type(struct swapfile, swf);
1541 }
1542 return FALSE;
1543 }
1544 vm_swapfile_can_be_created = TRUE;
1545
1546 size = MAX_SWAP_FILE_SIZE;
1547
1548 while (size >= MIN_SWAP_FILE_SIZE) {
1549 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1550
1551 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1552 int num_bytes_for_bitmap = 0;
1553
1554 swap_file_created = TRUE;
1555
1556 swf->swp_size = size;
1557 swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1558 swf->swp_nseginuse = 0;
1559 swf->swp_free_hint = 0;
1560
1561 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1562 /*
1563 * Allocate a bitmap that describes the
1564 * number of segments held by this swapfile.
1565 */
1566 swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1567 Z_WAITOK | Z_ZERO);
1568
1569 swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1570 Z_WAITOK | Z_ZERO);
1571
1572 /*
1573 * passing a NULL trim_list into vnode_trim_list
1574 * will return ENOTSUP if trim isn't supported
1575 * and 0 if it is
1576 */
1577 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1578 swp_trim_supported = TRUE;
1579 }
1580
1581 lck_mtx_lock(&vm_swap_data_lock);
1582
1583 swf->swp_flags = SWAP_READY;
1584
1585 if (swap_file_reuse == FALSE) {
1586 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1587 }
1588
1589 vm_num_swap_files++;
1590
1591 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1592 if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1593 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1594 }
1595
1596 if (swap_file_pin == TRUE) {
1597 vm_num_pinned_swap_files++;
1598 swf->swp_flags |= SWAP_PINNED;
1599 vm_swappin_avail -= swf->swp_size;
1600 }
1601
1602 lck_mtx_unlock(&vm_swap_data_lock);
1603
1604 thread_wakeup((event_t) &vm_num_swap_files);
1605 #if !XNU_TARGET_OS_OSX
1606 if (vm_num_swap_files == 1) {
1607 c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1608
1609 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1610 c_overage_swapped_limit /= 2;
1611 }
1612 }
1613 #endif /* !XNU_TARGET_OS_OSX */
1614 break;
1615 } else {
1616 size = size / 2;
1617 }
1618 }
1619 if (swap_file_created == FALSE) {
1620 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1621
1622 swf->swp_vp = NULL;
1623
1624 if (swap_file_reuse == FALSE) {
1625 kfree_data(swf->swp_path, swf->swp_pathlen);
1626 kfree_type(struct swapfile, swf);
1627 }
1628 }
1629 return swap_file_created;
1630 }
1631
1632 extern void vnode_put(struct vnode* vp);
1633 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1634 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1635 {
1636 struct swapfile *swf = NULL;
1637 uint64_t file_offset = 0;
1638 int retval = 0;
1639
1640 assert(c_seg->c_store.c_buffer);
1641
1642 lck_mtx_lock(&vm_swap_data_lock);
1643
1644 swf = vm_swapfile_for_handle(f_offset);
1645
1646 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1647 vm_swap_get_failures++;
1648 retval = 1;
1649 goto done;
1650 }
1651 swf->swp_io_count++;
1652
1653 lck_mtx_unlock(&vm_swap_data_lock);
1654
1655 #if DEVELOPMENT || DEBUG
1656 C_SEG_MAKE_WRITEABLE(c_seg);
1657 #endif
1658 file_offset = (f_offset & SWAP_SLOT_MASK);
1659
1660 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1661 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1662 } else {
1663 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1664 vnode_put(swf->swp_vp);
1665 }
1666
1667 #if DEVELOPMENT || DEBUG
1668 C_SEG_WRITE_PROTECT(c_seg);
1669 #endif
1670 if (retval == 0) {
1671 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1672 } else {
1673 vm_swap_get_failures++;
1674 }
1675
1676 /*
1677 * Free this slot in the swap structure.
1678 */
1679 vm_swap_free(f_offset);
1680
1681 lck_mtx_lock(&vm_swap_data_lock);
1682 swf->swp_io_count--;
1683
1684 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1685 swf->swp_flags &= ~SWAP_WANTED;
1686 thread_wakeup((event_t) &swf->swp_flags);
1687 }
1688 done:
1689 lck_mtx_unlock(&vm_swap_data_lock);
1690
1691 if (retval == 0) {
1692 return KERN_SUCCESS;
1693 } else {
1694 return KERN_FAILURE;
1695 }
1696 }
1697
1698 kern_return_t
vm_swap_put(vm_offset_t addr,uint64_t * f_offset,uint32_t size,c_segment_t c_seg,struct swapout_io_completion * soc)1699 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1700 {
1701 unsigned int segidx = 0;
1702 struct swapfile *swf = NULL;
1703 uint64_t file_offset = 0;
1704 uint64_t swapfile_index = 0;
1705 unsigned int byte_for_segidx = 0;
1706 unsigned int offset_within_byte = 0;
1707 boolean_t swf_eligible = FALSE;
1708 boolean_t waiting = FALSE;
1709 boolean_t retried = FALSE;
1710 int error = 0;
1711 clock_sec_t sec;
1712 clock_nsec_t nsec;
1713 void *upl_ctx = NULL;
1714 boolean_t drop_iocount = FALSE;
1715
1716 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1717 return KERN_FAILURE;
1718 }
1719 retry:
1720 lck_mtx_lock(&vm_swap_data_lock);
1721
1722 swf = (struct swapfile*) queue_first(&swf_global_queue);
1723
1724 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1725 segidx = swf->swp_free_hint;
1726
1727 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1728
1729 if (swf_eligible) {
1730 while (segidx < swf->swp_nsegs) {
1731 byte_for_segidx = segidx >> 3;
1732 offset_within_byte = segidx % 8;
1733
1734 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1735 segidx++;
1736 continue;
1737 }
1738
1739 (swf->swp_bitmap)[byte_for_segidx] |= (uint8_t)(1 << offset_within_byte);
1740
1741 file_offset = segidx * compressed_swap_chunk_size;
1742 swf->swp_nseginuse++;
1743 swf->swp_io_count++;
1744 swf->swp_csegs[segidx] = c_seg;
1745
1746 swapfile_index = swf->swp_index;
1747 vm_swapfile_total_segs_used++;
1748 if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1749 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1750 }
1751
1752 clock_get_system_nanotime(&sec, &nsec);
1753
1754 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1755 thread_wakeup((event_t) &vm_swapfile_create_needed);
1756 }
1757
1758 lck_mtx_unlock(&vm_swap_data_lock);
1759
1760 goto issue_io;
1761 }
1762 }
1763 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1764 }
1765 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1766
1767 /*
1768 * we've run out of swap segments, but may not
1769 * be in a position to immediately create a new swap
1770 * file if we've recently failed to create due to a lack
1771 * of free space in the root filesystem... we'll try
1772 * to kick that create off, but in any event we're going
1773 * to take a breather (up to 1 second) so that we're not caught in a tight
1774 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1775 * segments into swap files only to have them immediately put back
1776 * on the c_age queue due to vm_swap_put failing.
1777 *
1778 * if we're doing these puts due to a hibernation flush,
1779 * no need to block... setting hibernate_no_swapspace to TRUE,
1780 * will cause "vm_compressor_compact_and_swap" to immediately abort
1781 */
1782 clock_get_system_nanotime(&sec, &nsec);
1783
1784 if (VM_SWAP_SHOULD_CREATE(sec)) {
1785 if (!vm_swapfile_create_thread_running) {
1786 thread_wakeup((event_t) &vm_swapfile_create_needed);
1787 }
1788 waiting = TRUE;
1789 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1790 } else {
1791 if (hibernate_flushing) {
1792 hibernate_no_swapspace = TRUE;
1793 }
1794 }
1795
1796 lck_mtx_unlock(&vm_swap_data_lock);
1797
1798 if (waiting == TRUE) {
1799 thread_block(THREAD_CONTINUE_NULL);
1800
1801 if (retried == FALSE && hibernate_flushing == TRUE) {
1802 retried = TRUE;
1803 goto retry;
1804 }
1805 }
1806 vm_swap_put_failures_no_swap_file++;
1807
1808 return KERN_FAILURE;
1809
1810 issue_io:
1811 assert(c_seg->c_busy_swapping);
1812 assert(c_seg->c_busy);
1813 assert(!c_seg->c_on_minorcompact_q);
1814
1815 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1816
1817 if (soc) {
1818 soc->swp_c_seg = c_seg;
1819 soc->swp_c_size = size;
1820
1821 soc->swp_swf = swf;
1822
1823 soc->swp_io_error = 0;
1824 soc->swp_io_done = 0;
1825
1826 upl_ctx = (void *)&soc->swp_upl_ctx;
1827 }
1828
1829 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1830 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1831 } else {
1832 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1833 drop_iocount = TRUE;
1834 }
1835
1836 if (error || upl_ctx == NULL) {
1837 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1838 }
1839
1840 return KERN_SUCCESS;
1841 }
1842
1843 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1844 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1845 {
1846 if (drop_iocount) {
1847 vnode_put(swf->swp_vp);
1848 }
1849
1850 lck_mtx_lock(&vm_swap_data_lock);
1851
1852 swf->swp_io_count--;
1853
1854 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1855 swf->swp_flags &= ~SWAP_WANTED;
1856 thread_wakeup((event_t) &swf->swp_flags);
1857 }
1858 lck_mtx_unlock(&vm_swap_data_lock);
1859
1860 if (error) {
1861 vm_swap_free(*f_offset);
1862 vm_swap_put_failures++;
1863
1864 return KERN_FAILURE;
1865 }
1866 return KERN_SUCCESS;
1867 }
1868
1869
1870 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1871 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1872 {
1873 uint64_t file_offset = 0;
1874 unsigned int segidx = 0;
1875
1876
1877 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1878 unsigned int byte_for_segidx = 0;
1879 unsigned int offset_within_byte = 0;
1880
1881 file_offset = (f_offset & SWAP_SLOT_MASK);
1882 segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1883
1884 byte_for_segidx = segidx >> 3;
1885 offset_within_byte = segidx % 8;
1886
1887 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1888 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1889
1890 swf->swp_csegs[segidx] = NULL;
1891
1892 swf->swp_nseginuse--;
1893 vm_swapfile_total_segs_used--;
1894
1895 if (segidx < swf->swp_free_hint) {
1896 swf->swp_free_hint = segidx;
1897 }
1898 }
1899 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1900 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1901 }
1902 }
1903 }
1904
1905
1906 uint32_t vm_swap_free_now_count = 0;
1907 uint32_t vm_swap_free_delayed_count = 0;
1908
1909
1910 void
vm_swap_free(uint64_t f_offset)1911 vm_swap_free(uint64_t f_offset)
1912 {
1913 struct swapfile *swf = NULL;
1914 struct trim_list *tl = NULL;
1915 clock_sec_t sec;
1916 clock_nsec_t nsec;
1917
1918 if (swp_trim_supported == TRUE) {
1919 tl = kalloc_type(struct trim_list, Z_WAITOK);
1920 }
1921
1922 lck_mtx_lock(&vm_swap_data_lock);
1923
1924 swf = vm_swapfile_for_handle(f_offset);
1925
1926 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1927 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1928 /*
1929 * don't delay the free if the underlying disk doesn't support
1930 * trim, or we're in the midst of reclaiming this swap file since
1931 * we don't want to move segments that are technically free
1932 * but not yet handled by the delayed free mechanism
1933 */
1934 vm_swap_free_now(swf, f_offset);
1935
1936 vm_swap_free_now_count++;
1937 goto done;
1938 }
1939 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1940 tl->tl_length = compressed_swap_chunk_size;
1941
1942 tl->tl_next = swf->swp_delayed_trim_list_head;
1943 swf->swp_delayed_trim_list_head = tl;
1944 swf->swp_delayed_trim_count++;
1945 tl = NULL;
1946
1947 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1948 clock_get_system_nanotime(&sec, &nsec);
1949
1950 if (sec > dont_trim_until_ts) {
1951 thread_wakeup((event_t) &vm_swapfile_create_needed);
1952 }
1953 }
1954 vm_swap_free_delayed_count++;
1955 }
1956 done:
1957 lck_mtx_unlock(&vm_swap_data_lock);
1958
1959 if (tl != NULL) {
1960 kfree_type(struct trim_list, tl);
1961 }
1962 }
1963
1964
1965 static void
vm_swap_wait_on_trim_handling_in_progress()1966 vm_swap_wait_on_trim_handling_in_progress()
1967 {
1968 while (delayed_trim_handling_in_progress == TRUE) {
1969 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1970 lck_mtx_unlock(&vm_swap_data_lock);
1971
1972 thread_block(THREAD_CONTINUE_NULL);
1973
1974 lck_mtx_lock(&vm_swap_data_lock);
1975 }
1976 }
1977
1978
1979 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1980 vm_swap_handle_delayed_trims(boolean_t force_now)
1981 {
1982 struct swapfile *swf = NULL;
1983
1984 /*
1985 * serialize the race between us and vm_swap_reclaim...
1986 * if vm_swap_reclaim wins it will turn off SWAP_READY
1987 * on the victim it has chosen... we can just skip over
1988 * that file since vm_swap_reclaim will first process
1989 * all of the delayed trims associated with it
1990 */
1991
1992 if (compressor_store_stop_compaction == TRUE) {
1993 return;
1994 }
1995
1996 lck_mtx_lock(&vm_swap_data_lock);
1997
1998 delayed_trim_handling_in_progress = TRUE;
1999
2000 lck_mtx_unlock(&vm_swap_data_lock);
2001
2002 /*
2003 * no need to hold the lock to walk the swf list since
2004 * vm_swap_create (the only place where we add to this list)
2005 * is run on the same thread as this function
2006 * and vm_swap_reclaim doesn't remove items from this list
2007 * instead marking them with SWAP_REUSE for future re-use
2008 */
2009 swf = (struct swapfile*) queue_first(&swf_global_queue);
2010
2011 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2012 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
2013 assert(!(swf->swp_flags & SWAP_RECLAIM));
2014 vm_swap_do_delayed_trim(swf);
2015 }
2016 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2017 }
2018 lck_mtx_lock(&vm_swap_data_lock);
2019
2020 delayed_trim_handling_in_progress = FALSE;
2021 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
2022
2023 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
2024 thread_wakeup((event_t) &vm_swapfile_gc_needed);
2025 }
2026
2027 lck_mtx_unlock(&vm_swap_data_lock);
2028 }
2029
2030 static void
vm_swap_do_delayed_trim(struct swapfile * swf)2031 vm_swap_do_delayed_trim(struct swapfile *swf)
2032 {
2033 struct trim_list *tl, *tl_head;
2034 int error;
2035
2036 if (compressor_store_stop_compaction == TRUE) {
2037 return;
2038 }
2039
2040 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
2041 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
2042 return;
2043 }
2044
2045 lck_mtx_lock(&vm_swap_data_lock);
2046
2047 tl_head = swf->swp_delayed_trim_list_head;
2048 swf->swp_delayed_trim_list_head = NULL;
2049 swf->swp_delayed_trim_count = 0;
2050
2051 lck_mtx_unlock(&vm_swap_data_lock);
2052
2053 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
2054
2055 (void) vnode_put(swf->swp_vp);
2056
2057 while ((tl = tl_head) != NULL) {
2058 unsigned int segidx = 0;
2059 unsigned int byte_for_segidx = 0;
2060 unsigned int offset_within_byte = 0;
2061
2062 lck_mtx_lock(&vm_swap_data_lock);
2063
2064 segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
2065
2066 byte_for_segidx = segidx >> 3;
2067 offset_within_byte = segidx % 8;
2068
2069 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
2070 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2071
2072 swf->swp_csegs[segidx] = NULL;
2073
2074 swf->swp_nseginuse--;
2075 vm_swapfile_total_segs_used--;
2076
2077 if (segidx < swf->swp_free_hint) {
2078 swf->swp_free_hint = segidx;
2079 }
2080 }
2081 lck_mtx_unlock(&vm_swap_data_lock);
2082
2083 tl_head = tl->tl_next;
2084
2085 kfree_type(struct trim_list, tl);
2086 }
2087 }
2088
2089
/*
 * Placeholder entry point: performs no work and returns immediately.
 */
void
vm_swap_flush(void)
{
}
2095
2096 int vm_swap_reclaim_yielded = 0;
2097
2098 void
vm_swap_reclaim(void)2099 vm_swap_reclaim(void)
2100 {
2101 vm_offset_t addr = 0;
2102 unsigned int segidx = 0;
2103 uint64_t f_offset = 0;
2104 struct swapfile *swf = NULL;
2105 struct swapfile *smallest_swf = NULL;
2106 unsigned int min_nsegs = 0;
2107 unsigned int byte_for_segidx = 0;
2108 unsigned int offset_within_byte = 0;
2109 uint32_t c_size = 0;
2110
2111 c_segment_t c_seg = NULL;
2112
2113 kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
2114 KMA_NOFAIL | KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
2115
2116 lck_mtx_lock(&vm_swap_data_lock);
2117
2118 /*
2119 * if we're running the swapfile list looking for
2120 * candidates with delayed trims, we need to
2121 * wait before making our decision concerning
2122 * the swapfile we want to reclaim
2123 */
2124 vm_swap_wait_on_trim_handling_in_progress();
2125
2126 /*
2127 * from here until we knock down the SWAP_READY bit,
2128 * we need to remain behind the vm_swap_data_lock...
2129 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
2130 * will not consider this swapfile for processing
2131 */
2132 swf = (struct swapfile*) queue_first(&swf_global_queue);
2133 min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
2134 smallest_swf = NULL;
2135
2136 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2137 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
2138 smallest_swf = swf;
2139 min_nsegs = swf->swp_nseginuse;
2140 }
2141 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2142 }
2143
2144 if (smallest_swf == NULL) {
2145 goto done;
2146 }
2147
2148 swf = smallest_swf;
2149
2150
2151 swf->swp_flags &= ~SWAP_READY;
2152 swf->swp_flags |= SWAP_RECLAIM;
2153
2154 if (swf->swp_delayed_trim_count) {
2155 lck_mtx_unlock(&vm_swap_data_lock);
2156
2157 vm_swap_do_delayed_trim(swf);
2158
2159 lck_mtx_lock(&vm_swap_data_lock);
2160 }
2161 segidx = 0;
2162
2163 while (segidx < swf->swp_nsegs) {
2164 ReTry_for_cseg:
2165 /*
2166 * Wait for outgoing I/Os.
2167 */
2168 while (swf->swp_io_count) {
2169 swf->swp_flags |= SWAP_WANTED;
2170
2171 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2172 lck_mtx_unlock(&vm_swap_data_lock);
2173
2174 thread_block(THREAD_CONTINUE_NULL);
2175
2176 lck_mtx_lock(&vm_swap_data_lock);
2177 }
2178 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2179 vm_swap_reclaim_yielded++;
2180 break;
2181 }
2182
2183 byte_for_segidx = segidx >> 3;
2184 offset_within_byte = segidx % 8;
2185
2186 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2187 segidx++;
2188 continue;
2189 }
2190
2191 c_seg = swf->swp_csegs[segidx];
2192 assert(c_seg);
2193
2194 lck_mtx_lock_spin_always(&c_seg->c_lock);
2195
2196 if (c_seg->c_busy) {
2197 /*
2198 * a swapped out c_segment in the process of being freed will remain in the
2199 * busy state until after the vm_swap_free is called on it... vm_swap_free
2200 * takes the vm_swap_data_lock, so can't change the swap state until after
2201 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2202 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2203 * at that point, we re-look up the swap state which will now indicate that
2204 * this c_segment no longer exists.
2205 */
2206 c_seg->c_wanted = 1;
2207
2208 assert_wait((event_t) (c_seg), THREAD_UNINT);
2209 lck_mtx_unlock_always(&c_seg->c_lock);
2210
2211 lck_mtx_unlock(&vm_swap_data_lock);
2212
2213 thread_block(THREAD_CONTINUE_NULL);
2214
2215 lck_mtx_lock(&vm_swap_data_lock);
2216
2217 goto ReTry_for_cseg;
2218 }
2219 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2220
2221 f_offset = segidx * compressed_swap_chunk_size;
2222
2223 assert(c_seg == swf->swp_csegs[segidx]);
2224 swf->swp_csegs[segidx] = NULL;
2225 swf->swp_nseginuse--;
2226
2227 vm_swapfile_total_segs_used--;
2228
2229 lck_mtx_unlock(&vm_swap_data_lock);
2230
2231 assert(C_SEG_IS_ONDISK(c_seg));
2232
2233 C_SEG_BUSY(c_seg);
2234 c_seg->c_busy_swapping = 1;
2235 #if !CHECKSUM_THE_SWAP
2236 c_seg_trim_tail(c_seg);
2237 #endif
2238 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2239
2240 assert(c_size <= c_seg_bufsize && c_size);
2241
2242 lck_mtx_unlock_always(&c_seg->c_lock);
2243
2244 if (vnode_getwithref(swf->swp_vp)) {
2245 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2246 vm_swap_get_failures++;
2247 goto swap_io_failed;
2248 } else {
2249 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2250 /*
2251 * reading the data back in failed, so convert c_seg
2252 * to a swapped in c_segment that contains no data
2253 */
2254 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2255 /*
2256 * returns with c_busy_swapping cleared
2257 */
2258 vnode_put(swf->swp_vp);
2259 vm_swap_get_failures++;
2260 goto swap_io_failed;
2261 }
2262 vnode_put(swf->swp_vp);
2263 }
2264
2265 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2266 vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2267
2268 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2269 vm_offset_t c_buffer;
2270
2271 /*
2272 * the put failed, so convert c_seg to a fully swapped in c_segment
2273 * with valid data
2274 */
2275 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2276
2277 kernel_memory_populate(c_buffer, c_size,
2278 KMA_NOFAIL | KMA_COMPRESSOR,
2279 VM_KERN_MEMORY_COMPRESSOR);
2280
2281 memcpy((char *)c_buffer, (char *)addr, c_size);
2282
2283 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2284 #if ENCRYPTED_SWAP
2285 vm_swap_decrypt(c_seg);
2286 #endif /* ENCRYPTED_SWAP */
2287 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2288 /*
2289 * returns with c_busy_swapping cleared
2290 */
2291 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2292
2293 goto swap_io_failed;
2294 }
2295 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2296
2297 lck_mtx_lock_spin_always(&c_seg->c_lock);
2298
2299 c_seg->c_swappedin = false;
2300
2301 assert(C_SEG_IS_ONDISK(c_seg));
2302 /*
2303 * The c_seg will now know about the new location on disk.
2304 */
2305 c_seg->c_store.c_swap_handle = f_offset;
2306
2307 assert(c_seg->c_busy_swapping);
2308 c_seg->c_busy_swapping = 0;
2309 swap_io_failed:
2310 assert(c_seg->c_busy);
2311 C_SEG_WAKEUP_DONE(c_seg);
2312
2313 lck_mtx_unlock_always(&c_seg->c_lock);
2314 lck_mtx_lock(&vm_swap_data_lock);
2315 }
2316
2317 if (swf->swp_nseginuse) {
2318 swf->swp_flags &= ~SWAP_RECLAIM;
2319 swf->swp_flags |= SWAP_READY;
2320
2321 goto done;
2322 }
2323 /*
2324 * We don't remove this inactive swf from the queue.
2325 * That way, we can re-use it when needed again and
2326 * preserve the namespace. The delayed_trim processing
2327 * is also dependent on us not removing swfs from the queue.
2328 */
2329 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2330
2331 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2332
2333 lck_mtx_unlock(&vm_swap_data_lock);
2334
2335 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2336
2337 kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2338 kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2339
2340 lck_mtx_lock(&vm_swap_data_lock);
2341
2342 if (swf->swp_flags & SWAP_PINNED) {
2343 vm_num_pinned_swap_files--;
2344 vm_swappin_avail += swf->swp_size;
2345 }
2346
2347 swf->swp_vp = NULL;
2348 swf->swp_size = 0;
2349 swf->swp_free_hint = 0;
2350 swf->swp_nsegs = 0;
2351 swf->swp_flags = SWAP_REUSE;
2352
2353 vm_num_swap_files--;
2354
2355 done:
2356 thread_wakeup((event_t) &swf->swp_flags);
2357 lck_mtx_unlock(&vm_swap_data_lock);
2358
2359 kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2360 }
2361
2362
2363 uint64_t
vm_swap_get_total_space(void)2364 vm_swap_get_total_space(void)
2365 {
2366 uint64_t total_space = 0;
2367
2368 total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2369
2370 return total_space;
2371 }
2372
2373 uint64_t
vm_swap_get_used_space(void)2374 vm_swap_get_used_space(void)
2375 {
2376 uint64_t used_space = 0;
2377
2378 used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2379
2380 return used_space;
2381 }
2382
/*
 * Difference between vm_swap_get_total_space() and
 * vm_swap_get_used_space().
 */
uint64_t
vm_swap_get_free_space(void)
{
	const uint64_t total_space = vm_swap_get_total_space();
	const uint64_t used_space = vm_swap_get_used_space();

	return total_space - used_space;
}
2388
2389 uint64_t
vm_swap_get_max_configured_space(void)2390 vm_swap_get_max_configured_space(void)
2391 {
2392 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2393 return num_swap_files * MAX_SWAP_FILE_SIZE;
2394 }
2395
2396 int
vm_swap_low_on_space(void)2397 vm_swap_low_on_space(void)
2398 {
2399 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2400 return 0;
2401 }
2402
2403 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2404 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2405 return 0;
2406 }
2407
2408 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2409 return 1;
2410 }
2411 }
2412 return 0;
2413 }
2414
2415 int
vm_swap_out_of_space(void)2416 vm_swap_out_of_space(void)
2417 {
2418 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2419 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2420 /*
2421 * Last swapfile and we have only space for the
2422 * last few swapouts.
2423 */
2424 return 1;
2425 }
2426
2427 return 0;
2428 }
2429
2430 boolean_t
vm_swap_files_pinned(void)2431 vm_swap_files_pinned(void)
2432 {
2433 boolean_t result;
2434
2435 if (vm_swappin_enabled == FALSE) {
2436 return TRUE;
2437 }
2438
2439 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2440
2441 return result;
2442 }
2443
#if CONFIG_FREEZE
/*
 * Ask the swap volume for a freezer budget via vm_swap_vol_get_budget().
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget();
 *                      set to 0 here only when no swapfiles exist and
 *                      the temporary swapfile could not be opened.
 *
 * Returns TRUE when vm_swap_vol_get_budget() returned 0, FALSE
 * otherwise (including when no swapfile could be queried).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       use_device_value = FALSE;
	struct swapfile *swf = NULL;
	struct vnode    *temp_vp = NULL;

	if (!vm_num_swap_files) {
		/*
		 * Initial budget value, requested before any swap files
		 * exist: open a temporary swap file just to query the
		 * volume, then close it again.
		 */
		vm_swapfile_open(swapfilename, &temp_vp);

		if (temp_vp == NULL) {
			*freeze_daily_budget = 0;
			return use_device_value;
		}

		if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
			use_device_value = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, temp_vp);

		return use_device_value;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile *) queue_first(&swf_global_queue);

	if (swf) {
		for (; queue_end(&swf_global_queue, (queue_entry_t) swf) == FALSE;
		    swf = (struct swapfile *) queue_next(&swf->swp_queue)) {
			/* only a file whose flags are exactly SWAP_READY is queried */
			if (swf->swp_flags != SWAP_READY) {
				continue;
			}
			assert(swf->swp_vp);

			if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
				use_device_value = TRUE;
			}
			/* the first matching file decides, success or not */
			break;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return use_device_value;
}
#endif /* CONFIG_FREEZE */
2496
2497 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2498 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2499 {
2500 lck_mtx_lock(&vm_swap_data_lock);
2501
2502 *alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2503 *used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2504
2505 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2506 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2507
2508 lck_mtx_unlock(&vm_swap_data_lock);
2509 }
2510