1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42 * launchd explicitly turns ON swap later during boot on macOS devices.
43 */
44 boolean_t compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t compressor_store_stop_compaction = FALSE;
47 #endif
48
49 boolean_t vm_swapfile_create_needed = FALSE;
50 boolean_t vm_swapfile_gc_needed = FALSE;
51
52 int vm_swapper_throttle = -1;
53 uint64_t vm_swapout_thread_id;
54
55 uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56 uint64_t vm_swap_get_failures = 0; /* Fatal */
57 uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58 int vm_num_swap_files_config = 0;
59 int vm_num_swap_files = 0;
60 int vm_num_pinned_swap_files = 0;
61 int vm_swapout_thread_processed_segments = 0;
62 int vm_swapout_thread_awakened = 0;
63 bool vm_swapout_thread_running = FALSE;
64 int vm_swapfile_create_thread_awakened = 0;
65 int vm_swapfile_create_thread_running = 0;
66 int vm_swapfile_gc_thread_awakened = 0;
67 int vm_swapfile_gc_thread_running = 0;
68
69 int64_t vm_swappin_avail = 0;
70 boolean_t vm_swappin_enabled = FALSE;
71 unsigned int vm_swapfile_total_segs_alloced = 0;
72 unsigned int vm_swapfile_total_segs_alloced_max = 0;
73 unsigned int vm_swapfile_total_segs_used = 0;
74 unsigned int vm_swapfile_total_segs_used_max = 0;
75
76 char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
77
78 extern vm_map_t compressor_map;
79 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
80
81 #define SWAP_READY 0x1 /* Swap file is ready to be used */
82 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
83 #define SWAP_WANTED 0x4 /* Swap file has waiters */
84 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
85 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
86
87
88 struct swapfile {
89 queue_head_t swp_queue; /* list of swap files */
90 char *swp_path; /* saved pathname of swap file */
91 struct vnode *swp_vp; /* backing vnode */
92 uint64_t swp_size; /* size of this swap file */
93 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
94 unsigned int swp_pathlen; /* length of pathname */
95 unsigned int swp_nsegs; /* #segments we can use */
96 unsigned int swp_nseginuse; /* #segments in use */
97 unsigned int swp_index; /* index of this swap file */
98 unsigned int swp_flags; /* state of swap file */
99 unsigned int swp_free_hint; /* offset of 1st free chunk */
100 unsigned int swp_io_count; /* count of outstanding I/Os */
101 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
102
103 struct trim_list *swp_delayed_trim_list_head;
104 unsigned int swp_delayed_trim_count;
105 };
106
107 queue_head_t swf_global_queue;
108 boolean_t swp_trim_supported = FALSE;
109
110 extern clock_sec_t dont_trim_until_ts;
111 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
112 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
113 int vm_swapfile_can_be_created = FALSE;
114 boolean_t delayed_trim_handling_in_progress = FALSE;
115
116 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
117
118 static void vm_swapout_thread_throttle_adjust(void);
119 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
120 static void vm_swapout_thread(void);
121 static void vm_swapfile_create_thread(void);
122 static void vm_swapfile_gc_thread(void);
123 static void vm_swap_defragment(void);
124 static void vm_swap_handle_delayed_trims(boolean_t);
125 static void vm_swap_do_delayed_trim(struct swapfile *);
126 static void vm_swap_wait_on_trim_handling_in_progress(void);
127 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
128
129 extern int vnode_getwithref(struct vnode* vp);
130
131 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
132
/*
 * Swap policy knobs and predicates.
 *
 * The predicate macros read the file-scope swap counters directly and
 * evaluate to 1 or 0 (VM_SWAP_SHOULD_PIN evaluates to a boolean
 * expression). Macro parameters are parenthesized so that any expression
 * may be passed as an argument.
 */
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
/* Minimum number of seconds to wait after a failed swap file creation before retrying. */
#define VM_SWAPFILE_DELAYED_CREATE      15

/* Non-zero while swapouts are pending and the swapper runs at throttle tier 0. */
#define VM_SWAP_BUSY()  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
169
170
#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
/* State for the debug facility that records compressed data to a file. */
boolean_t       c_compressed_record_init_done = FALSE;
int             c_compressed_record_write_error = 0;    /* non-zero stops further record writes */
struct vnode    *c_compressed_record_vp = NULL;
uint64_t        c_compressed_record_file_offset = 0;    /* next write offset in the record file */
void    c_compressed_record_init(void);
void    c_compressed_record_write(char *, int);
#endif

extern void     vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);
187
188 /*
189 * Called with the vm_swap_data_lock held.
190 */
191
192 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)193 vm_swapfile_for_handle(uint64_t f_offset)
194 {
195 uint64_t file_offset = 0;
196 unsigned int swapfile_index = 0;
197 struct swapfile* swf = NULL;
198
199 file_offset = (f_offset & SWAP_SLOT_MASK);
200 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
201
202 swf = (struct swapfile*) queue_first(&swf_global_queue);
203
204 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
205 if (swapfile_index == swf->swp_index) {
206 break;
207 }
208
209 swf = (struct swapfile*) queue_next(&swf->swp_queue);
210 }
211
212 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
213 swf = NULL;
214 }
215
216 return swf;
217 }
218
219 #if ENCRYPTED_SWAP
220
221 #include <libkern/crypto/aesxts.h>
222
223 extern int cc_rand_generate(void *, size_t); /* from libkern/cyrpto/rand.h> */
224
225 boolean_t swap_crypt_initialized;
226 void swap_crypt_initialize(void);
227
228 symmetric_xts xts_modectx;
229 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
230 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
231
232 #if DEVELOPMENT || DEBUG
233 boolean_t swap_crypt_xts_tested = FALSE;
234 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
235 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
236 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
237 #endif /* DEVELOPMENT || DEBUG */
238
239 unsigned long vm_page_encrypt_counter;
240 unsigned long vm_page_decrypt_counter;
241
242
243 void
swap_crypt_initialize(void)244 swap_crypt_initialize(void)
245 {
246 uint8_t *enckey1, *enckey2;
247 int keylen1, keylen2;
248 int error;
249
250 assert(swap_crypt_initialized == FALSE);
251
252 keylen1 = sizeof(swap_crypt_key1);
253 enckey1 = (uint8_t *)&swap_crypt_key1;
254 keylen2 = sizeof(swap_crypt_key2);
255 enckey2 = (uint8_t *)&swap_crypt_key2;
256
257 error = cc_rand_generate((void *)enckey1, keylen1);
258 assert(!error);
259
260 error = cc_rand_generate((void *)enckey2, keylen2);
261 assert(!error);
262
263 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
264 assert(!error);
265
266 swap_crypt_initialized = TRUE;
267
268 #if DEVELOPMENT || DEBUG
269 uint8_t *encptr;
270 uint8_t *decptr;
271 uint8_t *refptr;
272 uint8_t *iv;
273 uint64_t ivnum[2];
274 int size = 0;
275 int i = 0;
276 int rc = 0;
277
278 assert(swap_crypt_xts_tested == FALSE);
279
280 /*
281 * Validate the encryption algorithms.
282 *
283 * First initialize the test data.
284 */
285 for (i = 0; i < 4096; i++) {
286 swap_crypt_test_page_ref[i] = (char) i;
287 }
288 ivnum[0] = (uint64_t)0xaa;
289 ivnum[1] = 0;
290 iv = (uint8_t *)ivnum;
291
292 refptr = (uint8_t *)swap_crypt_test_page_ref;
293 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
294 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
295 size = 4096;
296
297 /* encrypt */
298 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
299 assert(!rc);
300
301 /* compare result with original - should NOT match */
302 for (i = 0; i < 4096; i++) {
303 if (swap_crypt_test_page_encrypt[i] !=
304 swap_crypt_test_page_ref[i]) {
305 break;
306 }
307 }
308 assert(i != 4096);
309
310 /* decrypt */
311 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
312 assert(!rc);
313
314 /* compare result with original */
315 for (i = 0; i < 4096; i++) {
316 if (swap_crypt_test_page_decrypt[i] !=
317 swap_crypt_test_page_ref[i]) {
318 panic("encryption test failed");
319 }
320 }
321 /* encrypt in place */
322 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
323 assert(!rc);
324
325 /* decrypt in place */
326 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
327 assert(!rc);
328
329 for (i = 0; i < 4096; i++) {
330 if (swap_crypt_test_page_decrypt[i] !=
331 swap_crypt_test_page_ref[i]) {
332 panic("in place encryption test failed");
333 }
334 }
335 swap_crypt_xts_tested = TRUE;
336 #endif /* DEVELOPMENT || DEBUG */
337 }
338
339
340 void
vm_swap_encrypt(c_segment_t c_seg)341 vm_swap_encrypt(c_segment_t c_seg)
342 {
343 uint8_t *ptr;
344 uint8_t *iv;
345 uint64_t ivnum[2];
346 int size = 0;
347 int rc = 0;
348
349 if (swap_crypt_initialized == FALSE) {
350 swap_crypt_initialize();
351 }
352
353 #if DEVELOPMENT || DEBUG
354 C_SEG_MAKE_WRITEABLE(c_seg);
355 #endif
356 ptr = (uint8_t *)c_seg->c_store.c_buffer;
357 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
358
359 ivnum[0] = (uint64_t)c_seg;
360 ivnum[1] = 0;
361 iv = (uint8_t *)ivnum;
362
363 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
364 assert(!rc);
365
366 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
367
368 #if DEVELOPMENT || DEBUG
369 C_SEG_WRITE_PROTECT(c_seg);
370 #endif
371 }
372
373 void
vm_swap_decrypt(c_segment_t c_seg)374 vm_swap_decrypt(c_segment_t c_seg)
375 {
376 uint8_t *ptr;
377 uint8_t *iv;
378 uint64_t ivnum[2];
379 int size = 0;
380 int rc = 0;
381
382 assert(swap_crypt_initialized);
383
384 #if DEVELOPMENT || DEBUG
385 C_SEG_MAKE_WRITEABLE(c_seg);
386 #endif
387 ptr = (uint8_t *)c_seg->c_store.c_buffer;
388 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
389
390 ivnum[0] = (uint64_t)c_seg;
391 ivnum[1] = 0;
392 iv = (uint8_t *)ivnum;
393
394 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
395 assert(!rc);
396
397 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
398
399 #if DEVELOPMENT || DEBUG
400 C_SEG_WRITE_PROTECT(c_seg);
401 #endif
402 }
403 #endif /* ENCRYPTED_SWAP */
404
405 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
406 void
vm_compressor_swap_init()407 vm_compressor_swap_init()
408 {
409 thread_t thread = NULL;
410
411 queue_init(&swf_global_queue);
412
413 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
414 BASEPRI_VM, &thread) != KERN_SUCCESS) {
415 panic("vm_swapout_thread: create failed");
416 }
417 thread_set_thread_name(thread, "VM_swapout");
418 vm_swapout_thread_id = thread->thread_id;
419
420 thread_deallocate(thread);
421
422 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
423 BASEPRI_VM, &thread) != KERN_SUCCESS) {
424 panic("vm_swapfile_create_thread: create failed");
425 }
426
427 thread_set_thread_name(thread, "VM_swapfile_create");
428 thread_deallocate(thread);
429
430 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
431 BASEPRI_VM, &thread) != KERN_SUCCESS) {
432 panic("vm_swapfile_gc_thread: create failed");
433 }
434 thread_set_thread_name(thread, "VM_swapfile_gc");
435
436 /*
437 * Swapfile garbage collection will need to allocate memory
438 * to complete its swap reclaim and in-memory compaction.
439 * So allow it to dip into the reserved VM page pool.
440 */
441 thread_lock(thread);
442 thread->options |= TH_OPT_VMPRIV;
443 thread_unlock(thread);
444
445 thread_deallocate(thread);
446
447 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
448 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
449 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
450 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
451
452 #if !XNU_TARGET_OS_OSX
453 /*
454 * dummy value until the swap file gets created
455 * when we drive the first c_segment_t to the
456 * swapout queue... at that time we will
457 * know the true size we have to work with
458 */
459 c_overage_swapped_limit = 16;
460 #endif /* !XNU_TARGET_OS_OSX */
461
462 compressed_swap_chunk_size = c_seg_bufsize;
463 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
464 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
465 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
466 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
467 #if DEVELOPMENT || DEBUG
468 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
469 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
470 if (parsed_vm_max_num_swap_files > 0) {
471 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
472 } else {
473 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
474 }
475 }
476 #endif
477 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
478
479 printf("VM Swap Subsystem is ON\n");
480 }
481
482
483 #if RECORD_THE_COMPRESSED_DATA
484
485 void
c_compressed_record_init()486 c_compressed_record_init()
487 {
488 if (c_compressed_record_init_done == FALSE) {
489 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
490 c_compressed_record_init_done = TRUE;
491 }
492 }
493
494 void
c_compressed_record_write(char * buf,int size)495 c_compressed_record_write(char *buf, int size)
496 {
497 if (c_compressed_record_write_error == 0) {
498 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
499 c_compressed_record_file_offset += size;
500 }
501 }
502 #endif
503
504
505 int compaction_swapper_inited = 0;
506
507 void
vm_compaction_swapper_do_init(void)508 vm_compaction_swapper_do_init(void)
509 {
510 struct vnode *vp;
511 char *pathname;
512 int namelen;
513
514 if (compaction_swapper_inited) {
515 return;
516 }
517
518 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
519 compaction_swapper_inited = 1;
520 return;
521 }
522 lck_mtx_lock(&vm_swap_data_lock);
523
524 if (!compaction_swapper_inited) {
525 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
526 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
527 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
528
529 vm_swapfile_open(pathname, &vp);
530
531 if (vp) {
532 if (vnode_pager_isSSD(vp) == FALSE) {
533 /*
534 * swap files live on an HDD, so let's make sure to start swapping
535 * much earlier since we're not worried about SSD write-wear and
536 * we have so little write bandwidth to work with
537 * these values were derived expermentially by running the performance
538 * teams stock test for evaluating HDD performance against various
539 * combinations and looking and comparing overall results.
540 * Note that the > relationship between these 4 values must be maintained
541 */
542 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
543 vm_compressor_minorcompact_threshold_divisor = 15;
544 }
545 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
546 vm_compressor_majorcompact_threshold_divisor = 18;
547 }
548 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
549 vm_compressor_unthrottle_threshold_divisor = 24;
550 }
551 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
552 vm_compressor_catchup_threshold_divisor = 30;
553 }
554 }
555 #if XNU_TARGET_OS_OSX
556 vnode_setswapmount(vp);
557 vm_swappin_avail = vnode_getswappin_avail(vp);
558
559 if (vm_swappin_avail) {
560 vm_swappin_enabled = TRUE;
561 }
562 #endif /* XNU_TARGET_OS_OSX */
563 vm_swapfile_close((uint64_t)pathname, vp);
564 }
565 kfree_data(pathname, namelen);
566
567 compaction_swapper_inited = 1;
568 }
569 lck_mtx_unlock(&vm_swap_data_lock);
570 }
571
572
573 void
vm_swap_consider_defragmenting(int flags)574 vm_swap_consider_defragmenting(int flags)
575 {
576 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
577 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
578
579 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
580 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
581 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
582 lck_mtx_lock(&vm_swap_data_lock);
583
584 if (force_defrag) {
585 vm_swap_force_defrag = TRUE;
586 }
587
588 if (force_reclaim) {
589 vm_swap_force_reclaim = TRUE;
590 }
591
592 if (!vm_swapfile_gc_thread_running) {
593 thread_wakeup((event_t) &vm_swapfile_gc_needed);
594 }
595
596 lck_mtx_unlock(&vm_swap_data_lock);
597 }
598 }
599 }
600
601
602 int vm_swap_defragment_yielded = 0;
603 int vm_swap_defragment_swapin = 0;
604 int vm_swap_defragment_free = 0;
605 int vm_swap_defragment_busy = 0;
606
607 #if CONFIG_FREEZE
608 extern uint32_t c_segment_pages_compressed_incore;
609 extern uint32_t c_segment_pages_compressed_nearing_limit;
610 extern uint32_t c_segment_count;
611 extern uint32_t c_segments_nearing_limit;
612
613 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
614
615 extern bool freezer_incore_cseg_acct;
616 #endif /* CONFIG_FREEZE */
617
618 static void
vm_swap_defragment()619 vm_swap_defragment()
620 {
621 c_segment_t c_seg;
622
623 /*
624 * have to grab the master lock w/o holding
625 * any locks in spin mode
626 */
627 PAGE_REPLACEMENT_DISALLOWED(TRUE);
628
629 lck_mtx_lock_spin_always(c_list_lock);
630
631 while (!queue_empty(&c_swappedout_sparse_list_head)) {
632 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
633 vm_swap_defragment_yielded++;
634 break;
635 }
636 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
637
638 lck_mtx_lock_spin_always(&c_seg->c_lock);
639
640 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
641
642 if (c_seg->c_busy) {
643 lck_mtx_unlock_always(c_list_lock);
644
645 PAGE_REPLACEMENT_DISALLOWED(FALSE);
646 /*
647 * c_seg_wait_on_busy consumes c_seg->c_lock
648 */
649 c_seg_wait_on_busy(c_seg);
650
651 PAGE_REPLACEMENT_DISALLOWED(TRUE);
652
653 lck_mtx_lock_spin_always(c_list_lock);
654
655 vm_swap_defragment_busy++;
656 continue;
657 }
658 if (c_seg->c_bytes_used == 0) {
659 /*
660 * c_seg_free_locked consumes the c_list_lock
661 * and c_seg->c_lock
662 */
663 C_SEG_BUSY(c_seg);
664 c_seg_free_locked(c_seg);
665
666 vm_swap_defragment_free++;
667 } else {
668 lck_mtx_unlock_always(c_list_lock);
669
670 #if CONFIG_FREEZE
671 if (freezer_incore_cseg_acct) {
672 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
673 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
674 }
675
676 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
677 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
678 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
679 }
680 }
681 #endif /* CONFIG_FREEZE */
682 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
683 lck_mtx_unlock_always(&c_seg->c_lock);
684 vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
685 }
686
687 vm_swap_defragment_swapin++;
688 }
689 PAGE_REPLACEMENT_DISALLOWED(FALSE);
690
691 vm_pageout_io_throttle();
692
693 /*
694 * because write waiters have privilege over readers,
695 * dropping and immediately retaking the master lock will
696 * still allow any thread waiting to acquire the
697 * master lock exclusively an opportunity to take it
698 */
699 PAGE_REPLACEMENT_DISALLOWED(TRUE);
700
701 lck_mtx_lock_spin_always(c_list_lock);
702 }
703 lck_mtx_unlock_always(c_list_lock);
704
705 PAGE_REPLACEMENT_DISALLOWED(FALSE);
706 }
707
708
709 bool vm_swapfile_create_thread_inited = false;
710 static void
vm_swapfile_create_thread(void)711 vm_swapfile_create_thread(void)
712 {
713 clock_sec_t sec;
714 clock_nsec_t nsec;
715
716 if (!vm_swapfile_create_thread_inited) {
717 #if CONFIG_THREAD_GROUPS
718 thread_group_vm_add();
719 #endif /* CONFIG_THREAD_GROUPS */
720 current_thread()->options |= TH_OPT_VMPRIV;
721 vm_swapfile_create_thread_inited = true;
722 }
723
724 vm_swapfile_create_thread_awakened++;
725 vm_swapfile_create_thread_running = 1;
726
727 while (TRUE) {
728 /*
729 * walk through the list of swap files
730 * and do the delayed frees/trims for
731 * any swap file whose count of delayed
732 * frees is above the batch limit
733 */
734 vm_swap_handle_delayed_trims(FALSE);
735
736 lck_mtx_lock(&vm_swap_data_lock);
737
738 if (hibernate_in_progress_with_pinned_swap == TRUE) {
739 break;
740 }
741
742 if (compressor_store_stop_compaction == TRUE) {
743 break;
744 }
745
746 clock_get_system_nanotime(&sec, &nsec);
747
748 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
749 break;
750 }
751
752 lck_mtx_unlock(&vm_swap_data_lock);
753
754 if (vm_swap_create_file() == FALSE) {
755 vm_swapfile_last_failed_to_create_ts = sec;
756 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
757 } else {
758 vm_swapfile_last_successful_create_ts = sec;
759 }
760 }
761 vm_swapfile_create_thread_running = 0;
762
763 if (hibernate_in_progress_with_pinned_swap == TRUE) {
764 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
765 }
766
767 if (compressor_store_stop_compaction == TRUE) {
768 thread_wakeup((event_t)&compressor_store_stop_compaction);
769 }
770
771 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
772
773 lck_mtx_unlock(&vm_swap_data_lock);
774
775 thread_block((thread_continue_t)vm_swapfile_create_thread);
776
777 /* NOTREACHED */
778 }
779
780
781 #if HIBERNATION
782
783 kern_return_t
hibernate_pin_swap(boolean_t start)784 hibernate_pin_swap(boolean_t start)
785 {
786 vm_compaction_swapper_do_init();
787
788 if (start == FALSE) {
789 lck_mtx_lock(&vm_swap_data_lock);
790 hibernate_in_progress_with_pinned_swap = FALSE;
791 lck_mtx_unlock(&vm_swap_data_lock);
792
793 return KERN_SUCCESS;
794 }
795 if (vm_swappin_enabled == FALSE) {
796 return KERN_SUCCESS;
797 }
798
799 lck_mtx_lock(&vm_swap_data_lock);
800
801 hibernate_in_progress_with_pinned_swap = TRUE;
802
803 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
804 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
805
806 lck_mtx_unlock(&vm_swap_data_lock);
807
808 thread_block(THREAD_CONTINUE_NULL);
809
810 lck_mtx_lock(&vm_swap_data_lock);
811 }
812 if (vm_num_swap_files > vm_num_pinned_swap_files) {
813 hibernate_in_progress_with_pinned_swap = FALSE;
814 lck_mtx_unlock(&vm_swap_data_lock);
815
816 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
817 vm_num_swap_files, vm_num_pinned_swap_files);
818 return KERN_FAILURE;
819 }
820 lck_mtx_unlock(&vm_swap_data_lock);
821
822 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
823 if (vm_swap_create_file() == FALSE) {
824 break;
825 }
826 }
827 return KERN_SUCCESS;
828 }
829 #endif
830 bool vm_swapfile_gc_thread_inited = false;
831 static void
vm_swapfile_gc_thread(void)832 vm_swapfile_gc_thread(void)
833 {
834 boolean_t need_defragment;
835 boolean_t need_reclaim;
836
837 if (!vm_swapfile_gc_thread_inited) {
838 #if CONFIG_THREAD_GROUPS
839 thread_group_vm_add();
840 #endif /* CONFIG_THREAD_GROUPS */
841 vm_swapfile_gc_thread_inited = true;
842 }
843
844 vm_swapfile_gc_thread_awakened++;
845 vm_swapfile_gc_thread_running = 1;
846
847 while (TRUE) {
848 lck_mtx_lock(&vm_swap_data_lock);
849
850 if (hibernate_in_progress_with_pinned_swap == TRUE) {
851 break;
852 }
853
854 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
855 break;
856 }
857
858 need_defragment = FALSE;
859 need_reclaim = FALSE;
860
861 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
862 need_defragment = TRUE;
863 }
864
865 if (VM_SWAP_SHOULD_RECLAIM()) {
866 need_defragment = TRUE;
867 need_reclaim = TRUE;
868 }
869 if (need_defragment == FALSE && need_reclaim == FALSE) {
870 break;
871 }
872
873 vm_swap_force_defrag = FALSE;
874 vm_swap_force_reclaim = FALSE;
875
876 lck_mtx_unlock(&vm_swap_data_lock);
877
878 if (need_defragment == TRUE) {
879 vm_swap_defragment();
880 }
881 if (need_reclaim == TRUE) {
882 vm_swap_reclaim();
883 }
884 }
885 vm_swapfile_gc_thread_running = 0;
886
887 if (hibernate_in_progress_with_pinned_swap == TRUE) {
888 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
889 }
890
891 if (compressor_store_stop_compaction == TRUE) {
892 thread_wakeup((event_t)&compressor_store_stop_compaction);
893 }
894
895 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
896
897 lck_mtx_unlock(&vm_swap_data_lock);
898
899 thread_block((thread_continue_t)vm_swapfile_gc_thread);
900
901 /* NOTREACHED */
902 }
903
904
905
906 #define VM_SWAPOUT_LIMIT_T2P 4
907 #define VM_SWAPOUT_LIMIT_T1P 4
908 #define VM_SWAPOUT_LIMIT_T0P 6
909 #define VM_SWAPOUT_LIMIT_T0 8
910 #define VM_SWAPOUT_LIMIT_MAX 8
911
912 #define VM_SWAPOUT_START 0
913 #define VM_SWAPOUT_T2_PASSIVE 1
914 #define VM_SWAPOUT_T1_PASSIVE 2
915 #define VM_SWAPOUT_T0_PASSIVE 3
916 #define VM_SWAPOUT_T0 4
917
918 int vm_swapout_state = VM_SWAPOUT_START;
919 int vm_swapout_limit = 1;
920
921 int vm_swapper_entered_T0 = 0;
922 int vm_swapper_entered_T0P = 0;
923 int vm_swapper_entered_T1P = 0;
924 int vm_swapper_entered_T2P = 0;
925
926
927 static void
vm_swapout_thread_throttle_adjust(void)928 vm_swapout_thread_throttle_adjust(void)
929 {
930 switch (vm_swapout_state) {
931 case VM_SWAPOUT_START:
932
933 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
934 vm_swapper_entered_T2P++;
935
936 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
937 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
938 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
939 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
940 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
941 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
942
943 break;
944
945 case VM_SWAPOUT_T2_PASSIVE:
946
947 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
948 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
949 vm_swapper_entered_T0P++;
950
951 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
952 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
953 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
954 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
955 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
956 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
957
958 break;
959 }
960 if (swapout_target_age || hibernate_flushing == TRUE) {
961 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
962 vm_swapper_entered_T1P++;
963
964 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
965 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
966 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
967 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
968 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
969 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
970 }
971 break;
972
973 case VM_SWAPOUT_T1_PASSIVE:
974
975 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
976 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
977 vm_swapper_entered_T0P++;
978
979 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
980 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
981 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
982 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
983 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
984 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
985
986 break;
987 }
988 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
989 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
990 vm_swapper_entered_T2P++;
991
992 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
993 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
994 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
995 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
996 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
997 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
998 }
999 break;
1000
1001 case VM_SWAPOUT_T0_PASSIVE:
1002
1003 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1004 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1005 vm_swapper_entered_T2P++;
1006
1007 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1008 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1009 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1010 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1011 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1012 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1013
1014 break;
1015 }
1016 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1017 vm_swapper_entered_T0++;
1018
1019 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1020 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1021 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1022 vm_swapout_state = VM_SWAPOUT_T0;
1023 }
1024 break;
1025
1026 case VM_SWAPOUT_T0:
1027
1028 if (SWAPPER_HAS_CAUGHTUP()) {
1029 vm_swapper_entered_T0P++;
1030
1031 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1032 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1033 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1034 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1035 }
1036 break;
1037 }
1038 }
1039
1040 int vm_swapout_found_empty = 0;
1041
1042 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1043
1044 int vm_swapout_soc_busy = 0;
1045 int vm_swapout_soc_done = 0;
1046
1047
1048 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1049 vm_swapout_find_free_soc(void)
1050 {
1051 int i;
1052
1053 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1054 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1055 return &vm_swapout_ctx[i];
1056 }
1057 }
1058 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1059
1060 return NULL;
1061 }
1062
1063 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1064 vm_swapout_find_done_soc(void)
1065 {
1066 int i;
1067
1068 if (vm_swapout_soc_done) {
1069 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1070 if (vm_swapout_ctx[i].swp_io_done) {
1071 return &vm_swapout_ctx[i];
1072 }
1073 }
1074 }
1075 return NULL;
1076 }
1077
1078 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1079 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1080 {
1081 kern_return_t kr;
1082
1083 if (soc->swp_io_error) {
1084 kr = KERN_FAILURE;
1085 } else {
1086 kr = KERN_SUCCESS;
1087 }
1088
1089 lck_mtx_unlock_always(c_list_lock);
1090
1091 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1092 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1093
1094 lck_mtx_lock_spin_always(c_list_lock);
1095
1096 soc->swp_io_done = 0;
1097 soc->swp_io_busy = 0;
1098
1099 vm_swapout_soc_busy--;
1100 vm_swapout_soc_done--;
1101 }
1102
1103 bool vm_swapout_thread_inited = false;
1104 static void
vm_swapout_thread(void)1105 vm_swapout_thread(void)
1106 {
1107 uint32_t size = 0;
1108 c_segment_t c_seg = NULL;
1109 kern_return_t kr = KERN_SUCCESS;
1110 struct swapout_io_completion *soc;
1111
1112 if (!vm_swapout_thread_inited) {
1113 #if CONFIG_THREAD_GROUPS
1114 thread_group_vm_add();
1115 #endif /* CONFIG_THREAD_GROUPS */
1116 current_thread()->options |= TH_OPT_VMPRIV;
1117 vm_swapout_thread_inited = true;
1118 }
1119
1120 vm_swapout_thread_awakened++;
1121
1122 lck_mtx_lock_spin_always(c_list_lock);
1123
1124 vm_swapout_thread_running = TRUE;
1125 again:
1126 while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
1127 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1128
1129 lck_mtx_lock_spin_always(&c_seg->c_lock);
1130
1131 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1132
1133 if (c_seg->c_busy) {
1134 lck_mtx_unlock_always(c_list_lock);
1135
1136 c_seg_wait_on_busy(c_seg);
1137
1138 lck_mtx_lock_spin_always(c_list_lock);
1139
1140 continue;
1141 }
1142 vm_swapout_thread_processed_segments++;
1143
1144 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1145
1146 if (size == 0) {
1147 assert(c_seg->c_bytes_used == 0);
1148
1149 if (!c_seg->c_on_minorcompact_q) {
1150 c_seg_need_delayed_compaction(c_seg, TRUE);
1151 }
1152
1153 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1154 lck_mtx_unlock_always(&c_seg->c_lock);
1155 lck_mtx_unlock_always(c_list_lock);
1156
1157 vm_swapout_found_empty++;
1158 goto c_seg_is_empty;
1159 }
1160 C_SEG_BUSY(c_seg);
1161 c_seg->c_busy_swapping = 1;
1162
1163 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1164
1165 lck_mtx_unlock_always(c_list_lock);
1166 lck_mtx_unlock_always(&c_seg->c_lock);
1167
1168 #if CHECKSUM_THE_SWAP
1169 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1170 c_seg->cseg_swap_size = size;
1171 #endif /* CHECKSUM_THE_SWAP */
1172
1173 #if ENCRYPTED_SWAP
1174 vm_swap_encrypt(c_seg);
1175 #endif /* ENCRYPTED_SWAP */
1176
1177 soc = vm_swapout_find_free_soc();
1178 assert(soc);
1179
1180 soc->swp_upl_ctx.io_context = (void *)soc;
1181 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1182 soc->swp_upl_ctx.io_error = 0;
1183
1184 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1185
1186 if (kr != KERN_SUCCESS) {
1187 if (soc->swp_io_done) {
1188 lck_mtx_lock_spin_always(c_list_lock);
1189
1190 soc->swp_io_done = 0;
1191 vm_swapout_soc_done--;
1192
1193 lck_mtx_unlock_always(c_list_lock);
1194 }
1195 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1196 } else {
1197 soc->swp_io_busy = 1;
1198 vm_swapout_soc_busy++;
1199 }
1200
1201 c_seg_is_empty:
1202 if (c_swapout_count == 0) {
1203 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1204 }
1205
1206 lck_mtx_lock_spin_always(c_list_lock);
1207
1208 while ((soc = vm_swapout_find_done_soc())) {
1209 vm_swapout_complete_soc(soc);
1210 }
1211 lck_mtx_unlock_always(c_list_lock);
1212
1213 vm_swapout_thread_throttle_adjust();
1214
1215 lck_mtx_lock_spin_always(c_list_lock);
1216 }
1217 while ((soc = vm_swapout_find_done_soc())) {
1218 vm_swapout_complete_soc(soc);
1219 }
1220 lck_mtx_unlock_always(c_list_lock);
1221
1222 vm_pageout_io_throttle();
1223
1224 lck_mtx_lock_spin_always(c_list_lock);
1225
1226 /*
1227 * Recheck if we have some c_segs to wakeup
1228 * post throttle. And, check to see if we
1229 * have any more swapouts needed.
1230 */
1231 if (vm_swapout_soc_done) {
1232 goto again;
1233 }
1234
1235 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1236
1237 vm_swapout_thread_running = FALSE;
1238
1239 lck_mtx_unlock_always(c_list_lock);
1240
1241 thread_block((thread_continue_t)vm_swapout_thread);
1242
1243 /* NOTREACHED */
1244 }
1245
1246
1247 void
vm_swapout_iodone(void * io_context,int error)1248 vm_swapout_iodone(void *io_context, int error)
1249 {
1250 struct swapout_io_completion *soc;
1251
1252 soc = (struct swapout_io_completion *)io_context;
1253
1254 lck_mtx_lock_spin_always(c_list_lock);
1255
1256 soc->swp_io_done = 1;
1257 soc->swp_io_error = error;
1258 vm_swapout_soc_done++;
1259
1260 if (!vm_swapout_thread_running) {
1261 thread_wakeup((event_t)&c_swapout_list_head);
1262 }
1263
1264 lck_mtx_unlock_always(c_list_lock);
1265 }
1266
1267
1268 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1269 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1270 {
1271 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1272
1273 if (kr == KERN_SUCCESS) {
1274 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size,
1275 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1276 }
1277 #if ENCRYPTED_SWAP
1278 else {
1279 vm_swap_decrypt(c_seg);
1280 }
1281 #endif /* ENCRYPTED_SWAP */
1282 lck_mtx_lock_spin_always(c_list_lock);
1283 lck_mtx_lock_spin_always(&c_seg->c_lock);
1284
1285 if (kr == KERN_SUCCESS) {
1286 int new_state = C_ON_SWAPPEDOUT_Q;
1287 boolean_t insert_head = FALSE;
1288
1289 if (hibernate_flushing == TRUE) {
1290 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1291 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1292 insert_head = TRUE;
1293 }
1294 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1295 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1296 }
1297
1298 c_seg_switch_state(c_seg, new_state, insert_head);
1299
1300 c_seg->c_store.c_swap_handle = f_offset;
1301
1302 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1303
1304 c_seg->c_swappedin = false;
1305
1306 if (c_seg->c_bytes_used) {
1307 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1308 }
1309
1310 #if CONFIG_FREEZE
1311 /*
1312 * Successful swapout. Decrement the in-core compressed pages count.
1313 */
1314 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1315 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1316 #endif /* CONFIG_FREEZE */
1317 } else {
1318 if (c_seg->c_overage_swap == TRUE) {
1319 c_seg->c_overage_swap = FALSE;
1320 c_overage_swapped_count--;
1321 }
1322
1323 #if CONFIG_FREEZE
1324 if (c_seg->c_task_owner) {
1325 c_seg_update_task_owner(c_seg, NULL);
1326 }
1327 #endif /* CONFIG_FREEZE */
1328
1329 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1330
1331 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1332 c_seg_need_delayed_compaction(c_seg, TRUE);
1333 }
1334 }
1335 assert(c_seg->c_busy_swapping);
1336 assert(c_seg->c_busy);
1337
1338 c_seg->c_busy_swapping = 0;
1339 lck_mtx_unlock_always(c_list_lock);
1340
1341 C_SEG_WAKEUP_DONE(c_seg);
1342 lck_mtx_unlock_always(&c_seg->c_lock);
1343
1344 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1345 }
1346
1347
1348 boolean_t
vm_swap_create_file()1349 vm_swap_create_file()
1350 {
1351 uint64_t size = 0;
1352 int namelen = 0;
1353 boolean_t swap_file_created = FALSE;
1354 boolean_t swap_file_reuse = FALSE;
1355 boolean_t swap_file_pin = FALSE;
1356 struct swapfile *swf = NULL;
1357
1358 /*
1359 * make sure we've got all the info we need
1360 * to potentially pin a swap file... we could
1361 * be swapping out due to hibernation w/o ever
1362 * having run vm_pageout_scan, which is normally
1363 * the trigger to do the init
1364 */
1365 vm_compaction_swapper_do_init();
1366
1367 /*
1368 * Any swapfile structure ready for re-use?
1369 */
1370
1371 lck_mtx_lock(&vm_swap_data_lock);
1372
1373 swf = (struct swapfile*) queue_first(&swf_global_queue);
1374
1375 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1376 if (swf->swp_flags == SWAP_REUSE) {
1377 swap_file_reuse = TRUE;
1378 break;
1379 }
1380 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1381 }
1382
1383 lck_mtx_unlock(&vm_swap_data_lock);
1384
1385 if (swap_file_reuse == FALSE) {
1386 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1387
1388 swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1389 swf->swp_index = vm_num_swap_files + 1;
1390 swf->swp_pathlen = namelen;
1391 swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1392
1393 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1394 }
1395
1396 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1397
1398 if (swf->swp_vp == NULL) {
1399 if (swap_file_reuse == FALSE) {
1400 kfree_data(swf->swp_path, swf->swp_pathlen);
1401 kfree_type(struct swapfile, swf);
1402 }
1403 return FALSE;
1404 }
1405 vm_swapfile_can_be_created = TRUE;
1406
1407 size = MAX_SWAP_FILE_SIZE;
1408
1409 while (size >= MIN_SWAP_FILE_SIZE) {
1410 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1411
1412 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1413 int num_bytes_for_bitmap = 0;
1414
1415 swap_file_created = TRUE;
1416
1417 swf->swp_size = size;
1418 swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1419 swf->swp_nseginuse = 0;
1420 swf->swp_free_hint = 0;
1421
1422 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1423 /*
1424 * Allocate a bitmap that describes the
1425 * number of segments held by this swapfile.
1426 */
1427 swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1428 Z_WAITOK | Z_ZERO);
1429
1430 swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1431 Z_WAITOK | Z_ZERO);
1432
1433 /*
1434 * passing a NULL trim_list into vnode_trim_list
1435 * will return ENOTSUP if trim isn't supported
1436 * and 0 if it is
1437 */
1438 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1439 swp_trim_supported = TRUE;
1440 }
1441
1442 lck_mtx_lock(&vm_swap_data_lock);
1443
1444 swf->swp_flags = SWAP_READY;
1445
1446 if (swap_file_reuse == FALSE) {
1447 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1448 }
1449
1450 vm_num_swap_files++;
1451
1452 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1453 if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1454 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1455 }
1456
1457 if (swap_file_pin == TRUE) {
1458 vm_num_pinned_swap_files++;
1459 swf->swp_flags |= SWAP_PINNED;
1460 vm_swappin_avail -= swf->swp_size;
1461 }
1462
1463 lck_mtx_unlock(&vm_swap_data_lock);
1464
1465 thread_wakeup((event_t) &vm_num_swap_files);
1466 #if !XNU_TARGET_OS_OSX
1467 if (vm_num_swap_files == 1) {
1468 c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1469
1470 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1471 c_overage_swapped_limit /= 2;
1472 }
1473 }
1474 #endif /* !XNU_TARGET_OS_OSX */
1475 break;
1476 } else {
1477 size = size / 2;
1478 }
1479 }
1480 if (swap_file_created == FALSE) {
1481 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1482
1483 swf->swp_vp = NULL;
1484
1485 if (swap_file_reuse == FALSE) {
1486 kfree_data(swf->swp_path, swf->swp_pathlen);
1487 kfree_type(struct swapfile, swf);
1488 }
1489 }
1490 return swap_file_created;
1491 }
1492
1493 extern void vnode_put(struct vnode* vp);
1494 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1495 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1496 {
1497 struct swapfile *swf = NULL;
1498 uint64_t file_offset = 0;
1499 int retval = 0;
1500
1501 assert(c_seg->c_store.c_buffer);
1502
1503 lck_mtx_lock(&vm_swap_data_lock);
1504
1505 swf = vm_swapfile_for_handle(f_offset);
1506
1507 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1508 vm_swap_get_failures++;
1509 retval = 1;
1510 goto done;
1511 }
1512 swf->swp_io_count++;
1513
1514 lck_mtx_unlock(&vm_swap_data_lock);
1515
1516 #if DEVELOPMENT || DEBUG
1517 C_SEG_MAKE_WRITEABLE(c_seg);
1518 #endif
1519 file_offset = (f_offset & SWAP_SLOT_MASK);
1520
1521 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1522 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1523 } else {
1524 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1525 vnode_put(swf->swp_vp);
1526 }
1527
1528 #if DEVELOPMENT || DEBUG
1529 C_SEG_WRITE_PROTECT(c_seg);
1530 #endif
1531 if (retval == 0) {
1532 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1533 } else {
1534 vm_swap_get_failures++;
1535 }
1536
1537 /*
1538 * Free this slot in the swap structure.
1539 */
1540 vm_swap_free(f_offset);
1541
1542 lck_mtx_lock(&vm_swap_data_lock);
1543 swf->swp_io_count--;
1544
1545 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1546 swf->swp_flags &= ~SWAP_WANTED;
1547 thread_wakeup((event_t) &swf->swp_flags);
1548 }
1549 done:
1550 lck_mtx_unlock(&vm_swap_data_lock);
1551
1552 if (retval == 0) {
1553 return KERN_SUCCESS;
1554 } else {
1555 return KERN_FAILURE;
1556 }
1557 }
1558
1559 kern_return_t
vm_swap_put(vm_offset_t addr,uint64_t * f_offset,uint32_t size,c_segment_t c_seg,struct swapout_io_completion * soc)1560 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1561 {
1562 unsigned int segidx = 0;
1563 struct swapfile *swf = NULL;
1564 uint64_t file_offset = 0;
1565 uint64_t swapfile_index = 0;
1566 unsigned int byte_for_segidx = 0;
1567 unsigned int offset_within_byte = 0;
1568 boolean_t swf_eligible = FALSE;
1569 boolean_t waiting = FALSE;
1570 boolean_t retried = FALSE;
1571 int error = 0;
1572 clock_sec_t sec;
1573 clock_nsec_t nsec;
1574 void *upl_ctx = NULL;
1575 boolean_t drop_iocount = FALSE;
1576
1577 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1578 return KERN_FAILURE;
1579 }
1580 retry:
1581 lck_mtx_lock(&vm_swap_data_lock);
1582
1583 swf = (struct swapfile*) queue_first(&swf_global_queue);
1584
1585 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1586 segidx = swf->swp_free_hint;
1587
1588 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1589
1590 if (swf_eligible) {
1591 while (segidx < swf->swp_nsegs) {
1592 byte_for_segidx = segidx >> 3;
1593 offset_within_byte = segidx % 8;
1594
1595 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1596 segidx++;
1597 continue;
1598 }
1599
1600 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1601
1602 file_offset = segidx * compressed_swap_chunk_size;
1603 swf->swp_nseginuse++;
1604 swf->swp_io_count++;
1605 swf->swp_csegs[segidx] = c_seg;
1606
1607 swapfile_index = swf->swp_index;
1608 vm_swapfile_total_segs_used++;
1609 if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1610 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1611 }
1612
1613 clock_get_system_nanotime(&sec, &nsec);
1614
1615 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1616 thread_wakeup((event_t) &vm_swapfile_create_needed);
1617 }
1618
1619 lck_mtx_unlock(&vm_swap_data_lock);
1620
1621 goto issue_io;
1622 }
1623 }
1624 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1625 }
1626 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1627
1628 /*
1629 * we've run out of swap segments, but may not
1630 * be in a position to immediately create a new swap
1631 * file if we've recently failed to create due to a lack
1632 * of free space in the root filesystem... we'll try
1633 * to kick that create off, but in any event we're going
1634 * to take a breather (up to 1 second) so that we're not caught in a tight
1635 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1636 * segments into swap files only to have them immediately put back
1637 * on the c_age queue due to vm_swap_put failing.
1638 *
1639 * if we're doing these puts due to a hibernation flush,
1640 * no need to block... setting hibernate_no_swapspace to TRUE,
1641 * will cause "vm_compressor_compact_and_swap" to immediately abort
1642 */
1643 clock_get_system_nanotime(&sec, &nsec);
1644
1645 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1646 thread_wakeup((event_t) &vm_swapfile_create_needed);
1647 }
1648
1649 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1650 waiting = TRUE;
1651 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1652 } else {
1653 hibernate_no_swapspace = TRUE;
1654 }
1655
1656 lck_mtx_unlock(&vm_swap_data_lock);
1657
1658 if (waiting == TRUE) {
1659 thread_block(THREAD_CONTINUE_NULL);
1660
1661 if (retried == FALSE && hibernate_flushing == TRUE) {
1662 retried = TRUE;
1663 goto retry;
1664 }
1665 }
1666 vm_swap_put_failures_no_swap_file++;
1667
1668 return KERN_FAILURE;
1669
1670 issue_io:
1671 assert(c_seg->c_busy_swapping);
1672 assert(c_seg->c_busy);
1673 assert(!c_seg->c_on_minorcompact_q);
1674
1675 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1676
1677 if (soc) {
1678 soc->swp_c_seg = c_seg;
1679 soc->swp_c_size = size;
1680
1681 soc->swp_swf = swf;
1682
1683 soc->swp_io_error = 0;
1684 soc->swp_io_done = 0;
1685
1686 upl_ctx = (void *)&soc->swp_upl_ctx;
1687 }
1688
1689 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1690 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1691 } else {
1692 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1693 drop_iocount = TRUE;
1694 }
1695
1696 if (error || upl_ctx == NULL) {
1697 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1698 }
1699
1700 return KERN_SUCCESS;
1701 }
1702
1703 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1704 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1705 {
1706 if (drop_iocount) {
1707 vnode_put(swf->swp_vp);
1708 }
1709
1710 lck_mtx_lock(&vm_swap_data_lock);
1711
1712 swf->swp_io_count--;
1713
1714 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1715 swf->swp_flags &= ~SWAP_WANTED;
1716 thread_wakeup((event_t) &swf->swp_flags);
1717 }
1718 lck_mtx_unlock(&vm_swap_data_lock);
1719
1720 if (error) {
1721 vm_swap_free(*f_offset);
1722 vm_swap_put_failures++;
1723
1724 return KERN_FAILURE;
1725 }
1726 return KERN_SUCCESS;
1727 }
1728
1729
1730 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1731 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1732 {
1733 uint64_t file_offset = 0;
1734 unsigned int segidx = 0;
1735
1736
1737 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1738 unsigned int byte_for_segidx = 0;
1739 unsigned int offset_within_byte = 0;
1740
1741 file_offset = (f_offset & SWAP_SLOT_MASK);
1742 segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1743
1744 byte_for_segidx = segidx >> 3;
1745 offset_within_byte = segidx % 8;
1746
1747 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1748 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1749
1750 swf->swp_csegs[segidx] = NULL;
1751
1752 swf->swp_nseginuse--;
1753 vm_swapfile_total_segs_used--;
1754
1755 if (segidx < swf->swp_free_hint) {
1756 swf->swp_free_hint = segidx;
1757 }
1758 }
1759 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1760 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1761 }
1762 }
1763 }
1764
1765
1766 uint32_t vm_swap_free_now_count = 0;
1767 uint32_t vm_swap_free_delayed_count = 0;
1768
1769
1770 void
vm_swap_free(uint64_t f_offset)1771 vm_swap_free(uint64_t f_offset)
1772 {
1773 struct swapfile *swf = NULL;
1774 struct trim_list *tl = NULL;
1775 clock_sec_t sec;
1776 clock_nsec_t nsec;
1777
1778 if (swp_trim_supported == TRUE) {
1779 tl = kalloc_type(struct trim_list, Z_WAITOK);
1780 }
1781
1782 lck_mtx_lock(&vm_swap_data_lock);
1783
1784 swf = vm_swapfile_for_handle(f_offset);
1785
1786 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1787 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1788 /*
1789 * don't delay the free if the underlying disk doesn't support
1790 * trim, or we're in the midst of reclaiming this swap file since
1791 * we don't want to move segments that are technically free
1792 * but not yet handled by the delayed free mechanism
1793 */
1794 vm_swap_free_now(swf, f_offset);
1795
1796 vm_swap_free_now_count++;
1797 goto done;
1798 }
1799 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1800 tl->tl_length = compressed_swap_chunk_size;
1801
1802 tl->tl_next = swf->swp_delayed_trim_list_head;
1803 swf->swp_delayed_trim_list_head = tl;
1804 swf->swp_delayed_trim_count++;
1805 tl = NULL;
1806
1807 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1808 clock_get_system_nanotime(&sec, &nsec);
1809
1810 if (sec > dont_trim_until_ts) {
1811 thread_wakeup((event_t) &vm_swapfile_create_needed);
1812 }
1813 }
1814 vm_swap_free_delayed_count++;
1815 }
1816 done:
1817 lck_mtx_unlock(&vm_swap_data_lock);
1818
1819 if (tl != NULL) {
1820 kfree_type(struct trim_list, tl);
1821 }
1822 }
1823
1824
1825 static void
vm_swap_wait_on_trim_handling_in_progress()1826 vm_swap_wait_on_trim_handling_in_progress()
1827 {
1828 while (delayed_trim_handling_in_progress == TRUE) {
1829 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1830 lck_mtx_unlock(&vm_swap_data_lock);
1831
1832 thread_block(THREAD_CONTINUE_NULL);
1833
1834 lck_mtx_lock(&vm_swap_data_lock);
1835 }
1836 }
1837
1838
1839 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1840 vm_swap_handle_delayed_trims(boolean_t force_now)
1841 {
1842 struct swapfile *swf = NULL;
1843
1844 /*
1845 * serialize the race between us and vm_swap_reclaim...
1846 * if vm_swap_reclaim wins it will turn off SWAP_READY
1847 * on the victim it has chosen... we can just skip over
1848 * that file since vm_swap_reclaim will first process
1849 * all of the delayed trims associated with it
1850 */
1851
1852 if (compressor_store_stop_compaction == TRUE) {
1853 return;
1854 }
1855
1856 lck_mtx_lock(&vm_swap_data_lock);
1857
1858 delayed_trim_handling_in_progress = TRUE;
1859
1860 lck_mtx_unlock(&vm_swap_data_lock);
1861
1862 /*
1863 * no need to hold the lock to walk the swf list since
1864 * vm_swap_create (the only place where we add to this list)
1865 * is run on the same thread as this function
1866 * and vm_swap_reclaim doesn't remove items from this list
1867 * instead marking them with SWAP_REUSE for future re-use
1868 */
1869 swf = (struct swapfile*) queue_first(&swf_global_queue);
1870
1871 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1872 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1873 assert(!(swf->swp_flags & SWAP_RECLAIM));
1874 vm_swap_do_delayed_trim(swf);
1875 }
1876 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1877 }
1878 lck_mtx_lock(&vm_swap_data_lock);
1879
1880 delayed_trim_handling_in_progress = FALSE;
1881 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1882
1883 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1884 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1885 }
1886
1887 lck_mtx_unlock(&vm_swap_data_lock);
1888 }
1889
1890 static void
vm_swap_do_delayed_trim(struct swapfile * swf)1891 vm_swap_do_delayed_trim(struct swapfile *swf)
1892 {
1893 struct trim_list *tl, *tl_head;
1894 int error;
1895
1896 if (compressor_store_stop_compaction == TRUE) {
1897 return;
1898 }
1899
1900 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1901 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1902 return;
1903 }
1904
1905 lck_mtx_lock(&vm_swap_data_lock);
1906
1907 tl_head = swf->swp_delayed_trim_list_head;
1908 swf->swp_delayed_trim_list_head = NULL;
1909 swf->swp_delayed_trim_count = 0;
1910
1911 lck_mtx_unlock(&vm_swap_data_lock);
1912
1913 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1914
1915 (void) vnode_put(swf->swp_vp);
1916
1917 while ((tl = tl_head) != NULL) {
1918 unsigned int segidx = 0;
1919 unsigned int byte_for_segidx = 0;
1920 unsigned int offset_within_byte = 0;
1921
1922 lck_mtx_lock(&vm_swap_data_lock);
1923
1924 segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
1925
1926 byte_for_segidx = segidx >> 3;
1927 offset_within_byte = segidx % 8;
1928
1929 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1930 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1931
1932 swf->swp_csegs[segidx] = NULL;
1933
1934 swf->swp_nseginuse--;
1935 vm_swapfile_total_segs_used--;
1936
1937 if (segidx < swf->swp_free_hint) {
1938 swf->swp_free_hint = segidx;
1939 }
1940 }
1941 lck_mtx_unlock(&vm_swap_data_lock);
1942
1943 tl_head = tl->tl_next;
1944
1945 kfree_type(struct trim_list, tl);
1946 }
1947 }
1948
1949
/*
 * Intentionally empty: this function performs no work.
 */
void
vm_swap_flush()
{
}
1955
/* count of times vm_swap_reclaim() stopped its segment walk early */
int vm_swap_reclaim_yielded = 0;
1957
1958 void
vm_swap_reclaim(void)1959 vm_swap_reclaim(void)
1960 {
1961 vm_offset_t addr = 0;
1962 unsigned int segidx = 0;
1963 uint64_t f_offset = 0;
1964 struct swapfile *swf = NULL;
1965 struct swapfile *smallest_swf = NULL;
1966 unsigned int min_nsegs = 0;
1967 unsigned int byte_for_segidx = 0;
1968 unsigned int offset_within_byte = 0;
1969 uint32_t c_size = 0;
1970
1971 c_segment_t c_seg = NULL;
1972
1973 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), c_seg_bufsize, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1974 panic("vm_swap_reclaim: kernel_memory_allocate failed");
1975 }
1976
1977 lck_mtx_lock(&vm_swap_data_lock);
1978
1979 /*
1980 * if we're running the swapfile list looking for
1981 * candidates with delayed trims, we need to
1982 * wait before making our decision concerning
1983 * the swapfile we want to reclaim
1984 */
1985 vm_swap_wait_on_trim_handling_in_progress();
1986
1987 /*
1988 * from here until we knock down the SWAP_READY bit,
1989 * we need to remain behind the vm_swap_data_lock...
1990 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1991 * will not consider this swapfile for processing
1992 */
1993 swf = (struct swapfile*) queue_first(&swf_global_queue);
1994 min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
1995 smallest_swf = NULL;
1996
1997 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1998 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1999 smallest_swf = swf;
2000 min_nsegs = swf->swp_nseginuse;
2001 }
2002 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2003 }
2004
2005 if (smallest_swf == NULL) {
2006 goto done;
2007 }
2008
2009 swf = smallest_swf;
2010
2011
2012 swf->swp_flags &= ~SWAP_READY;
2013 swf->swp_flags |= SWAP_RECLAIM;
2014
2015 if (swf->swp_delayed_trim_count) {
2016 lck_mtx_unlock(&vm_swap_data_lock);
2017
2018 vm_swap_do_delayed_trim(swf);
2019
2020 lck_mtx_lock(&vm_swap_data_lock);
2021 }
2022 segidx = 0;
2023
2024 while (segidx < swf->swp_nsegs) {
2025 ReTry_for_cseg:
2026 /*
2027 * Wait for outgoing I/Os.
2028 */
2029 while (swf->swp_io_count) {
2030 swf->swp_flags |= SWAP_WANTED;
2031
2032 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2033 lck_mtx_unlock(&vm_swap_data_lock);
2034
2035 thread_block(THREAD_CONTINUE_NULL);
2036
2037 lck_mtx_lock(&vm_swap_data_lock);
2038 }
2039 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2040 vm_swap_reclaim_yielded++;
2041 break;
2042 }
2043
2044 byte_for_segidx = segidx >> 3;
2045 offset_within_byte = segidx % 8;
2046
2047 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2048 segidx++;
2049 continue;
2050 }
2051
2052 c_seg = swf->swp_csegs[segidx];
2053 assert(c_seg);
2054
2055 lck_mtx_lock_spin_always(&c_seg->c_lock);
2056
2057 if (c_seg->c_busy) {
2058 /*
2059 * a swapped out c_segment in the process of being freed will remain in the
2060 * busy state until after the vm_swap_free is called on it... vm_swap_free
2061 * takes the vm_swap_data_lock, so can't change the swap state until after
2062 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2063 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2064 * at that point, we re-look up the swap state which will now indicate that
2065 * this c_segment no longer exists.
2066 */
2067 c_seg->c_wanted = 1;
2068
2069 assert_wait((event_t) (c_seg), THREAD_UNINT);
2070 lck_mtx_unlock_always(&c_seg->c_lock);
2071
2072 lck_mtx_unlock(&vm_swap_data_lock);
2073
2074 thread_block(THREAD_CONTINUE_NULL);
2075
2076 lck_mtx_lock(&vm_swap_data_lock);
2077
2078 goto ReTry_for_cseg;
2079 }
2080 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2081
2082 f_offset = segidx * compressed_swap_chunk_size;
2083
2084 assert(c_seg == swf->swp_csegs[segidx]);
2085 swf->swp_csegs[segidx] = NULL;
2086 swf->swp_nseginuse--;
2087
2088 vm_swapfile_total_segs_used--;
2089
2090 lck_mtx_unlock(&vm_swap_data_lock);
2091
2092 assert(C_SEG_IS_ONDISK(c_seg));
2093
2094 C_SEG_BUSY(c_seg);
2095 c_seg->c_busy_swapping = 1;
2096 #if !CHECKSUM_THE_SWAP
2097 c_seg_trim_tail(c_seg);
2098 #endif
2099 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2100
2101 assert(c_size <= c_seg_bufsize && c_size);
2102
2103 lck_mtx_unlock_always(&c_seg->c_lock);
2104
2105 if (vnode_getwithref(swf->swp_vp)) {
2106 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2107 vm_swap_get_failures++;
2108 goto swap_io_failed;
2109 } else {
2110 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2111 /*
2112 * reading the data back in failed, so convert c_seg
2113 * to a swapped in c_segment that contains no data
2114 */
2115 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2116 /*
2117 * returns with c_busy_swapping cleared
2118 */
2119 vnode_put(swf->swp_vp);
2120 vm_swap_get_failures++;
2121 goto swap_io_failed;
2122 }
2123 vnode_put(swf->swp_vp);
2124 }
2125
2126 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2127 vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2128
2129 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2130 vm_offset_t c_buffer;
2131
2132 /*
2133 * the put failed, so convert c_seg to a fully swapped in c_segment
2134 * with valid data
2135 */
2136 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2137
2138 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
2139
2140 memcpy((char *)c_buffer, (char *)addr, c_size);
2141
2142 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2143 #if ENCRYPTED_SWAP
2144 vm_swap_decrypt(c_seg);
2145 #endif /* ENCRYPTED_SWAP */
2146 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2147 /*
2148 * returns with c_busy_swapping cleared
2149 */
2150 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2151
2152 goto swap_io_failed;
2153 }
2154 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2155
2156 lck_mtx_lock_spin_always(&c_seg->c_lock);
2157
2158 c_seg->c_swappedin = false;
2159
2160 assert(C_SEG_IS_ONDISK(c_seg));
2161 /*
2162 * The c_seg will now know about the new location on disk.
2163 */
2164 c_seg->c_store.c_swap_handle = f_offset;
2165
2166 assert(c_seg->c_busy_swapping);
2167 c_seg->c_busy_swapping = 0;
2168 swap_io_failed:
2169 assert(c_seg->c_busy);
2170 C_SEG_WAKEUP_DONE(c_seg);
2171
2172 lck_mtx_unlock_always(&c_seg->c_lock);
2173 lck_mtx_lock(&vm_swap_data_lock);
2174 }
2175
2176 if (swf->swp_nseginuse) {
2177 swf->swp_flags &= ~SWAP_RECLAIM;
2178 swf->swp_flags |= SWAP_READY;
2179
2180 goto done;
2181 }
2182 /*
2183 * We don't remove this inactive swf from the queue.
2184 * That way, we can re-use it when needed again and
2185 * preserve the namespace. The delayed_trim processing
2186 * is also dependent on us not removing swfs from the queue.
2187 */
2188 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2189
2190 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2191
2192 lck_mtx_unlock(&vm_swap_data_lock);
2193
2194 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2195
2196 kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2197 kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2198
2199 lck_mtx_lock(&vm_swap_data_lock);
2200
2201 if (swf->swp_flags & SWAP_PINNED) {
2202 vm_num_pinned_swap_files--;
2203 vm_swappin_avail += swf->swp_size;
2204 }
2205
2206 swf->swp_vp = NULL;
2207 swf->swp_size = 0;
2208 swf->swp_free_hint = 0;
2209 swf->swp_nsegs = 0;
2210 swf->swp_flags = SWAP_REUSE;
2211
2212 vm_num_swap_files--;
2213
2214 done:
2215 thread_wakeup((event_t) &swf->swp_flags);
2216 lck_mtx_unlock(&vm_swap_data_lock);
2217
2218 kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2219 }
2220
2221
2222 uint64_t
vm_swap_get_total_space(void)2223 vm_swap_get_total_space(void)
2224 {
2225 uint64_t total_space = 0;
2226
2227 total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2228
2229 return total_space;
2230 }
2231
2232 uint64_t
vm_swap_get_used_space(void)2233 vm_swap_get_used_space(void)
2234 {
2235 uint64_t used_space = 0;
2236
2237 used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2238
2239 return used_space;
2240 }
2241
/*
 * Report the allocated-but-unused swap space, i.e. the difference between
 * vm_swap_get_total_space() and vm_swap_get_used_space().
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t total = vm_swap_get_total_space();
	uint64_t used = vm_swap_get_used_space();

	return total - used;
}
2247
2248 uint64_t
vm_swap_get_max_configured_space(void)2249 vm_swap_get_max_configured_space(void)
2250 {
2251 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2252 return num_swap_files * MAX_SWAP_FILE_SIZE;
2253 }
2254
2255 int
vm_swap_low_on_space(void)2256 vm_swap_low_on_space(void)
2257 {
2258 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2259 return 0;
2260 }
2261
2262 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2263 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2264 return 0;
2265 }
2266
2267 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2268 return 1;
2269 }
2270 }
2271 return 0;
2272 }
2273
2274 int
vm_swap_out_of_space(void)2275 vm_swap_out_of_space(void)
2276 {
2277 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2278 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2279 /*
2280 * Last swapfile and we have only space for the
2281 * last few swapouts.
2282 */
2283 return 1;
2284 }
2285
2286 return 0;
2287 }
2288
2289 boolean_t
vm_swap_files_pinned(void)2290 vm_swap_files_pinned(void)
2291 {
2292 boolean_t result;
2293
2294 if (vm_swappin_enabled == FALSE) {
2295 return TRUE;
2296 }
2297
2298 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2299
2300 return result;
2301 }
2302
#if CONFIG_FREEZE
/*
 * Query the swap volume for a freeze budget via vm_swap_vol_get_budget().
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget();
 *                      set to 0 here when no swap files exist and the
 *                      temporary swap file could not be opened.
 *
 * Returns TRUE when vm_swap_vol_get_budget() returned 0 for the vnode
 * that was queried, FALSE otherwise (including when no vnode could be
 * queried at all).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t got_device_budget = FALSE;
	struct swapfile *cur = NULL;

	if (!vm_num_swap_files) {
		/*
		 * Initial budget value before any swap files are created:
		 * open a temporary swap file just to ask for the budget.
		 */
		struct vnode *probe_vp = NULL;

		vm_swapfile_open(swapfilename, &probe_vp);

		if (probe_vp == NULL) {
			*freeze_daily_budget = 0;
			return got_device_budget;
		}

		if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
			got_device_budget = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, probe_vp);

		return got_device_budget;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	cur = (struct swapfile*) queue_first(&swf_global_queue);

	if (cur) {
		for (; queue_end(&swf_global_queue, (queue_entry_t)cur) == FALSE;
		    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {
			/*
			 * Exact match on the flags word: a file with any
			 * bit other than SWAP_READY set is skipped.
			 */
			if (cur->swp_flags != SWAP_READY) {
				continue;
			}

			assert(cur->swp_vp);

			if (vm_swap_vol_get_budget(cur->swp_vp, freeze_daily_budget) == 0) {
				got_device_budget = TRUE;
			}

			/* Only the first matching swap file is consulted. */
			break;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return got_device_budget;
}
#endif /* CONFIG_FREEZE */
2355
2356 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2357 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2358 {
2359 lck_mtx_lock(&vm_swap_data_lock);
2360
2361 *alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2362 *used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2363
2364 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2365 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2366
2367 lck_mtx_unlock(&vm_swap_data_lock);
2368 }
2369