1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42 * launchd explicitly turns ON swap later during boot on macOS devices.
43 */
44 boolean_t compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t compressor_store_stop_compaction = FALSE;
47 #endif
48
/*
 * Wait/wakeup events for the helper threads.  Only the addresses are used
 * in the code visible here: vm_swapfile_create_thread() sleeps on
 * &vm_swapfile_create_needed and vm_swapfile_gc_thread() sleeps on
 * &vm_swapfile_gc_needed.
 */
boolean_t vm_swapfile_create_needed = FALSE;
boolean_t vm_swapfile_gc_needed = FALSE;

/* I/O throttle tier last applied to the swapout thread (-1 until vm_swapout_thread_throttle_adjust() sets one) */
int vm_swapper_throttle = -1;
/* thread_id of the "VM_swapout" thread, recorded by vm_compressor_swap_init() */
uint64_t vm_swapout_thread_id;

uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
uint64_t vm_swap_get_failures = 0; /* Fatal */
uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
int vm_num_swap_files_config = 0; /* max number of swap files; set in vm_compressor_swap_init() */
int vm_num_swap_files = 0;
int vm_num_pinned_swap_files = 0;
int vm_swapout_thread_processed_segments = 0;
int vm_swapout_thread_awakened = 0;
bool vm_swapout_thread_running = FALSE;
int vm_swapfile_create_thread_awakened = 0; /* bumped each time vm_swapfile_create_thread() is (re)entered */
int vm_swapfile_create_thread_running = 0; /* 1 while vm_swapfile_create_thread() is working, 0 before it sleeps */
int vm_swapfile_gc_thread_awakened = 0; /* bumped each time vm_swapfile_gc_thread() is (re)entered */
int vm_swapfile_gc_thread_running = 0; /* 1 while vm_swapfile_gc_thread() is working, 0 before it sleeps */

int64_t vm_swappin_avail = 0; /* on macOS, taken from vnode_getswappin_avail() in vm_compaction_swapper_do_init() */
boolean_t vm_swappin_enabled = FALSE; /* set TRUE there when vm_swappin_avail is non-zero */
unsigned int vm_swapfile_total_segs_alloced = 0;
unsigned int vm_swapfile_total_segs_alloced_max = 0;
unsigned int vm_swapfile_total_segs_used = 0;
unsigned int vm_swapfile_total_segs_used_max = 0;

/* Base path of the swap files; vm_compaction_swapper_do_init() appends a numeric index to it */
char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
77
78 extern vm_map_t compressor_map;
79 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
80
81 #define SWAP_READY 0x1 /* Swap file is ready to be used */
82 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
83 #define SWAP_WANTED 0x4 /* Swap file has waiters */
84 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
85 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
86
87
/*
 * Per-swap-file bookkeeping.  Instances are linked on swf_global_queue
 * through swp_queue and looked up by swp_index (see
 * vm_swapfile_for_handle()).
 */
struct swapfile {
	queue_head_t swp_queue; /* list of swap files */
	char *swp_path; /* saved pathname of swap file */
	struct vnode *swp_vp; /* backing vnode */
	uint64_t swp_size; /* size of this swap file */
	uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
	unsigned int swp_pathlen; /* length of pathname */
	unsigned int swp_nsegs; /* #segments we can use */
	unsigned int swp_nseginuse; /* #segments in use */
	unsigned int swp_index; /* index of this swap file */
	unsigned int swp_flags; /* state of swap file (SWAP_* bits) */
	unsigned int swp_free_hint; /* offset of 1st free chunk */
	unsigned int swp_io_count; /* count of outstanding I/Os */
	c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list *swp_delayed_trim_list_head; /* presumably the list of trims deferred for batching -- confirm against vm_swap_do_delayed_trim() */
	unsigned int swp_delayed_trim_count; /* compared against VM_SWAPFILE_DELAYED_TRIM_MAX by VM_SWAP_SHOULD_TRIM() */
};
106
107 queue_head_t swf_global_queue;
108 boolean_t swp_trim_supported = FALSE;
109
110 extern clock_sec_t dont_trim_until_ts;
111 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
112 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
113 int vm_swapfile_can_be_created = FALSE;
114 boolean_t delayed_trim_handling_in_progress = FALSE;
115
116 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
117
118 static void vm_swapout_thread_throttle_adjust(void);
119 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
120 static void vm_swapout_thread(void);
121 static void vm_swapfile_create_thread(void);
122 static void vm_swapfile_gc_thread(void);
123 static void vm_swap_defragment(void);
124 static void vm_swap_handle_delayed_trims(boolean_t);
125 static void vm_swap_do_delayed_trim(struct swapfile *);
126 static void vm_swap_wait_on_trim_handling_in_progress(void);
127 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
128
129 extern int vnode_getwithref(struct vnode* vp);
130
131 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
132
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

/*
 * Policy predicates.  Each evaluates to 1 or 0 (VM_SWAP_SHOULD_PIN to a
 * boolean).  Macro parameters are parenthesized so that any argument
 * expression (e.g. "&file" or "base + i") expands correctly.
 */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

/* Same predicates with the macOS thresholds; pinning depends on vm_swappin_avail here. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */
163
164 #define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
165 #define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
166 #define VM_SWAPFILE_DELAYED_CREATE 15
167
168 #define VM_SWAP_BUSY() ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
169
170
171 #if CHECKSUM_THE_SWAP
172 extern unsigned int hash_string(char *cp, int len);
173 #endif
174
175 #if RECORD_THE_COMPRESSED_DATA
176 boolean_t c_compressed_record_init_done = FALSE;
177 int c_compressed_record_write_error = 0;
178 struct vnode *c_compressed_record_vp = NULL;
179 uint64_t c_compressed_record_file_offset = 0;
180 void c_compressed_record_init(void);
181 void c_compressed_record_write(char *, int);
182 #endif
183
184 extern void vm_pageout_io_throttle(void);
185
186 static struct swapfile *vm_swapfile_for_handle(uint64_t);
187
188 /*
189 * Called with the vm_swap_data_lock held.
190 */
191
192 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)193 vm_swapfile_for_handle(uint64_t f_offset)
194 {
195 uint64_t file_offset = 0;
196 unsigned int swapfile_index = 0;
197 struct swapfile* swf = NULL;
198
199 file_offset = (f_offset & SWAP_SLOT_MASK);
200 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
201
202 swf = (struct swapfile*) queue_first(&swf_global_queue);
203
204 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
205 if (swapfile_index == swf->swp_index) {
206 break;
207 }
208
209 swf = (struct swapfile*) queue_next(&swf->swp_queue);
210 }
211
212 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
213 swf = NULL;
214 }
215
216 return swf;
217 }
218
219 #if ENCRYPTED_SWAP
220
#include <libkern/crypto/aesxts.h>

extern int cc_rand_generate(void *, size_t); /* from <libkern/crypto/rand.h> */

/* Set TRUE by swap_crypt_initialize() once the keys and XTS context are ready */
boolean_t swap_crypt_initialized;
void swap_crypt_initialize(void);

/* XTS context shared by vm_swap_encrypt() and vm_swap_decrypt() */
symmetric_xts xts_modectx;
uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */

#if DEVELOPMENT || DEBUG
/* Scratch pages for the encrypt/decrypt self-test run by swap_crypt_initialize() */
boolean_t swap_crypt_xts_tested = FALSE;
unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
#endif /* DEVELOPMENT || DEBUG */

/* Running totals of pages passed through vm_swap_encrypt() / vm_swap_decrypt() */
unsigned long vm_page_encrypt_counter;
unsigned long vm_page_decrypt_counter;
241
242
243 void
swap_crypt_initialize(void)244 swap_crypt_initialize(void)
245 {
246 uint8_t *enckey1, *enckey2;
247 int keylen1, keylen2;
248 int error;
249
250 assert(swap_crypt_initialized == FALSE);
251
252 keylen1 = sizeof(swap_crypt_key1);
253 enckey1 = (uint8_t *)&swap_crypt_key1;
254 keylen2 = sizeof(swap_crypt_key2);
255 enckey2 = (uint8_t *)&swap_crypt_key2;
256
257 error = cc_rand_generate((void *)enckey1, keylen1);
258 assert(!error);
259
260 error = cc_rand_generate((void *)enckey2, keylen2);
261 assert(!error);
262
263 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
264 assert(!error);
265
266 swap_crypt_initialized = TRUE;
267
268 #if DEVELOPMENT || DEBUG
269 uint8_t *encptr;
270 uint8_t *decptr;
271 uint8_t *refptr;
272 uint8_t *iv;
273 uint64_t ivnum[2];
274 int size = 0;
275 int i = 0;
276 int rc = 0;
277
278 assert(swap_crypt_xts_tested == FALSE);
279
280 /*
281 * Validate the encryption algorithms.
282 *
283 * First initialize the test data.
284 */
285 for (i = 0; i < 4096; i++) {
286 swap_crypt_test_page_ref[i] = (char) i;
287 }
288 ivnum[0] = (uint64_t)0xaa;
289 ivnum[1] = 0;
290 iv = (uint8_t *)ivnum;
291
292 refptr = (uint8_t *)swap_crypt_test_page_ref;
293 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
294 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
295 size = 4096;
296
297 /* encrypt */
298 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
299 assert(!rc);
300
301 /* compare result with original - should NOT match */
302 for (i = 0; i < 4096; i++) {
303 if (swap_crypt_test_page_encrypt[i] !=
304 swap_crypt_test_page_ref[i]) {
305 break;
306 }
307 }
308 assert(i != 4096);
309
310 /* decrypt */
311 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
312 assert(!rc);
313
314 /* compare result with original */
315 for (i = 0; i < 4096; i++) {
316 if (swap_crypt_test_page_decrypt[i] !=
317 swap_crypt_test_page_ref[i]) {
318 panic("encryption test failed");
319 }
320 }
321 /* encrypt in place */
322 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
323 assert(!rc);
324
325 /* decrypt in place */
326 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
327 assert(!rc);
328
329 for (i = 0; i < 4096; i++) {
330 if (swap_crypt_test_page_decrypt[i] !=
331 swap_crypt_test_page_ref[i]) {
332 panic("in place encryption test failed");
333 }
334 }
335 swap_crypt_xts_tested = TRUE;
336 #endif /* DEVELOPMENT || DEBUG */
337 }
338
339
340 void
vm_swap_encrypt(c_segment_t c_seg)341 vm_swap_encrypt(c_segment_t c_seg)
342 {
343 uint8_t *ptr;
344 uint8_t *iv;
345 uint64_t ivnum[2];
346 int size = 0;
347 int rc = 0;
348
349 if (swap_crypt_initialized == FALSE) {
350 swap_crypt_initialize();
351 }
352
353 #if DEVELOPMENT || DEBUG
354 C_SEG_MAKE_WRITEABLE(c_seg);
355 #endif
356 ptr = (uint8_t *)c_seg->c_store.c_buffer;
357 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
358
359 ivnum[0] = (uint64_t)c_seg;
360 ivnum[1] = 0;
361 iv = (uint8_t *)ivnum;
362
363 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
364 assert(!rc);
365
366 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
367
368 #if DEVELOPMENT || DEBUG
369 C_SEG_WRITE_PROTECT(c_seg);
370 #endif
371 }
372
373 void
vm_swap_decrypt(c_segment_t c_seg)374 vm_swap_decrypt(c_segment_t c_seg)
375 {
376 uint8_t *ptr;
377 uint8_t *iv;
378 uint64_t ivnum[2];
379 int size = 0;
380 int rc = 0;
381
382 assert(swap_crypt_initialized);
383
384 #if DEVELOPMENT || DEBUG
385 C_SEG_MAKE_WRITEABLE(c_seg);
386 #endif
387 ptr = (uint8_t *)c_seg->c_store.c_buffer;
388 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
389
390 ivnum[0] = (uint64_t)c_seg;
391 ivnum[1] = 0;
392 iv = (uint8_t *)ivnum;
393
394 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
395 assert(!rc);
396
397 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
398
399 #if DEVELOPMENT || DEBUG
400 C_SEG_WRITE_PROTECT(c_seg);
401 #endif
402 }
403 #endif /* ENCRYPTED_SWAP */
404
405 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
406 void
vm_compressor_swap_init(void)407 vm_compressor_swap_init(void)
408 {
409 thread_t thread = NULL;
410
411 queue_init(&swf_global_queue);
412
413 #if !XNU_TARGET_OS_OSX
414 /*
415 * dummy value until the swap file gets created
416 * when we drive the first c_segment_t to the
417 * swapout queue... at that time we will
418 * know the true size we have to work with
419 */
420 c_overage_swapped_limit = 16;
421 #endif /* !XNU_TARGET_OS_OSX */
422
423 compressed_swap_chunk_size = c_seg_bufsize;
424 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
425 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
426 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
427 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
428 #if DEVELOPMENT || DEBUG
429 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
430 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
431 if (parsed_vm_max_num_swap_files > 0) {
432 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
433 } else {
434 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
435 }
436 }
437 #endif
438 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
439
440 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
441 BASEPRI_VM, &thread) != KERN_SUCCESS) {
442 panic("vm_swapout_thread: create failed");
443 }
444 thread_set_thread_name(thread, "VM_swapout");
445 vm_swapout_thread_id = thread->thread_id;
446 thread_deallocate(thread);
447
448 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
449 BASEPRI_VM, &thread) != KERN_SUCCESS) {
450 panic("vm_swapfile_create_thread: create failed");
451 }
452 thread_set_thread_name(thread, "VM_swapfile_create");
453 thread_deallocate(thread);
454
455 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
456 BASEPRI_VM, &thread) != KERN_SUCCESS) {
457 panic("vm_swapfile_gc_thread: create failed");
458 }
459 thread_set_thread_name(thread, "VM_swapfile_gc");
460 /*
461 * Swapfile garbage collection will need to allocate memory
462 * to complete its swap reclaim and in-memory compaction.
463 * So allow it to dip into the reserved VM page pool.
464 */
465 thread_lock(thread);
466 thread->options |= TH_OPT_VMPRIV;
467 thread_unlock(thread);
468 thread_deallocate(thread);
469 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
470 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
471 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
472 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
473
474 printf("VM Swap Subsystem is ON\n");
475 }
476
477
478 #if RECORD_THE_COMPRESSED_DATA
479
480 void
c_compressed_record_init()481 c_compressed_record_init()
482 {
483 if (c_compressed_record_init_done == FALSE) {
484 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
485 c_compressed_record_init_done = TRUE;
486 }
487 }
488
489 void
c_compressed_record_write(char * buf,int size)490 c_compressed_record_write(char *buf, int size)
491 {
492 if (c_compressed_record_write_error == 0) {
493 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
494 c_compressed_record_file_offset += size;
495 }
496 }
497 #endif
498
499
500 int compaction_swapper_inited = 0;
501
502 void
vm_compaction_swapper_do_init(void)503 vm_compaction_swapper_do_init(void)
504 {
505 struct vnode *vp;
506 char *pathname;
507 int namelen;
508
509 if (compaction_swapper_inited) {
510 return;
511 }
512
513 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
514 compaction_swapper_inited = 1;
515 return;
516 }
517 lck_mtx_lock(&vm_swap_data_lock);
518
519 if (!compaction_swapper_inited) {
520 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
521 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
522 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
523
524 vm_swapfile_open(pathname, &vp);
525
526 if (vp) {
527 if (vnode_pager_isSSD(vp) == FALSE) {
528 /*
529 * swap files live on an HDD, so let's make sure to start swapping
530 * much earlier since we're not worried about SSD write-wear and
531 * we have so little write bandwidth to work with
532 * these values were derived expermentially by running the performance
533 * teams stock test for evaluating HDD performance against various
534 * combinations and looking and comparing overall results.
535 * Note that the > relationship between these 4 values must be maintained
536 */
537 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
538 vm_compressor_minorcompact_threshold_divisor = 15;
539 }
540 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
541 vm_compressor_majorcompact_threshold_divisor = 18;
542 }
543 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
544 vm_compressor_unthrottle_threshold_divisor = 24;
545 }
546 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
547 vm_compressor_catchup_threshold_divisor = 30;
548 }
549 }
550 #if XNU_TARGET_OS_OSX
551 vnode_setswapmount(vp);
552 vm_swappin_avail = vnode_getswappin_avail(vp);
553
554 if (vm_swappin_avail) {
555 vm_swappin_enabled = TRUE;
556 }
557 #endif /* XNU_TARGET_OS_OSX */
558 vm_swapfile_close((uint64_t)pathname, vp);
559 }
560 kfree_data(pathname, namelen);
561
562 compaction_swapper_inited = 1;
563 }
564 lck_mtx_unlock(&vm_swap_data_lock);
565 }
566
567
568 void
vm_swap_consider_defragmenting(int flags)569 vm_swap_consider_defragmenting(int flags)
570 {
571 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
572 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
573
574 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
575 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
576 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
577 lck_mtx_lock(&vm_swap_data_lock);
578
579 if (force_defrag) {
580 vm_swap_force_defrag = TRUE;
581 }
582
583 if (force_reclaim) {
584 vm_swap_force_reclaim = TRUE;
585 }
586
587 if (!vm_swapfile_gc_thread_running) {
588 thread_wakeup((event_t) &vm_swapfile_gc_needed);
589 }
590
591 lck_mtx_unlock(&vm_swap_data_lock);
592 }
593 }
594 }
595
596
597 int vm_swap_defragment_yielded = 0;
598 int vm_swap_defragment_swapin = 0;
599 int vm_swap_defragment_free = 0;
600 int vm_swap_defragment_busy = 0;
601
602 #if CONFIG_FREEZE
603 extern uint32_t c_segment_pages_compressed_incore;
604 extern uint32_t c_segment_pages_compressed_nearing_limit;
605 extern uint32_t c_segment_count;
606 extern uint32_t c_segments_nearing_limit;
607
608 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
609
610 extern bool freezer_incore_cseg_acct;
611 #endif /* CONFIG_FREEZE */
612
/*
 * vm_swap_defragment:
 *
 * Drain c_swappedout_sparse_list_head.  Each segment at the head of the
 * list is either waited on (if busy), freed (if it holds no bytes) or
 * swapped back in via c_seg_swapin().  The loop stops early, bumping
 * vm_swap_defragment_yielded, when compaction has been stopped or the
 * swapper is busy.
 *
 * c_list_lock is held at the top of every iteration and is dropped before
 * any blocking operation; PAGE_REPLACEMENT_DISALLOWED brackets each pass.
 */
static void
vm_swap_defragment()
{
	c_segment_t     c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			/* re-examine the head of the list from scratch */
			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

#if CONFIG_FREEZE
			/*
			 * Swapping this segment in adds to the in-core totals;
			 * if that would reach either "nearing limit" mark, ask
			 * memorystatus (asynchronously) to kill for compressor
			 * space.
			 */
			if (freezer_incore_cseg_acct) {
				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}

				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}
			}
#endif /* CONFIG_FREEZE */
			/*
			 * On a zero return we still own c_seg->c_lock and must
			 * drop it.  NOTE(review): presumably a non-zero return
			 * means c_seg_swapin() already disposed of the segment
			 * and its lock -- confirm against c_seg_swapin().
			 */
			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
				lck_mtx_unlock_always(&c_seg->c_lock);
				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
			}

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
702
703
/* Guards the one-time setup at the top of vm_swapfile_create_thread() */
bool vm_swapfile_create_thread_inited = false;

/*
 * vm_swapfile_create_thread:
 *
 * Body of the "VM_swapfile_create" kernel thread.  The function blocks
 * with itself as the continuation, so it is re-entered from the top on
 * every wakeup and never returns.  On each run it processes delayed trims
 * and creates swap files for as long as VM_SWAP_SHOULD_CREATE() holds,
 * then sleeps on &vm_swapfile_create_needed.
 *
 * Every 'break' out of the loop happens with vm_swap_data_lock held; the
 * lock is released only after assert_wait() has been issued.
 */
static void
vm_swapfile_create_thread(void)
{
	clock_sec_t     sec;
	clock_nsec_t    nsec;

	if (!vm_swapfile_create_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;
		vm_swapfile_create_thread_inited = true;
	}

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (compressor_store_stop_compaction == TRUE) {
			break;
		}

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
			break;
		}

		/* file creation runs without the data lock */
		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			/* remembered so VM_SWAP_SHOULD_CREATE() backs off for a while */
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
		} else {
			vm_swapfile_last_successful_create_ts = sec;
		}
	}
	vm_swapfile_create_thread_running = 0;

	/* let anyone waiting for this thread to go idle proceed */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}
774
775
776 #if HIBERNATION
777
/*
 * hibernate_pin_swap:
 *
 * start == FALSE: clear hibernate_in_progress_with_pinned_swap and return
 * KERN_SUCCESS.
 *
 * start == TRUE: if swap pinning is not enabled, return KERN_SUCCESS
 * without doing anything.  Otherwise set the flag, wait until both the
 * swapfile create and gc threads have gone idle, and fail with
 * KERN_FAILURE (clearing the flag) if any existing swap file is unpinned.
 * On success, keep creating swap files while VM_SWAP_SHOULD_PIN() says a
 * MAX_SWAP_FILE_SIZE file still fits, stopping at the first creation
 * failure.
 */
kern_return_t
hibernate_pin_swap(boolean_t start)
{
	vm_compaction_swapper_do_init();

	if (start == FALSE) {
		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return KERN_SUCCESS;
	}
	if (vm_swappin_enabled == FALSE) {
		return KERN_SUCCESS;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	/*
	 * Both helper threads issue a wakeup on this flag's address when they
	 * go idle and see it set; re-check after every wakeup.
	 */
	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	if (vm_num_swap_files > vm_num_pinned_swap_files) {
		/* at least one swap file is not pinned */
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		    vm_num_swap_files, vm_num_pinned_swap_files);
		return KERN_FAILURE;
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
		if (vm_swap_create_file() == FALSE) {
			break;
		}
	}
	return KERN_SUCCESS;
}
824 #endif
/* Guards the one-time setup at the top of vm_swapfile_gc_thread() */
bool vm_swapfile_gc_thread_inited = false;

/*
 * vm_swapfile_gc_thread:
 *
 * Body of the "VM_swapfile_gc" kernel thread.  Like the create thread it
 * blocks with itself as the continuation, so it restarts from the top on
 * every wakeup and never returns.  Each run repeats defragmentation and/or
 * reclaim until neither is needed, the swapper is busy, compaction is
 * stopped, or a pinned-swap hibernation is in progress; then it sleeps on
 * &vm_swapfile_gc_needed.
 *
 * Every 'break' out of the loop happens with vm_swap_data_lock held.
 */
static void
vm_swapfile_gc_thread(void)
{
	boolean_t       need_defragment;
	boolean_t       need_reclaim;

	if (!vm_swapfile_gc_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		vm_swapfile_gc_thread_inited = true;
	}

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {
		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
			break;
		}

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
			need_defragment = TRUE;
		}

		/* a reclaim pass is always preceded by a defragment pass */
		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE) {
			break;
		}

		/* the forced requests have been consumed by the checks above */
		vm_swap_force_defrag = FALSE;
		vm_swap_force_reclaim = FALSE;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE) {
			vm_swap_defragment();
		}
		if (need_reclaim == TRUE) {
			vm_swap_reclaim();
		}
	}
	vm_swapfile_gc_thread_running = 0;

	/* let anyone waiting for this thread to go idle proceed */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}
898
899
900
/*
 * Values assigned to vm_swapout_limit by
 * vm_swapout_thread_throttle_adjust() for each throttle state.
 * VM_SWAPOUT_LIMIT_MAX sizes the vm_swapout_ctx[] array.
 */
#define VM_SWAPOUT_LIMIT_T2P 4
#define VM_SWAPOUT_LIMIT_T1P 4
#define VM_SWAPOUT_LIMIT_T0P 6
#define VM_SWAPOUT_LIMIT_T0 8
#define VM_SWAPOUT_LIMIT_MAX 8

/*
 * States of the swapout throttle state machine: TIER2/TIER1/TIER0 with
 * passive I/O enabled, and TIER0 with passive I/O disabled (VM_SWAPOUT_T0).
 */
#define VM_SWAPOUT_START 0
#define VM_SWAPOUT_T2_PASSIVE 1
#define VM_SWAPOUT_T1_PASSIVE 2
#define VM_SWAPOUT_T0_PASSIVE 3
#define VM_SWAPOUT_T0 4

int vm_swapout_state = VM_SWAPOUT_START; /* current state of the machine above */
int vm_swapout_limit = 1; /* VM_SWAPOUT_LIMIT_* value of the current state */

/* How many times each state has been entered */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
920
921
922 static void
vm_swapout_thread_throttle_adjust(void)923 vm_swapout_thread_throttle_adjust(void)
924 {
925 switch (vm_swapout_state) {
926 case VM_SWAPOUT_START:
927
928 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
929 vm_swapper_entered_T2P++;
930
931 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
932 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
933 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
934 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
935 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
936 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
937
938 break;
939
940 case VM_SWAPOUT_T2_PASSIVE:
941
942 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
943 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
944 vm_swapper_entered_T0P++;
945
946 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
947 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
948 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
949 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
950 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
951 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
952
953 break;
954 }
955 if (swapout_target_age || hibernate_flushing == TRUE) {
956 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
957 vm_swapper_entered_T1P++;
958
959 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
960 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
961 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
962 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
963 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
964 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
965 }
966 break;
967
968 case VM_SWAPOUT_T1_PASSIVE:
969
970 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
971 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
972 vm_swapper_entered_T0P++;
973
974 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
975 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
976 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
977 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
978 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
979 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
980
981 break;
982 }
983 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
984 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
985 vm_swapper_entered_T2P++;
986
987 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
988 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
989 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
990 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
991 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
992 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
993 }
994 break;
995
996 case VM_SWAPOUT_T0_PASSIVE:
997
998 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
999 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1000 vm_swapper_entered_T2P++;
1001
1002 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1003 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1004 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1005 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1006 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1007 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1008
1009 break;
1010 }
1011 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1012 vm_swapper_entered_T0++;
1013
1014 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1015 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1016 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1017 vm_swapout_state = VM_SWAPOUT_T0;
1018 }
1019 break;
1020
1021 case VM_SWAPOUT_T0:
1022
1023 if (SWAPPER_HAS_CAUGHTUP()) {
1024 vm_swapper_entered_T0P++;
1025
1026 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1027 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1028 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1029 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1030 }
1031 break;
1032 }
1033 }
1034
1035 int vm_swapout_found_empty = 0;
1036
1037 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1038
1039 int vm_swapout_soc_busy = 0;
1040 int vm_swapout_soc_done = 0;
1041
1042
1043 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1044 vm_swapout_find_free_soc(void)
1045 {
1046 int i;
1047
1048 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1049 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1050 return &vm_swapout_ctx[i];
1051 }
1052 }
1053 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1054
1055 return NULL;
1056 }
1057
1058 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1059 vm_swapout_find_done_soc(void)
1060 {
1061 int i;
1062
1063 if (vm_swapout_soc_done) {
1064 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1065 if (vm_swapout_ctx[i].swp_io_done) {
1066 return &vm_swapout_ctx[i];
1067 }
1068 }
1069 }
1070 return NULL;
1071 }
1072
1073 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1074 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1075 {
1076 kern_return_t kr;
1077
1078 if (soc->swp_io_error) {
1079 kr = KERN_FAILURE;
1080 } else {
1081 kr = KERN_SUCCESS;
1082 }
1083
1084 lck_mtx_unlock_always(c_list_lock);
1085
1086 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1087 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1088
1089 lck_mtx_lock_spin_always(c_list_lock);
1090
1091 soc->swp_io_done = 0;
1092 soc->swp_io_busy = 0;
1093
1094 vm_swapout_soc_busy--;
1095 vm_swapout_soc_done--;
1096 }
1097
/* Set once the swapout thread has performed its first-run setup below. */
bool vm_swapout_thread_inited = false;

/*
 * Body of the swapout thread.
 *
 * Drains c_swapout_list_head: each segment is marked busy, moved to
 * C_ON_SWAPIO_Q and written out through vm_swap_put() using a completion
 * context from vm_swapout_ctx[].  Completed contexts are reaped with
 * vm_swapout_complete_soc().  When there is nothing left to do the thread
 * waits on &c_swapout_list_head and blocks with itself as the
 * continuation, so control never returns from this function.
 */
static void
vm_swapout_thread(void)
{
	uint32_t size = 0;
	c_segment_t c_seg = NULL;
	kern_return_t kr = KERN_SUCCESS;
	struct swapout_io_completion *soc;

	/* first-run setup only; later entries via the continuation skip this */
	if (!vm_swapout_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;
		vm_swapout_thread_inited = true;
	}

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	/* vm_swapout_iodone() checks this flag to decide whether a wakeup is needed */
	vm_swapout_thread_running = TRUE;
again:
	/*
	 * Keep issuing swapouts while there is queued work, the number of
	 * in-flight contexts is below the current throttle limit, and
	 * compaction has not been stopped.  c_list_lock is held at the top
	 * of every iteration.
	 */
	while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			/*
			 * Someone else owns the segment: drop the list lock, wait,
			 * then re-evaluate the queue from the top.
			 * NOTE(review): presumably c_seg_wait_on_busy() releases
			 * c_seg->c_lock -- confirm against its definition.
			 */
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		/* number of bytes to write: populated extent rounded up to a page */
		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			/* nothing populated: retire the segment as empty instead of swapping it */
			assert(c_seg->c_bytes_used == 0);

			if (!c_seg->c_on_minorcompact_q) {
				c_seg_need_delayed_compaction(c_seg, TRUE);
			}

			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_unlock_always(c_list_lock);

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		/* claim the segment for the duration of the write */
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

		/* both locks are dropped before touching the buffer / issuing I/O */
		lck_mtx_unlock_always(c_list_lock);
		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		/* the loop condition kept us below the limit, so a free context is expected */
		soc = vm_swapout_find_free_soc();
		assert(soc);

		/* completion callback: vm_swapout_iodone(soc, error) */
		soc->swp_upl_ctx.io_context = (void *)soc;
		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
		soc->swp_upl_ctx.io_error = 0;

		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

		if (kr != KERN_SUCCESS) {
			/*
			 * The put failed.  If the completion was nevertheless
			 * flagged done, undo that accounting before finishing the
			 * segment with the failure status.
			 */
			if (soc->swp_io_done) {
				lck_mtx_lock_spin_always(c_list_lock);

				soc->swp_io_done = 0;
				vm_swapout_soc_done--;

				lck_mtx_unlock_always(c_list_lock);
			}
			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
		} else {
			/*
			 * I/O is in flight: the context stays busy until
			 * vm_swapout_complete_soc() releases it.
			 * NOTE(review): these two updates are made without
			 * c_list_lock held -- confirm that only this thread
			 * modifies them.
			 */
			soc->swp_io_busy = 1;
			vm_swapout_soc_busy++;
		}

c_seg_is_empty:
		/* reached with no locks held, from either path above */
		if (c_swapout_count == 0) {
			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
		}

		lck_mtx_lock_spin_always(c_list_lock);

		/* reap whatever has completed so far */
		while ((soc = vm_swapout_find_done_soc())) {
			vm_swapout_complete_soc(soc);
		}
		lck_mtx_unlock_always(c_list_lock);

		vm_swapout_thread_throttle_adjust();

		/* re-take the list lock for the next loop-condition check */
		lck_mtx_lock_spin_always(c_list_lock);
	}
	while ((soc = vm_swapout_find_done_soc())) {
		vm_swapout_complete_soc(soc);
	}
	lck_mtx_unlock_always(c_list_lock);

	vm_pageout_io_throttle();

	lck_mtx_lock_spin_always(c_list_lock);

	/*
	 * Recheck if we have some c_segs to wakeup
	 * post throttle. And, check to see if we
	 * have any more swapouts needed.
	 */
	if (vm_swapout_soc_done) {
		goto again;
	}

	/* register the wait before clearing the running flag and dropping the lock */
	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	vm_swapout_thread_running = FALSE;

	lck_mtx_unlock_always(c_list_lock);

	/* resumes at the top of this function when woken */
	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
1240
1241
1242 void
vm_swapout_iodone(void * io_context,int error)1243 vm_swapout_iodone(void *io_context, int error)
1244 {
1245 struct swapout_io_completion *soc;
1246
1247 soc = (struct swapout_io_completion *)io_context;
1248
1249 lck_mtx_lock_spin_always(c_list_lock);
1250
1251 soc->swp_io_done = 1;
1252 soc->swp_io_error = error;
1253 vm_swapout_soc_done++;
1254
1255 if (!vm_swapout_thread_running) {
1256 thread_wakeup((event_t)&c_swapout_list_head);
1257 }
1258
1259 lck_mtx_unlock_always(c_list_lock);
1260 }
1261
1262
1263 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1264 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1265 {
1266 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1267
1268 if (kr == KERN_SUCCESS) {
1269 kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
1270 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1271 }
1272 #if ENCRYPTED_SWAP
1273 else {
1274 vm_swap_decrypt(c_seg);
1275 }
1276 #endif /* ENCRYPTED_SWAP */
1277 lck_mtx_lock_spin_always(c_list_lock);
1278 lck_mtx_lock_spin_always(&c_seg->c_lock);
1279
1280 if (kr == KERN_SUCCESS) {
1281 int new_state = C_ON_SWAPPEDOUT_Q;
1282 boolean_t insert_head = FALSE;
1283
1284 if (hibernate_flushing == TRUE) {
1285 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1286 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1287 insert_head = TRUE;
1288 }
1289 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1290 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1291 }
1292
1293 c_seg_switch_state(c_seg, new_state, insert_head);
1294
1295 c_seg->c_store.c_swap_handle = f_offset;
1296
1297 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1298
1299 c_seg->c_swappedin = false;
1300
1301 if (c_seg->c_bytes_used) {
1302 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1303 }
1304
1305 #if CONFIG_FREEZE
1306 /*
1307 * Successful swapout. Decrement the in-core compressed pages count.
1308 */
1309 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1310 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1311 #endif /* CONFIG_FREEZE */
1312 } else {
1313 if (c_seg->c_overage_swap == TRUE) {
1314 c_seg->c_overage_swap = FALSE;
1315 c_overage_swapped_count--;
1316 }
1317
1318 #if CONFIG_FREEZE
1319 if (c_seg->c_task_owner) {
1320 c_seg_update_task_owner(c_seg, NULL);
1321 }
1322 #endif /* CONFIG_FREEZE */
1323
1324 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1325
1326 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1327 c_seg_need_delayed_compaction(c_seg, TRUE);
1328 }
1329 }
1330 assert(c_seg->c_busy_swapping);
1331 assert(c_seg->c_busy);
1332
1333 c_seg->c_busy_swapping = 0;
1334 lck_mtx_unlock_always(c_list_lock);
1335
1336 C_SEG_WAKEUP_DONE(c_seg);
1337 lck_mtx_unlock_always(&c_seg->c_lock);
1338
1339 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1340 }
1341
1342
/*
 * Create a new swapfile, or bring a SWAP_REUSE-marked swapfile structure
 * back into service.
 *
 * The file is opened, then preallocated starting at MAX_SWAP_FILE_SIZE
 * and halving the request on each failure down to MIN_SWAP_FILE_SIZE.
 * On success the per-segment bitmap and c_segment array are allocated,
 * the file is marked SWAP_READY (and queued if newly allocated), global
 * counters are updated and waiters on &vm_num_swap_files are woken.
 *
 * Returns TRUE if a swapfile was made ready, FALSE otherwise.
 */
boolean_t
vm_swap_create_file()
{
	uint64_t size = 0;
	int namelen = 0;
	boolean_t swap_file_created = FALSE;
	boolean_t swap_file_reuse = FALSE;
	boolean_t swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		/* exact match: the entry must carry SWAP_REUSE and no other flag */
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {
		/* room for the base name, the numeric suffix and the terminating NUL */
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
		/* note: swp_index is 1-based while the path suffix below is 0-based */
		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		/* open failed: only free what this call allocated; a re-used entry stays queued */
		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	/* try progressively smaller preallocations until one succeeds */
	while (size >= MIN_SWAP_FILE_SIZE) {
		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			/*
			 * One bit per segment, at least one byte.
			 * NOTE(review): nsegs >> 3 rounds down, so this appears to
			 * assume swp_nsegs is a multiple of 8 whenever it exceeds 8
			 * -- confirm against the allowed file/chunk sizes.
			 */
			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
			    Z_WAITOK | Z_ZERO);

			/* per-segment back-pointers to the c_segment stored in each slot */
			swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
			    Z_WAITOK | Z_ZERO);

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
				swp_trim_supported = TRUE;
			}

			lck_mtx_lock(&vm_swap_data_lock);

			/* publish: this overwrites any previous flags (e.g. SWAP_REUSE) */
			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;
			if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
				vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
			}

			/* swap_file_pin may have been updated by vm_swapfile_preallocate() */
			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}

			lck_mtx_unlock(&vm_swap_data_lock);

			/* vm_swap_put() waits on this event when it runs out of segments */
			thread_wakeup((event_t) &vm_num_swap_files);
#if !XNU_TARGET_OS_OSX
			/* first swapfile: derive the overage limit from its size */
			if (vm_num_swap_files == 1) {
				c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;

				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					c_overage_swapped_limit /= 2;
				}
			}
#endif /* !XNU_TARGET_OS_OSX */
			break;
		} else {
			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {
		/* no size could be preallocated: close the file and undo our allocations */
		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
	}
	return swap_file_created;
}
1487
1488 extern void vnode_put(struct vnode* vp);
1489 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1490 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1491 {
1492 struct swapfile *swf = NULL;
1493 uint64_t file_offset = 0;
1494 int retval = 0;
1495
1496 assert(c_seg->c_store.c_buffer);
1497
1498 lck_mtx_lock(&vm_swap_data_lock);
1499
1500 swf = vm_swapfile_for_handle(f_offset);
1501
1502 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1503 vm_swap_get_failures++;
1504 retval = 1;
1505 goto done;
1506 }
1507 swf->swp_io_count++;
1508
1509 lck_mtx_unlock(&vm_swap_data_lock);
1510
1511 #if DEVELOPMENT || DEBUG
1512 C_SEG_MAKE_WRITEABLE(c_seg);
1513 #endif
1514 file_offset = (f_offset & SWAP_SLOT_MASK);
1515
1516 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1517 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1518 } else {
1519 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1520 vnode_put(swf->swp_vp);
1521 }
1522
1523 #if DEVELOPMENT || DEBUG
1524 C_SEG_WRITE_PROTECT(c_seg);
1525 #endif
1526 if (retval == 0) {
1527 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1528 } else {
1529 vm_swap_get_failures++;
1530 }
1531
1532 /*
1533 * Free this slot in the swap structure.
1534 */
1535 vm_swap_free(f_offset);
1536
1537 lck_mtx_lock(&vm_swap_data_lock);
1538 swf->swp_io_count--;
1539
1540 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1541 swf->swp_flags &= ~SWAP_WANTED;
1542 thread_wakeup((event_t) &swf->swp_flags);
1543 }
1544 done:
1545 lck_mtx_unlock(&vm_swap_data_lock);
1546
1547 if (retval == 0) {
1548 return KERN_SUCCESS;
1549 } else {
1550 return KERN_FAILURE;
1551 }
1552 }
1553
1554 kern_return_t
vm_swap_put(vm_offset_t addr,uint64_t * f_offset,uint32_t size,c_segment_t c_seg,struct swapout_io_completion * soc)1555 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1556 {
1557 unsigned int segidx = 0;
1558 struct swapfile *swf = NULL;
1559 uint64_t file_offset = 0;
1560 uint64_t swapfile_index = 0;
1561 unsigned int byte_for_segidx = 0;
1562 unsigned int offset_within_byte = 0;
1563 boolean_t swf_eligible = FALSE;
1564 boolean_t waiting = FALSE;
1565 boolean_t retried = FALSE;
1566 int error = 0;
1567 clock_sec_t sec;
1568 clock_nsec_t nsec;
1569 void *upl_ctx = NULL;
1570 boolean_t drop_iocount = FALSE;
1571
1572 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1573 return KERN_FAILURE;
1574 }
1575 retry:
1576 lck_mtx_lock(&vm_swap_data_lock);
1577
1578 swf = (struct swapfile*) queue_first(&swf_global_queue);
1579
1580 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1581 segidx = swf->swp_free_hint;
1582
1583 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1584
1585 if (swf_eligible) {
1586 while (segidx < swf->swp_nsegs) {
1587 byte_for_segidx = segidx >> 3;
1588 offset_within_byte = segidx % 8;
1589
1590 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1591 segidx++;
1592 continue;
1593 }
1594
1595 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1596
1597 file_offset = segidx * compressed_swap_chunk_size;
1598 swf->swp_nseginuse++;
1599 swf->swp_io_count++;
1600 swf->swp_csegs[segidx] = c_seg;
1601
1602 swapfile_index = swf->swp_index;
1603 vm_swapfile_total_segs_used++;
1604 if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1605 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1606 }
1607
1608 clock_get_system_nanotime(&sec, &nsec);
1609
1610 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1611 thread_wakeup((event_t) &vm_swapfile_create_needed);
1612 }
1613
1614 lck_mtx_unlock(&vm_swap_data_lock);
1615
1616 goto issue_io;
1617 }
1618 }
1619 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1620 }
1621 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1622
1623 /*
1624 * we've run out of swap segments, but may not
1625 * be in a position to immediately create a new swap
1626 * file if we've recently failed to create due to a lack
1627 * of free space in the root filesystem... we'll try
1628 * to kick that create off, but in any event we're going
1629 * to take a breather (up to 1 second) so that we're not caught in a tight
1630 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1631 * segments into swap files only to have them immediately put back
1632 * on the c_age queue due to vm_swap_put failing.
1633 *
1634 * if we're doing these puts due to a hibernation flush,
1635 * no need to block... setting hibernate_no_swapspace to TRUE,
1636 * will cause "vm_compressor_compact_and_swap" to immediately abort
1637 */
1638 clock_get_system_nanotime(&sec, &nsec);
1639
1640 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1641 thread_wakeup((event_t) &vm_swapfile_create_needed);
1642 }
1643
1644 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1645 waiting = TRUE;
1646 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1647 } else {
1648 hibernate_no_swapspace = TRUE;
1649 }
1650
1651 lck_mtx_unlock(&vm_swap_data_lock);
1652
1653 if (waiting == TRUE) {
1654 thread_block(THREAD_CONTINUE_NULL);
1655
1656 if (retried == FALSE && hibernate_flushing == TRUE) {
1657 retried = TRUE;
1658 goto retry;
1659 }
1660 }
1661 vm_swap_put_failures_no_swap_file++;
1662
1663 return KERN_FAILURE;
1664
1665 issue_io:
1666 assert(c_seg->c_busy_swapping);
1667 assert(c_seg->c_busy);
1668 assert(!c_seg->c_on_minorcompact_q);
1669
1670 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1671
1672 if (soc) {
1673 soc->swp_c_seg = c_seg;
1674 soc->swp_c_size = size;
1675
1676 soc->swp_swf = swf;
1677
1678 soc->swp_io_error = 0;
1679 soc->swp_io_done = 0;
1680
1681 upl_ctx = (void *)&soc->swp_upl_ctx;
1682 }
1683
1684 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1685 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1686 } else {
1687 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1688 drop_iocount = TRUE;
1689 }
1690
1691 if (error || upl_ctx == NULL) {
1692 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1693 }
1694
1695 return KERN_SUCCESS;
1696 }
1697
1698 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1699 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1700 {
1701 if (drop_iocount) {
1702 vnode_put(swf->swp_vp);
1703 }
1704
1705 lck_mtx_lock(&vm_swap_data_lock);
1706
1707 swf->swp_io_count--;
1708
1709 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1710 swf->swp_flags &= ~SWAP_WANTED;
1711 thread_wakeup((event_t) &swf->swp_flags);
1712 }
1713 lck_mtx_unlock(&vm_swap_data_lock);
1714
1715 if (error) {
1716 vm_swap_free(*f_offset);
1717 vm_swap_put_failures++;
1718
1719 return KERN_FAILURE;
1720 }
1721 return KERN_SUCCESS;
1722 }
1723
1724
1725 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1726 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1727 {
1728 uint64_t file_offset = 0;
1729 unsigned int segidx = 0;
1730
1731
1732 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1733 unsigned int byte_for_segidx = 0;
1734 unsigned int offset_within_byte = 0;
1735
1736 file_offset = (f_offset & SWAP_SLOT_MASK);
1737 segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1738
1739 byte_for_segidx = segidx >> 3;
1740 offset_within_byte = segidx % 8;
1741
1742 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1743 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1744
1745 swf->swp_csegs[segidx] = NULL;
1746
1747 swf->swp_nseginuse--;
1748 vm_swapfile_total_segs_used--;
1749
1750 if (segidx < swf->swp_free_hint) {
1751 swf->swp_free_hint = segidx;
1752 }
1753 }
1754 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1755 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1756 }
1757 }
1758 }
1759
1760
1761 uint32_t vm_swap_free_now_count = 0;
1762 uint32_t vm_swap_free_delayed_count = 0;
1763
1764
1765 void
vm_swap_free(uint64_t f_offset)1766 vm_swap_free(uint64_t f_offset)
1767 {
1768 struct swapfile *swf = NULL;
1769 struct trim_list *tl = NULL;
1770 clock_sec_t sec;
1771 clock_nsec_t nsec;
1772
1773 if (swp_trim_supported == TRUE) {
1774 tl = kalloc_type(struct trim_list, Z_WAITOK);
1775 }
1776
1777 lck_mtx_lock(&vm_swap_data_lock);
1778
1779 swf = vm_swapfile_for_handle(f_offset);
1780
1781 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1782 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1783 /*
1784 * don't delay the free if the underlying disk doesn't support
1785 * trim, or we're in the midst of reclaiming this swap file since
1786 * we don't want to move segments that are technically free
1787 * but not yet handled by the delayed free mechanism
1788 */
1789 vm_swap_free_now(swf, f_offset);
1790
1791 vm_swap_free_now_count++;
1792 goto done;
1793 }
1794 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1795 tl->tl_length = compressed_swap_chunk_size;
1796
1797 tl->tl_next = swf->swp_delayed_trim_list_head;
1798 swf->swp_delayed_trim_list_head = tl;
1799 swf->swp_delayed_trim_count++;
1800 tl = NULL;
1801
1802 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1803 clock_get_system_nanotime(&sec, &nsec);
1804
1805 if (sec > dont_trim_until_ts) {
1806 thread_wakeup((event_t) &vm_swapfile_create_needed);
1807 }
1808 }
1809 vm_swap_free_delayed_count++;
1810 }
1811 done:
1812 lck_mtx_unlock(&vm_swap_data_lock);
1813
1814 if (tl != NULL) {
1815 kfree_type(struct trim_list, tl);
1816 }
1817 }
1818
1819
1820 static void
vm_swap_wait_on_trim_handling_in_progress()1821 vm_swap_wait_on_trim_handling_in_progress()
1822 {
1823 while (delayed_trim_handling_in_progress == TRUE) {
1824 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1825 lck_mtx_unlock(&vm_swap_data_lock);
1826
1827 thread_block(THREAD_CONTINUE_NULL);
1828
1829 lck_mtx_lock(&vm_swap_data_lock);
1830 }
1831 }
1832
1833
1834 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1835 vm_swap_handle_delayed_trims(boolean_t force_now)
1836 {
1837 struct swapfile *swf = NULL;
1838
1839 /*
1840 * serialize the race between us and vm_swap_reclaim...
1841 * if vm_swap_reclaim wins it will turn off SWAP_READY
1842 * on the victim it has chosen... we can just skip over
1843 * that file since vm_swap_reclaim will first process
1844 * all of the delayed trims associated with it
1845 */
1846
1847 if (compressor_store_stop_compaction == TRUE) {
1848 return;
1849 }
1850
1851 lck_mtx_lock(&vm_swap_data_lock);
1852
1853 delayed_trim_handling_in_progress = TRUE;
1854
1855 lck_mtx_unlock(&vm_swap_data_lock);
1856
1857 /*
1858 * no need to hold the lock to walk the swf list since
1859 * vm_swap_create (the only place where we add to this list)
1860 * is run on the same thread as this function
1861 * and vm_swap_reclaim doesn't remove items from this list
1862 * instead marking them with SWAP_REUSE for future re-use
1863 */
1864 swf = (struct swapfile*) queue_first(&swf_global_queue);
1865
1866 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1867 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1868 assert(!(swf->swp_flags & SWAP_RECLAIM));
1869 vm_swap_do_delayed_trim(swf);
1870 }
1871 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1872 }
1873 lck_mtx_lock(&vm_swap_data_lock);
1874
1875 delayed_trim_handling_in_progress = FALSE;
1876 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1877
1878 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1879 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1880 }
1881
1882 lck_mtx_unlock(&vm_swap_data_lock);
1883 }
1884
1885 static void
vm_swap_do_delayed_trim(struct swapfile * swf)1886 vm_swap_do_delayed_trim(struct swapfile *swf)
1887 {
1888 struct trim_list *tl, *tl_head;
1889 int error;
1890
1891 if (compressor_store_stop_compaction == TRUE) {
1892 return;
1893 }
1894
1895 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1896 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1897 return;
1898 }
1899
1900 lck_mtx_lock(&vm_swap_data_lock);
1901
1902 tl_head = swf->swp_delayed_trim_list_head;
1903 swf->swp_delayed_trim_list_head = NULL;
1904 swf->swp_delayed_trim_count = 0;
1905
1906 lck_mtx_unlock(&vm_swap_data_lock);
1907
1908 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1909
1910 (void) vnode_put(swf->swp_vp);
1911
1912 while ((tl = tl_head) != NULL) {
1913 unsigned int segidx = 0;
1914 unsigned int byte_for_segidx = 0;
1915 unsigned int offset_within_byte = 0;
1916
1917 lck_mtx_lock(&vm_swap_data_lock);
1918
1919 segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
1920
1921 byte_for_segidx = segidx >> 3;
1922 offset_within_byte = segidx % 8;
1923
1924 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1925 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1926
1927 swf->swp_csegs[segidx] = NULL;
1928
1929 swf->swp_nseginuse--;
1930 vm_swapfile_total_segs_used--;
1931
1932 if (segidx < swf->swp_free_hint) {
1933 swf->swp_free_hint = segidx;
1934 }
1935 }
1936 lck_mtx_unlock(&vm_swap_data_lock);
1937
1938 tl_head = tl->tl_next;
1939
1940 kfree_type(struct trim_list, tl);
1941 }
1942 }
1943
1944
/*
 * Placeholder: performs no work.
 */
void
vm_swap_flush()
{
	/* intentionally empty */
}
1950
1951 int vm_swap_reclaim_yielded = 0;
1952
1953 void
vm_swap_reclaim(void)1954 vm_swap_reclaim(void)
1955 {
1956 vm_offset_t addr = 0;
1957 unsigned int segidx = 0;
1958 uint64_t f_offset = 0;
1959 struct swapfile *swf = NULL;
1960 struct swapfile *smallest_swf = NULL;
1961 unsigned int min_nsegs = 0;
1962 unsigned int byte_for_segidx = 0;
1963 unsigned int offset_within_byte = 0;
1964 uint32_t c_size = 0;
1965
1966 c_segment_t c_seg = NULL;
1967
1968 kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
1969 KMA_NOFAIL | KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
1970
1971 lck_mtx_lock(&vm_swap_data_lock);
1972
1973 /*
1974 * if we're running the swapfile list looking for
1975 * candidates with delayed trims, we need to
1976 * wait before making our decision concerning
1977 * the swapfile we want to reclaim
1978 */
1979 vm_swap_wait_on_trim_handling_in_progress();
1980
1981 /*
1982 * from here until we knock down the SWAP_READY bit,
1983 * we need to remain behind the vm_swap_data_lock...
1984 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1985 * will not consider this swapfile for processing
1986 */
1987 swf = (struct swapfile*) queue_first(&swf_global_queue);
1988 min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
1989 smallest_swf = NULL;
1990
1991 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1992 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1993 smallest_swf = swf;
1994 min_nsegs = swf->swp_nseginuse;
1995 }
1996 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1997 }
1998
1999 if (smallest_swf == NULL) {
2000 goto done;
2001 }
2002
2003 swf = smallest_swf;
2004
2005
2006 swf->swp_flags &= ~SWAP_READY;
2007 swf->swp_flags |= SWAP_RECLAIM;
2008
2009 if (swf->swp_delayed_trim_count) {
2010 lck_mtx_unlock(&vm_swap_data_lock);
2011
2012 vm_swap_do_delayed_trim(swf);
2013
2014 lck_mtx_lock(&vm_swap_data_lock);
2015 }
2016 segidx = 0;
2017
2018 while (segidx < swf->swp_nsegs) {
2019 ReTry_for_cseg:
2020 /*
2021 * Wait for outgoing I/Os.
2022 */
2023 while (swf->swp_io_count) {
2024 swf->swp_flags |= SWAP_WANTED;
2025
2026 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2027 lck_mtx_unlock(&vm_swap_data_lock);
2028
2029 thread_block(THREAD_CONTINUE_NULL);
2030
2031 lck_mtx_lock(&vm_swap_data_lock);
2032 }
2033 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2034 vm_swap_reclaim_yielded++;
2035 break;
2036 }
2037
2038 byte_for_segidx = segidx >> 3;
2039 offset_within_byte = segidx % 8;
2040
2041 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2042 segidx++;
2043 continue;
2044 }
2045
2046 c_seg = swf->swp_csegs[segidx];
2047 assert(c_seg);
2048
2049 lck_mtx_lock_spin_always(&c_seg->c_lock);
2050
2051 if (c_seg->c_busy) {
2052 /*
2053 * a swapped out c_segment in the process of being freed will remain in the
2054 * busy state until after the vm_swap_free is called on it... vm_swap_free
2055 * takes the vm_swap_data_lock, so can't change the swap state until after
2056 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2057 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2058 * at that point, we re-look up the swap state which will now indicate that
2059 * this c_segment no longer exists.
2060 */
2061 c_seg->c_wanted = 1;
2062
2063 assert_wait((event_t) (c_seg), THREAD_UNINT);
2064 lck_mtx_unlock_always(&c_seg->c_lock);
2065
2066 lck_mtx_unlock(&vm_swap_data_lock);
2067
2068 thread_block(THREAD_CONTINUE_NULL);
2069
2070 lck_mtx_lock(&vm_swap_data_lock);
2071
2072 goto ReTry_for_cseg;
2073 }
2074 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2075
2076 f_offset = segidx * compressed_swap_chunk_size;
2077
2078 assert(c_seg == swf->swp_csegs[segidx]);
2079 swf->swp_csegs[segidx] = NULL;
2080 swf->swp_nseginuse--;
2081
2082 vm_swapfile_total_segs_used--;
2083
2084 lck_mtx_unlock(&vm_swap_data_lock);
2085
2086 assert(C_SEG_IS_ONDISK(c_seg));
2087
2088 C_SEG_BUSY(c_seg);
2089 c_seg->c_busy_swapping = 1;
2090 #if !CHECKSUM_THE_SWAP
2091 c_seg_trim_tail(c_seg);
2092 #endif
2093 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2094
2095 assert(c_size <= c_seg_bufsize && c_size);
2096
2097 lck_mtx_unlock_always(&c_seg->c_lock);
2098
2099 if (vnode_getwithref(swf->swp_vp)) {
2100 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2101 vm_swap_get_failures++;
2102 goto swap_io_failed;
2103 } else {
2104 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2105 /*
2106 * reading the data back in failed, so convert c_seg
2107 * to a swapped in c_segment that contains no data
2108 */
2109 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2110 /*
2111 * returns with c_busy_swapping cleared
2112 */
2113 vnode_put(swf->swp_vp);
2114 vm_swap_get_failures++;
2115 goto swap_io_failed;
2116 }
2117 vnode_put(swf->swp_vp);
2118 }
2119
2120 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2121 vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2122
2123 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2124 vm_offset_t c_buffer;
2125
2126 /*
2127 * the put failed, so convert c_seg to a fully swapped in c_segment
2128 * with valid data
2129 */
2130 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2131
2132 kernel_memory_populate(c_buffer, c_size,
2133 KMA_NOFAIL | KMA_COMPRESSOR,
2134 VM_KERN_MEMORY_COMPRESSOR);
2135
2136 memcpy((char *)c_buffer, (char *)addr, c_size);
2137
2138 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2139 #if ENCRYPTED_SWAP
2140 vm_swap_decrypt(c_seg);
2141 #endif /* ENCRYPTED_SWAP */
2142 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2143 /*
2144 * returns with c_busy_swapping cleared
2145 */
2146 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2147
2148 goto swap_io_failed;
2149 }
2150 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2151
2152 lck_mtx_lock_spin_always(&c_seg->c_lock);
2153
2154 c_seg->c_swappedin = false;
2155
2156 assert(C_SEG_IS_ONDISK(c_seg));
2157 /*
2158 * The c_seg will now know about the new location on disk.
2159 */
2160 c_seg->c_store.c_swap_handle = f_offset;
2161
2162 assert(c_seg->c_busy_swapping);
2163 c_seg->c_busy_swapping = 0;
2164 swap_io_failed:
2165 assert(c_seg->c_busy);
2166 C_SEG_WAKEUP_DONE(c_seg);
2167
2168 lck_mtx_unlock_always(&c_seg->c_lock);
2169 lck_mtx_lock(&vm_swap_data_lock);
2170 }
2171
2172 if (swf->swp_nseginuse) {
2173 swf->swp_flags &= ~SWAP_RECLAIM;
2174 swf->swp_flags |= SWAP_READY;
2175
2176 goto done;
2177 }
2178 /*
2179 * We don't remove this inactive swf from the queue.
2180 * That way, we can re-use it when needed again and
2181 * preserve the namespace. The delayed_trim processing
2182 * is also dependent on us not removing swfs from the queue.
2183 */
2184 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2185
2186 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2187
2188 lck_mtx_unlock(&vm_swap_data_lock);
2189
2190 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2191
2192 kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2193 kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2194
2195 lck_mtx_lock(&vm_swap_data_lock);
2196
2197 if (swf->swp_flags & SWAP_PINNED) {
2198 vm_num_pinned_swap_files--;
2199 vm_swappin_avail += swf->swp_size;
2200 }
2201
2202 swf->swp_vp = NULL;
2203 swf->swp_size = 0;
2204 swf->swp_free_hint = 0;
2205 swf->swp_nsegs = 0;
2206 swf->swp_flags = SWAP_REUSE;
2207
2208 vm_num_swap_files--;
2209
2210 done:
2211 thread_wakeup((event_t) &swf->swp_flags);
2212 lck_mtx_unlock(&vm_swap_data_lock);
2213
2214 kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2215 }
2216
2217
2218 uint64_t
vm_swap_get_total_space(void)2219 vm_swap_get_total_space(void)
2220 {
2221 uint64_t total_space = 0;
2222
2223 total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2224
2225 return total_space;
2226 }
2227
2228 uint64_t
vm_swap_get_used_space(void)2229 vm_swap_get_used_space(void)
2230 {
2231 uint64_t used_space = 0;
2232
2233 used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2234
2235 return used_space;
2236 }
2237
/*
 * vm_swap_get_free_space:
 *
 * Report allocated-but-unused swap space: the difference between
 * vm_swap_get_total_space() and vm_swap_get_used_space().  The
 * subtraction is unsigned and is not clamped.
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t total_bytes = vm_swap_get_total_space();
	uint64_t used_bytes = vm_swap_get_used_space();

	return total_bytes - used_bytes;
}
2243
2244 uint64_t
vm_swap_get_max_configured_space(void)2245 vm_swap_get_max_configured_space(void)
2246 {
2247 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2248 return num_swap_files * MAX_SWAP_FILE_SIZE;
2249 }
2250
2251 int
vm_swap_low_on_space(void)2252 vm_swap_low_on_space(void)
2253 {
2254 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2255 return 0;
2256 }
2257
2258 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2259 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2260 return 0;
2261 }
2262
2263 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2264 return 1;
2265 }
2266 }
2267 return 0;
2268 }
2269
2270 int
vm_swap_out_of_space(void)2271 vm_swap_out_of_space(void)
2272 {
2273 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2274 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2275 /*
2276 * Last swapfile and we have only space for the
2277 * last few swapouts.
2278 */
2279 return 1;
2280 }
2281
2282 return 0;
2283 }
2284
2285 boolean_t
vm_swap_files_pinned(void)2286 vm_swap_files_pinned(void)
2287 {
2288 boolean_t result;
2289
2290 if (vm_swappin_enabled == FALSE) {
2291 return TRUE;
2292 }
2293
2294 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2295
2296 return result;
2297 }
2298
#if CONFIG_FREEZE
/*
 * vm_swap_max_budget:
 *
 * Query a freeze budget via vm_swap_vol_get_budget() and store it
 * through freeze_daily_budget.
 *
 * When swapfiles exist, the global swapfile queue is walked under
 * vm_swap_data_lock and the first entry whose flags are exactly
 * SWAP_READY is queried; the walk stops there whether or not the query
 * succeeds.  If no such entry is found, *freeze_daily_budget is not
 * written by this function.
 *
 * When no swapfiles exist, a temporary swapfile is opened just to
 * obtain a vnode to query, and is closed again afterwards.  If it
 * cannot be opened, *freeze_daily_budget is set to 0.
 *
 * Returns TRUE only if vm_swap_vol_get_budget() returned 0, FALSE
 * otherwise.
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t budget_from_device = FALSE;
	struct swapfile *cur = NULL;
	struct vnode *probe_vp = NULL;

	if (vm_num_swap_files == 0) {
		/*
		 * Initial budget value, before any swap files have been
		 * created: open a temporary swap file to get the budget.
		 */
		vm_swapfile_open(swapfilename, &probe_vp);

		if (probe_vp == NULL) {
			*freeze_daily_budget = 0;
			return budget_from_device;
		}

		if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
			budget_from_device = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, probe_vp);

		return budget_from_device;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	cur = (struct swapfile*) queue_first(&swf_global_queue);

	if (cur) {
		for (; queue_end(&swf_global_queue, (queue_entry_t)cur) == FALSE;
		    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {
			if (cur->swp_flags != SWAP_READY) {
				continue;
			}

			assert(cur->swp_vp);

			if (vm_swap_vol_get_budget(cur->swp_vp, freeze_daily_budget) == 0) {
				budget_from_device = TRUE;
			}
			/* only the first ready swapfile is consulted */
			break;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return budget_from_device;
}
#endif /* CONFIG_FREEZE */
2351
2352 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2353 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2354 {
2355 lck_mtx_lock(&vm_swap_data_lock);
2356
2357 *alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2358 *used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2359
2360 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2361 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2362
2363 lck_mtx_unlock(&vm_swap_data_lock);
2364 }
2365