xref: /xnu-8020.140.41/osfmk/vm/vm_compressor_backing_store.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32 
33 #include <IOKit/IOHibernatePrivate.h>
34 
35 #include <kern/policy_internal.h>
36 
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39 
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42  * launchd explicitly turns ON swap later during boot on macOS devices.
43  */
44 boolean_t       compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t       compressor_store_stop_compaction = FALSE;
47 #endif
48 
49 boolean_t       vm_swapfile_create_needed = FALSE;
50 boolean_t       vm_swapfile_gc_needed = FALSE;
51 
52 int             vm_swapper_throttle = -1;
53 uint64_t        vm_swapout_thread_id;
54 
55 uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56 uint64_t        vm_swap_get_failures = 0; /* Fatal */
57 uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58 int             vm_num_swap_files_config = 0;
59 int             vm_num_swap_files = 0;
60 int             vm_num_pinned_swap_files = 0;
61 int             vm_swapout_thread_processed_segments = 0;
62 int             vm_swapout_thread_awakened = 0;
63 bool            vm_swapout_thread_running = FALSE;
64 int             vm_swapfile_create_thread_awakened = 0;
65 int             vm_swapfile_create_thread_running = 0;
66 int             vm_swapfile_gc_thread_awakened = 0;
67 int             vm_swapfile_gc_thread_running = 0;
68 
69 int64_t         vm_swappin_avail = 0;
70 boolean_t       vm_swappin_enabled = FALSE;
71 unsigned int    vm_swapfile_total_segs_alloced = 0;
72 unsigned int    vm_swapfile_total_segs_alloced_max = 0;
73 unsigned int    vm_swapfile_total_segs_used = 0;
74 unsigned int    vm_swapfile_total_segs_used_max = 0;
75 
76 char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
77 
78 extern vm_map_t compressor_map;
79 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
80 
81 #define SWAP_READY      0x1     /* Swap file is ready to be used */
82 #define SWAP_RECLAIM    0x2     /* Swap file is marked to be reclaimed */
83 #define SWAP_WANTED     0x4     /* Swap file has waiters */
84 #define SWAP_REUSE      0x8     /* Swap file is on the Q and has a name. Reuse after init-ing.*/
85 #define SWAP_PINNED     0x10    /* Swap file is pinned (FusionDrive) */
86 
87 
88 struct swapfile {
89 	queue_head_t            swp_queue;      /* list of swap files */
90 	char                    *swp_path;      /* saved pathname of swap file */
91 	struct vnode            *swp_vp;        /* backing vnode */
92 	uint64_t                swp_size;       /* size of this swap file */
93 	uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
94 	unsigned int            swp_pathlen;    /* length of pathname */
95 	unsigned int            swp_nsegs;      /* #segments we can use */
96 	unsigned int            swp_nseginuse;  /* #segments in use */
97 	unsigned int            swp_index;      /* index of this swap file */
98 	unsigned int            swp_flags;      /* state of swap file */
99 	unsigned int            swp_free_hint;  /* offset of 1st free chunk */
100 	unsigned int            swp_io_count;   /* count of outstanding I/Os */
101 	c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */
102 
103 	struct trim_list        *swp_delayed_trim_list_head;
104 	unsigned int            swp_delayed_trim_count;
105 };
106 
107 queue_head_t    swf_global_queue;
108 boolean_t       swp_trim_supported = FALSE;
109 
110 extern clock_sec_t      dont_trim_until_ts;
111 clock_sec_t             vm_swapfile_last_failed_to_create_ts = 0;
112 clock_sec_t             vm_swapfile_last_successful_create_ts = 0;
113 int                     vm_swapfile_can_be_created = FALSE;
114 boolean_t               delayed_trim_handling_in_progress = FALSE;
115 
116 boolean_t               hibernate_in_progress_with_pinned_swap = FALSE;
117 
118 static void vm_swapout_thread_throttle_adjust(void);
119 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
120 static void vm_swapout_thread(void);
121 static void vm_swapfile_create_thread(void);
122 static void vm_swapfile_gc_thread(void);
123 static void vm_swap_defragment(void);
124 static void vm_swap_handle_delayed_trims(boolean_t);
125 static void vm_swap_do_delayed_trim(struct swapfile *);
126 static void vm_swap_wait_on_trim_handling_in_progress(void);
127 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
128 
129 extern int vnode_getwithref(struct vnode* vp);
130 
131 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
132 
133 #if !XNU_TARGET_OS_OSX
134 
135 /*
136  * For CONFIG_FREEZE, we scale the c_segments_limit based on the
137  * number of swapfiles allowed. That increases wired memory overhead.
138  * So we want to keep the max swapfiles same on both DEV/RELEASE so
139  * that the memory overhead is similar for performance comparisons.
140  */
141 #define VM_MAX_SWAP_FILE_NUM            5
142 
143 #define VM_SWAPFILE_DELAYED_TRIM_MAX    4
144 
145 #define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
146 #define VM_SWAP_SHOULD_PIN(_size)       FALSE
147 #define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
148 	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
149 #define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
150 
151 #else /* !XNU_TARGET_OS_OSX */
152 
153 #define VM_MAX_SWAP_FILE_NUM            100
154 #define VM_SWAPFILE_DELAYED_TRIM_MAX    128
155 
156 #define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
157 #define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
158 #define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
159 	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
160 #define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
161 
162 #endif /* !XNU_TARGET_OS_OSX */
163 
164 #define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
165 #define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
166 #define VM_SWAPFILE_DELAYED_CREATE      15
167 
168 #define VM_SWAP_BUSY()  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
169 
170 
171 #if CHECKSUM_THE_SWAP
172 extern unsigned int hash_string(char *cp, int len);
173 #endif
174 
175 #if RECORD_THE_COMPRESSED_DATA
176 boolean_t       c_compressed_record_init_done = FALSE;
177 int             c_compressed_record_write_error = 0;
178 struct vnode    *c_compressed_record_vp = NULL;
179 uint64_t        c_compressed_record_file_offset = 0;
180 void    c_compressed_record_init(void);
181 void    c_compressed_record_write(char *, int);
182 #endif
183 
184 extern void                     vm_pageout_io_throttle(void);
185 
186 static struct swapfile *vm_swapfile_for_handle(uint64_t);
187 
188 /*
189  * Called with the vm_swap_data_lock held.
190  */
191 
192 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)193 vm_swapfile_for_handle(uint64_t f_offset)
194 {
195 	uint64_t                file_offset = 0;
196 	unsigned int            swapfile_index = 0;
197 	struct swapfile*        swf = NULL;
198 
199 	file_offset = (f_offset & SWAP_SLOT_MASK);
200 	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
201 
202 	swf = (struct swapfile*) queue_first(&swf_global_queue);
203 
204 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
205 		if (swapfile_index == swf->swp_index) {
206 			break;
207 		}
208 
209 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
210 	}
211 
212 	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
213 		swf = NULL;
214 	}
215 
216 	return swf;
217 }
218 
219 #if ENCRYPTED_SWAP
220 
221 #include <libkern/crypto/aesxts.h>
222 
223 extern int cc_rand_generate(void *, size_t);     /* from libkern/cyrpto/rand.h> */
224 
225 boolean_t       swap_crypt_initialized;
226 void            swap_crypt_initialize(void);
227 
228 symmetric_xts   xts_modectx;
229 uint32_t        swap_crypt_key1[8];   /* big enough for a 256 bit random key */
230 uint32_t        swap_crypt_key2[8];   /* big enough for a 256 bit random key */
231 
232 #if DEVELOPMENT || DEBUG
233 boolean_t       swap_crypt_xts_tested = FALSE;
234 unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
235 unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
236 unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
237 #endif /* DEVELOPMENT || DEBUG */
238 
239 unsigned long   vm_page_encrypt_counter;
240 unsigned long   vm_page_decrypt_counter;
241 
242 
243 void
swap_crypt_initialize(void)244 swap_crypt_initialize(void)
245 {
246 	uint8_t  *enckey1, *enckey2;
247 	int      keylen1, keylen2;
248 	int      error;
249 
250 	assert(swap_crypt_initialized == FALSE);
251 
252 	keylen1 = sizeof(swap_crypt_key1);
253 	enckey1 = (uint8_t *)&swap_crypt_key1;
254 	keylen2 = sizeof(swap_crypt_key2);
255 	enckey2 = (uint8_t *)&swap_crypt_key2;
256 
257 	error = cc_rand_generate((void *)enckey1, keylen1);
258 	assert(!error);
259 
260 	error = cc_rand_generate((void *)enckey2, keylen2);
261 	assert(!error);
262 
263 	error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
264 	assert(!error);
265 
266 	swap_crypt_initialized = TRUE;
267 
268 #if DEVELOPMENT || DEBUG
269 	uint8_t *encptr;
270 	uint8_t *decptr;
271 	uint8_t *refptr;
272 	uint8_t *iv;
273 	uint64_t ivnum[2];
274 	int size = 0;
275 	int i    = 0;
276 	int rc   = 0;
277 
278 	assert(swap_crypt_xts_tested == FALSE);
279 
280 	/*
281 	 * Validate the encryption algorithms.
282 	 *
283 	 * First initialize the test data.
284 	 */
285 	for (i = 0; i < 4096; i++) {
286 		swap_crypt_test_page_ref[i] = (char) i;
287 	}
288 	ivnum[0] = (uint64_t)0xaa;
289 	ivnum[1] = 0;
290 	iv = (uint8_t *)ivnum;
291 
292 	refptr = (uint8_t *)swap_crypt_test_page_ref;
293 	encptr = (uint8_t *)swap_crypt_test_page_encrypt;
294 	decptr = (uint8_t *)swap_crypt_test_page_decrypt;
295 	size = 4096;
296 
297 	/* encrypt */
298 	rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
299 	assert(!rc);
300 
301 	/* compare result with original - should NOT match */
302 	for (i = 0; i < 4096; i++) {
303 		if (swap_crypt_test_page_encrypt[i] !=
304 		    swap_crypt_test_page_ref[i]) {
305 			break;
306 		}
307 	}
308 	assert(i != 4096);
309 
310 	/* decrypt */
311 	rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
312 	assert(!rc);
313 
314 	/* compare result with original */
315 	for (i = 0; i < 4096; i++) {
316 		if (swap_crypt_test_page_decrypt[i] !=
317 		    swap_crypt_test_page_ref[i]) {
318 			panic("encryption test failed");
319 		}
320 	}
321 	/* encrypt in place */
322 	rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
323 	assert(!rc);
324 
325 	/* decrypt in place */
326 	rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
327 	assert(!rc);
328 
329 	for (i = 0; i < 4096; i++) {
330 		if (swap_crypt_test_page_decrypt[i] !=
331 		    swap_crypt_test_page_ref[i]) {
332 			panic("in place encryption test failed");
333 		}
334 	}
335 	swap_crypt_xts_tested = TRUE;
336 #endif /* DEVELOPMENT || DEBUG */
337 }
338 
339 
340 void
vm_swap_encrypt(c_segment_t c_seg)341 vm_swap_encrypt(c_segment_t c_seg)
342 {
343 	uint8_t *ptr;
344 	uint8_t *iv;
345 	uint64_t ivnum[2];
346 	int size = 0;
347 	int rc   = 0;
348 
349 	if (swap_crypt_initialized == FALSE) {
350 		swap_crypt_initialize();
351 	}
352 
353 #if DEVELOPMENT || DEBUG
354 	C_SEG_MAKE_WRITEABLE(c_seg);
355 #endif
356 	ptr = (uint8_t *)c_seg->c_store.c_buffer;
357 	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
358 
359 	ivnum[0] = (uint64_t)c_seg;
360 	ivnum[1] = 0;
361 	iv = (uint8_t *)ivnum;
362 
363 	rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
364 	assert(!rc);
365 
366 	vm_page_encrypt_counter += (size / PAGE_SIZE_64);
367 
368 #if DEVELOPMENT || DEBUG
369 	C_SEG_WRITE_PROTECT(c_seg);
370 #endif
371 }
372 
373 void
vm_swap_decrypt(c_segment_t c_seg)374 vm_swap_decrypt(c_segment_t c_seg)
375 {
376 	uint8_t *ptr;
377 	uint8_t *iv;
378 	uint64_t ivnum[2];
379 	int size = 0;
380 	int rc   = 0;
381 
382 	assert(swap_crypt_initialized);
383 
384 #if DEVELOPMENT || DEBUG
385 	C_SEG_MAKE_WRITEABLE(c_seg);
386 #endif
387 	ptr = (uint8_t *)c_seg->c_store.c_buffer;
388 	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
389 
390 	ivnum[0] = (uint64_t)c_seg;
391 	ivnum[1] = 0;
392 	iv = (uint8_t *)ivnum;
393 
394 	rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
395 	assert(!rc);
396 
397 	vm_page_decrypt_counter += (size / PAGE_SIZE_64);
398 
399 #if DEVELOPMENT || DEBUG
400 	C_SEG_WRITE_PROTECT(c_seg);
401 #endif
402 }
403 #endif /* ENCRYPTED_SWAP */
404 
405 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
406 void
vm_compressor_swap_init(void)407 vm_compressor_swap_init(void)
408 {
409 	thread_t        thread = NULL;
410 
411 	queue_init(&swf_global_queue);
412 
413 #if !XNU_TARGET_OS_OSX
414 	/*
415 	 * dummy value until the swap file gets created
416 	 * when we drive the first c_segment_t to the
417 	 * swapout queue... at that time we will
418 	 * know the true size we have to work with
419 	 */
420 	c_overage_swapped_limit = 16;
421 #endif /* !XNU_TARGET_OS_OSX */
422 
423 	compressed_swap_chunk_size = c_seg_bufsize;
424 	vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
425 	swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
426 	swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
427 	vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
428 #if DEVELOPMENT || DEBUG
429 	typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
430 	if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
431 		if (parsed_vm_max_num_swap_files > 0) {
432 			vm_num_swap_files_config = parsed_vm_max_num_swap_files;
433 		} else {
434 			printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
435 		}
436 	}
437 #endif
438 	printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
439 
440 	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
441 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
442 		panic("vm_swapout_thread: create failed");
443 	}
444 	thread_set_thread_name(thread, "VM_swapout");
445 	vm_swapout_thread_id = thread->thread_id;
446 	thread_deallocate(thread);
447 
448 	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
449 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
450 		panic("vm_swapfile_create_thread: create failed");
451 	}
452 	thread_set_thread_name(thread, "VM_swapfile_create");
453 	thread_deallocate(thread);
454 
455 	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
456 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
457 		panic("vm_swapfile_gc_thread: create failed");
458 	}
459 	thread_set_thread_name(thread, "VM_swapfile_gc");
460 	/*
461 	 * Swapfile garbage collection will need to allocate memory
462 	 * to complete its swap reclaim and in-memory compaction.
463 	 * So allow it to dip into the reserved VM page pool.
464 	 */
465 	thread_lock(thread);
466 	thread->options |= TH_OPT_VMPRIV;
467 	thread_unlock(thread);
468 	thread_deallocate(thread);
469 	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
470 	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
471 	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
472 	    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
473 
474 	printf("VM Swap Subsystem is ON\n");
475 }
476 
477 
478 #if RECORD_THE_COMPRESSED_DATA
479 
480 void
c_compressed_record_init()481 c_compressed_record_init()
482 {
483 	if (c_compressed_record_init_done == FALSE) {
484 		vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
485 		c_compressed_record_init_done = TRUE;
486 	}
487 }
488 
489 void
c_compressed_record_write(char * buf,int size)490 c_compressed_record_write(char *buf, int size)
491 {
492 	if (c_compressed_record_write_error == 0) {
493 		c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
494 		c_compressed_record_file_offset += size;
495 	}
496 }
497 #endif
498 
499 
500 int             compaction_swapper_inited = 0;
501 
502 void
vm_compaction_swapper_do_init(void)503 vm_compaction_swapper_do_init(void)
504 {
505 	struct  vnode *vp;
506 	char    *pathname;
507 	int     namelen;
508 
509 	if (compaction_swapper_inited) {
510 		return;
511 	}
512 
513 	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
514 		compaction_swapper_inited = 1;
515 		return;
516 	}
517 	lck_mtx_lock(&vm_swap_data_lock);
518 
519 	if (!compaction_swapper_inited) {
520 		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
521 		pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
522 		snprintf(pathname, namelen, "%s%d", swapfilename, 0);
523 
524 		vm_swapfile_open(pathname, &vp);
525 
526 		if (vp) {
527 			if (vnode_pager_isSSD(vp) == FALSE) {
528 				/*
529 				 * swap files live on an HDD, so let's make sure to start swapping
530 				 * much earlier since we're not worried about SSD write-wear and
531 				 * we have so little write bandwidth to work with
532 				 * these values were derived expermentially by running the performance
533 				 * teams stock test for evaluating HDD performance against various
534 				 * combinations and looking and comparing overall results.
535 				 * Note that the > relationship between these 4 values must be maintained
536 				 */
537 				if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
538 					vm_compressor_minorcompact_threshold_divisor = 15;
539 				}
540 				if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
541 					vm_compressor_majorcompact_threshold_divisor = 18;
542 				}
543 				if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
544 					vm_compressor_unthrottle_threshold_divisor = 24;
545 				}
546 				if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
547 					vm_compressor_catchup_threshold_divisor = 30;
548 				}
549 			}
550 #if XNU_TARGET_OS_OSX
551 			vnode_setswapmount(vp);
552 			vm_swappin_avail = vnode_getswappin_avail(vp);
553 
554 			if (vm_swappin_avail) {
555 				vm_swappin_enabled = TRUE;
556 			}
557 #endif /* XNU_TARGET_OS_OSX */
558 			vm_swapfile_close((uint64_t)pathname, vp);
559 		}
560 		kfree_data(pathname, namelen);
561 
562 		compaction_swapper_inited = 1;
563 	}
564 	lck_mtx_unlock(&vm_swap_data_lock);
565 }
566 
567 
568 void
vm_swap_consider_defragmenting(int flags)569 vm_swap_consider_defragmenting(int flags)
570 {
571 	boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
572 	boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
573 
574 	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
575 	    (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
576 		if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
577 			lck_mtx_lock(&vm_swap_data_lock);
578 
579 			if (force_defrag) {
580 				vm_swap_force_defrag = TRUE;
581 			}
582 
583 			if (force_reclaim) {
584 				vm_swap_force_reclaim = TRUE;
585 			}
586 
587 			if (!vm_swapfile_gc_thread_running) {
588 				thread_wakeup((event_t) &vm_swapfile_gc_needed);
589 			}
590 
591 			lck_mtx_unlock(&vm_swap_data_lock);
592 		}
593 	}
594 }
595 
596 
/* Counters maintained by vm_swap_defragment(). */
int vm_swap_defragment_yielded = 0;     /* passes cut short because compaction stopped or swap was busy */
int vm_swap_defragment_swapin = 0;      /* segments handed to c_seg_swapin() */
int vm_swap_defragment_free = 0;        /* empty segments freed */
int vm_swap_defragment_busy = 0;        /* times a busy segment had to be waited on */

#if CONFIG_FREEZE
/* Compressor occupancy figures consulted before swapping a segment back in. */
extern uint32_t c_segment_pages_compressed_incore;
extern uint32_t c_segment_pages_compressed_nearing_limit;
extern uint32_t c_segment_count;
extern uint32_t c_segments_nearing_limit;

boolean_t       memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);

extern bool freezer_incore_cseg_acct;
#endif /* CONFIG_FREEZE */
612 
613 static void
vm_swap_defragment()614 vm_swap_defragment()
615 {
616 	c_segment_t     c_seg;
617 
618 	/*
619 	 * have to grab the master lock w/o holding
620 	 * any locks in spin mode
621 	 */
622 	PAGE_REPLACEMENT_DISALLOWED(TRUE);
623 
624 	lck_mtx_lock_spin_always(c_list_lock);
625 
626 	while (!queue_empty(&c_swappedout_sparse_list_head)) {
627 		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
628 			vm_swap_defragment_yielded++;
629 			break;
630 		}
631 		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
632 
633 		lck_mtx_lock_spin_always(&c_seg->c_lock);
634 
635 		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
636 
637 		if (c_seg->c_busy) {
638 			lck_mtx_unlock_always(c_list_lock);
639 
640 			PAGE_REPLACEMENT_DISALLOWED(FALSE);
641 			/*
642 			 * c_seg_wait_on_busy consumes c_seg->c_lock
643 			 */
644 			c_seg_wait_on_busy(c_seg);
645 
646 			PAGE_REPLACEMENT_DISALLOWED(TRUE);
647 
648 			lck_mtx_lock_spin_always(c_list_lock);
649 
650 			vm_swap_defragment_busy++;
651 			continue;
652 		}
653 		if (c_seg->c_bytes_used == 0) {
654 			/*
655 			 * c_seg_free_locked consumes the c_list_lock
656 			 * and c_seg->c_lock
657 			 */
658 			C_SEG_BUSY(c_seg);
659 			c_seg_free_locked(c_seg);
660 
661 			vm_swap_defragment_free++;
662 		} else {
663 			lck_mtx_unlock_always(c_list_lock);
664 
665 #if CONFIG_FREEZE
666 			if (freezer_incore_cseg_acct) {
667 				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
668 					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
669 				}
670 
671 				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
672 				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
673 					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
674 				}
675 			}
676 #endif /* CONFIG_FREEZE */
677 			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
678 				lck_mtx_unlock_always(&c_seg->c_lock);
679 				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
680 			}
681 
682 			vm_swap_defragment_swapin++;
683 		}
684 		PAGE_REPLACEMENT_DISALLOWED(FALSE);
685 
686 		vm_pageout_io_throttle();
687 
688 		/*
689 		 * because write waiters have privilege over readers,
690 		 * dropping and immediately retaking the master lock will
691 		 * still allow any thread waiting to acquire the
692 		 * master lock exclusively an opportunity to take it
693 		 */
694 		PAGE_REPLACEMENT_DISALLOWED(TRUE);
695 
696 		lck_mtx_lock_spin_always(c_list_lock);
697 	}
698 	lck_mtx_unlock_always(c_list_lock);
699 
700 	PAGE_REPLACEMENT_DISALLOWED(FALSE);
701 }
702 
703 
704 bool vm_swapfile_create_thread_inited = false;
705 static void
vm_swapfile_create_thread(void)706 vm_swapfile_create_thread(void)
707 {
708 	clock_sec_t     sec;
709 	clock_nsec_t    nsec;
710 
711 	if (!vm_swapfile_create_thread_inited) {
712 #if CONFIG_THREAD_GROUPS
713 		thread_group_vm_add();
714 #endif /* CONFIG_THREAD_GROUPS */
715 		current_thread()->options |= TH_OPT_VMPRIV;
716 		vm_swapfile_create_thread_inited = true;
717 	}
718 
719 	vm_swapfile_create_thread_awakened++;
720 	vm_swapfile_create_thread_running = 1;
721 
722 	while (TRUE) {
723 		/*
724 		 * walk through the list of swap files
725 		 * and do the delayed frees/trims for
726 		 * any swap file whose count of delayed
727 		 * frees is above the batch limit
728 		 */
729 		vm_swap_handle_delayed_trims(FALSE);
730 
731 		lck_mtx_lock(&vm_swap_data_lock);
732 
733 		if (hibernate_in_progress_with_pinned_swap == TRUE) {
734 			break;
735 		}
736 
737 		if (compressor_store_stop_compaction == TRUE) {
738 			break;
739 		}
740 
741 		clock_get_system_nanotime(&sec, &nsec);
742 
743 		if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
744 			break;
745 		}
746 
747 		lck_mtx_unlock(&vm_swap_data_lock);
748 
749 		if (vm_swap_create_file() == FALSE) {
750 			vm_swapfile_last_failed_to_create_ts = sec;
751 			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
752 		} else {
753 			vm_swapfile_last_successful_create_ts = sec;
754 		}
755 	}
756 	vm_swapfile_create_thread_running = 0;
757 
758 	if (hibernate_in_progress_with_pinned_swap == TRUE) {
759 		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
760 	}
761 
762 	if (compressor_store_stop_compaction == TRUE) {
763 		thread_wakeup((event_t)&compressor_store_stop_compaction);
764 	}
765 
766 	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
767 
768 	lck_mtx_unlock(&vm_swap_data_lock);
769 
770 	thread_block((thread_continue_t)vm_swapfile_create_thread);
771 
772 	/* NOTREACHED */
773 }
774 
775 
776 #if HIBERNATION
777 
778 kern_return_t
hibernate_pin_swap(boolean_t start)779 hibernate_pin_swap(boolean_t start)
780 {
781 	vm_compaction_swapper_do_init();
782 
783 	if (start == FALSE) {
784 		lck_mtx_lock(&vm_swap_data_lock);
785 		hibernate_in_progress_with_pinned_swap = FALSE;
786 		lck_mtx_unlock(&vm_swap_data_lock);
787 
788 		return KERN_SUCCESS;
789 	}
790 	if (vm_swappin_enabled == FALSE) {
791 		return KERN_SUCCESS;
792 	}
793 
794 	lck_mtx_lock(&vm_swap_data_lock);
795 
796 	hibernate_in_progress_with_pinned_swap = TRUE;
797 
798 	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
799 		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
800 
801 		lck_mtx_unlock(&vm_swap_data_lock);
802 
803 		thread_block(THREAD_CONTINUE_NULL);
804 
805 		lck_mtx_lock(&vm_swap_data_lock);
806 	}
807 	if (vm_num_swap_files > vm_num_pinned_swap_files) {
808 		hibernate_in_progress_with_pinned_swap = FALSE;
809 		lck_mtx_unlock(&vm_swap_data_lock);
810 
811 		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
812 		    vm_num_swap_files, vm_num_pinned_swap_files);
813 		return KERN_FAILURE;
814 	}
815 	lck_mtx_unlock(&vm_swap_data_lock);
816 
817 	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
818 		if (vm_swap_create_file() == FALSE) {
819 			break;
820 		}
821 	}
822 	return KERN_SUCCESS;
823 }
824 #endif
825 bool vm_swapfile_gc_thread_inited = false;
826 static void
vm_swapfile_gc_thread(void)827 vm_swapfile_gc_thread(void)
828 {
829 	boolean_t       need_defragment;
830 	boolean_t       need_reclaim;
831 
832 	if (!vm_swapfile_gc_thread_inited) {
833 #if CONFIG_THREAD_GROUPS
834 		thread_group_vm_add();
835 #endif /* CONFIG_THREAD_GROUPS */
836 		vm_swapfile_gc_thread_inited = true;
837 	}
838 
839 	vm_swapfile_gc_thread_awakened++;
840 	vm_swapfile_gc_thread_running = 1;
841 
842 	while (TRUE) {
843 		lck_mtx_lock(&vm_swap_data_lock);
844 
845 		if (hibernate_in_progress_with_pinned_swap == TRUE) {
846 			break;
847 		}
848 
849 		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
850 			break;
851 		}
852 
853 		need_defragment = FALSE;
854 		need_reclaim = FALSE;
855 
856 		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
857 			need_defragment = TRUE;
858 		}
859 
860 		if (VM_SWAP_SHOULD_RECLAIM()) {
861 			need_defragment = TRUE;
862 			need_reclaim = TRUE;
863 		}
864 		if (need_defragment == FALSE && need_reclaim == FALSE) {
865 			break;
866 		}
867 
868 		vm_swap_force_defrag = FALSE;
869 		vm_swap_force_reclaim = FALSE;
870 
871 		lck_mtx_unlock(&vm_swap_data_lock);
872 
873 		if (need_defragment == TRUE) {
874 			vm_swap_defragment();
875 		}
876 		if (need_reclaim == TRUE) {
877 			vm_swap_reclaim();
878 		}
879 	}
880 	vm_swapfile_gc_thread_running = 0;
881 
882 	if (hibernate_in_progress_with_pinned_swap == TRUE) {
883 		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
884 	}
885 
886 	if (compressor_store_stop_compaction == TRUE) {
887 		thread_wakeup((event_t)&compressor_store_stop_compaction);
888 	}
889 
890 	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
891 
892 	lck_mtx_unlock(&vm_swap_data_lock);
893 
894 	thread_block((thread_continue_t)vm_swapfile_gc_thread);
895 
896 	/* NOTREACHED */
897 }
898 
899 
900 
/* Values assigned to vm_swapout_limit for each throttle state. */
#define VM_SWAPOUT_LIMIT_T2P    4
#define VM_SWAPOUT_LIMIT_T1P    4
#define VM_SWAPOUT_LIMIT_T0P    6
#define VM_SWAPOUT_LIMIT_T0     8
#define VM_SWAPOUT_LIMIT_MAX    8

/* States stepped through by vm_swapout_thread_throttle_adjust(). */
#define VM_SWAPOUT_START        0
#define VM_SWAPOUT_T2_PASSIVE   1
#define VM_SWAPOUT_T1_PASSIVE   2
#define VM_SWAPOUT_T0_PASSIVE   3
#define VM_SWAPOUT_T0           4

/* Current throttle state and the swapout limit that goes with it. */
int vm_swapout_state = VM_SWAPOUT_START;
int vm_swapout_limit = 1;

/* How many times each throttle state has been entered. */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
920 
921 
922 static void
vm_swapout_thread_throttle_adjust(void)923 vm_swapout_thread_throttle_adjust(void)
924 {
925 	switch (vm_swapout_state) {
926 	case VM_SWAPOUT_START:
927 
928 		vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
929 		vm_swapper_entered_T2P++;
930 
931 		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
932 		    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
933 		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
934 		    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
935 		vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
936 		vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
937 
938 		break;
939 
940 	case VM_SWAPOUT_T2_PASSIVE:
941 
942 		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
943 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
944 			vm_swapper_entered_T0P++;
945 
946 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
947 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
948 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
949 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
950 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
951 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
952 
953 			break;
954 		}
955 		if (swapout_target_age || hibernate_flushing == TRUE) {
956 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
957 			vm_swapper_entered_T1P++;
958 
959 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
960 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
961 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
962 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
963 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
964 			vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
965 		}
966 		break;
967 
968 	case VM_SWAPOUT_T1_PASSIVE:
969 
970 		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
971 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
972 			vm_swapper_entered_T0P++;
973 
974 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
975 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
976 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
977 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
978 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
979 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
980 
981 			break;
982 		}
983 		if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
984 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
985 			vm_swapper_entered_T2P++;
986 
987 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
988 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
989 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
990 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
991 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
992 			vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
993 		}
994 		break;
995 
996 	case VM_SWAPOUT_T0_PASSIVE:
997 
998 		if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
999 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1000 			vm_swapper_entered_T2P++;
1001 
1002 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1003 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1004 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1005 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1006 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1007 			vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1008 
1009 			break;
1010 		}
1011 		if (SWAPPER_NEEDS_TO_CATCHUP()) {
1012 			vm_swapper_entered_T0++;
1013 
1014 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1015 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1016 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1017 			vm_swapout_state = VM_SWAPOUT_T0;
1018 		}
1019 		break;
1020 
1021 	case VM_SWAPOUT_T0:
1022 
1023 		if (SWAPPER_HAS_CAUGHTUP()) {
1024 			vm_swapper_entered_T0P++;
1025 
1026 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1027 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1028 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1029 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1030 		}
1031 		break;
1032 	}
1033 }
1034 
1035 int vm_swapout_found_empty = 0;
1036 
1037 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1038 
1039 int vm_swapout_soc_busy = 0;
1040 int vm_swapout_soc_done = 0;
1041 
1042 
1043 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1044 vm_swapout_find_free_soc(void)
1045 {
1046 	int      i;
1047 
1048 	for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1049 		if (vm_swapout_ctx[i].swp_io_busy == 0) {
1050 			return &vm_swapout_ctx[i];
1051 		}
1052 	}
1053 	assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1054 
1055 	return NULL;
1056 }
1057 
1058 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1059 vm_swapout_find_done_soc(void)
1060 {
1061 	int      i;
1062 
1063 	if (vm_swapout_soc_done) {
1064 		for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1065 			if (vm_swapout_ctx[i].swp_io_done) {
1066 				return &vm_swapout_ctx[i];
1067 			}
1068 		}
1069 	}
1070 	return NULL;
1071 }
1072 
1073 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1074 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1075 {
1076 	kern_return_t  kr;
1077 
1078 	if (soc->swp_io_error) {
1079 		kr = KERN_FAILURE;
1080 	} else {
1081 		kr = KERN_SUCCESS;
1082 	}
1083 
1084 	lck_mtx_unlock_always(c_list_lock);
1085 
1086 	vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1087 	vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1088 
1089 	lck_mtx_lock_spin_always(c_list_lock);
1090 
1091 	soc->swp_io_done = 0;
1092 	soc->swp_io_busy = 0;
1093 
1094 	vm_swapout_soc_busy--;
1095 	vm_swapout_soc_done--;
1096 }
1097 
1098 bool vm_swapout_thread_inited = false;
1099 static void
vm_swapout_thread(void)1100 vm_swapout_thread(void)
1101 {
1102 	uint32_t        size = 0;
1103 	c_segment_t     c_seg = NULL;
1104 	kern_return_t   kr = KERN_SUCCESS;
1105 	struct swapout_io_completion *soc;
1106 
1107 	if (!vm_swapout_thread_inited) {
1108 #if CONFIG_THREAD_GROUPS
1109 		thread_group_vm_add();
1110 #endif /* CONFIG_THREAD_GROUPS */
1111 		current_thread()->options |= TH_OPT_VMPRIV;
1112 		vm_swapout_thread_inited = true;
1113 	}
1114 
1115 	vm_swapout_thread_awakened++;
1116 
1117 	lck_mtx_lock_spin_always(c_list_lock);
1118 
1119 	vm_swapout_thread_running = TRUE;
1120 again:
1121 	while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
1122 		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1123 
1124 		lck_mtx_lock_spin_always(&c_seg->c_lock);
1125 
1126 		assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1127 
1128 		if (c_seg->c_busy) {
1129 			lck_mtx_unlock_always(c_list_lock);
1130 
1131 			c_seg_wait_on_busy(c_seg);
1132 
1133 			lck_mtx_lock_spin_always(c_list_lock);
1134 
1135 			continue;
1136 		}
1137 		vm_swapout_thread_processed_segments++;
1138 
1139 		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1140 
1141 		if (size == 0) {
1142 			assert(c_seg->c_bytes_used == 0);
1143 
1144 			if (!c_seg->c_on_minorcompact_q) {
1145 				c_seg_need_delayed_compaction(c_seg, TRUE);
1146 			}
1147 
1148 			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1149 			lck_mtx_unlock_always(&c_seg->c_lock);
1150 			lck_mtx_unlock_always(c_list_lock);
1151 
1152 			vm_swapout_found_empty++;
1153 			goto c_seg_is_empty;
1154 		}
1155 		C_SEG_BUSY(c_seg);
1156 		c_seg->c_busy_swapping = 1;
1157 
1158 		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1159 
1160 		lck_mtx_unlock_always(c_list_lock);
1161 		lck_mtx_unlock_always(&c_seg->c_lock);
1162 
1163 #if CHECKSUM_THE_SWAP
1164 		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1165 		c_seg->cseg_swap_size = size;
1166 #endif /* CHECKSUM_THE_SWAP */
1167 
1168 #if ENCRYPTED_SWAP
1169 		vm_swap_encrypt(c_seg);
1170 #endif /* ENCRYPTED_SWAP */
1171 
1172 		soc = vm_swapout_find_free_soc();
1173 		assert(soc);
1174 
1175 		soc->swp_upl_ctx.io_context = (void *)soc;
1176 		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1177 		soc->swp_upl_ctx.io_error = 0;
1178 
1179 		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1180 
1181 		if (kr != KERN_SUCCESS) {
1182 			if (soc->swp_io_done) {
1183 				lck_mtx_lock_spin_always(c_list_lock);
1184 
1185 				soc->swp_io_done = 0;
1186 				vm_swapout_soc_done--;
1187 
1188 				lck_mtx_unlock_always(c_list_lock);
1189 			}
1190 			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1191 		} else {
1192 			soc->swp_io_busy = 1;
1193 			vm_swapout_soc_busy++;
1194 		}
1195 
1196 c_seg_is_empty:
1197 		if (c_swapout_count == 0) {
1198 			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1199 		}
1200 
1201 		lck_mtx_lock_spin_always(c_list_lock);
1202 
1203 		while ((soc = vm_swapout_find_done_soc())) {
1204 			vm_swapout_complete_soc(soc);
1205 		}
1206 		lck_mtx_unlock_always(c_list_lock);
1207 
1208 		vm_swapout_thread_throttle_adjust();
1209 
1210 		lck_mtx_lock_spin_always(c_list_lock);
1211 	}
1212 	while ((soc = vm_swapout_find_done_soc())) {
1213 		vm_swapout_complete_soc(soc);
1214 	}
1215 	lck_mtx_unlock_always(c_list_lock);
1216 
1217 	vm_pageout_io_throttle();
1218 
1219 	lck_mtx_lock_spin_always(c_list_lock);
1220 
1221 	/*
1222 	 * Recheck if we have some c_segs to wakeup
1223 	 * post throttle. And, check to see if we
1224 	 * have any more swapouts needed.
1225 	 */
1226 	if (vm_swapout_soc_done) {
1227 		goto again;
1228 	}
1229 
1230 	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1231 
1232 	vm_swapout_thread_running = FALSE;
1233 
1234 	lck_mtx_unlock_always(c_list_lock);
1235 
1236 	thread_block((thread_continue_t)vm_swapout_thread);
1237 
1238 	/* NOTREACHED */
1239 }
1240 
1241 
1242 void
vm_swapout_iodone(void * io_context,int error)1243 vm_swapout_iodone(void *io_context, int error)
1244 {
1245 	struct swapout_io_completion *soc;
1246 
1247 	soc = (struct swapout_io_completion *)io_context;
1248 
1249 	lck_mtx_lock_spin_always(c_list_lock);
1250 
1251 	soc->swp_io_done = 1;
1252 	soc->swp_io_error = error;
1253 	vm_swapout_soc_done++;
1254 
1255 	if (!vm_swapout_thread_running) {
1256 		thread_wakeup((event_t)&c_swapout_list_head);
1257 	}
1258 
1259 	lck_mtx_unlock_always(c_list_lock);
1260 }
1261 
1262 
1263 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1264 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1265 {
1266 	PAGE_REPLACEMENT_DISALLOWED(TRUE);
1267 
1268 	if (kr == KERN_SUCCESS) {
1269 		kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
1270 		    KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1271 	}
1272 #if ENCRYPTED_SWAP
1273 	else {
1274 		vm_swap_decrypt(c_seg);
1275 	}
1276 #endif /* ENCRYPTED_SWAP */
1277 	lck_mtx_lock_spin_always(c_list_lock);
1278 	lck_mtx_lock_spin_always(&c_seg->c_lock);
1279 
1280 	if (kr == KERN_SUCCESS) {
1281 		int             new_state = C_ON_SWAPPEDOUT_Q;
1282 		boolean_t       insert_head = FALSE;
1283 
1284 		if (hibernate_flushing == TRUE) {
1285 			if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1286 			    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1287 				insert_head = TRUE;
1288 			}
1289 		} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1290 			new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1291 		}
1292 
1293 		c_seg_switch_state(c_seg, new_state, insert_head);
1294 
1295 		c_seg->c_store.c_swap_handle = f_offset;
1296 
1297 		counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1298 
1299 		c_seg->c_swappedin = false;
1300 
1301 		if (c_seg->c_bytes_used) {
1302 			OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1303 		}
1304 
1305 #if CONFIG_FREEZE
1306 		/*
1307 		 * Successful swapout. Decrement the in-core compressed pages count.
1308 		 */
1309 		OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1310 		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1311 #endif /* CONFIG_FREEZE */
1312 	} else {
1313 		if (c_seg->c_overage_swap == TRUE) {
1314 			c_seg->c_overage_swap = FALSE;
1315 			c_overage_swapped_count--;
1316 		}
1317 
1318 #if CONFIG_FREEZE
1319 		if (c_seg->c_task_owner) {
1320 			c_seg_update_task_owner(c_seg, NULL);
1321 		}
1322 #endif /* CONFIG_FREEZE */
1323 
1324 		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1325 
1326 		if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1327 			c_seg_need_delayed_compaction(c_seg, TRUE);
1328 		}
1329 	}
1330 	assert(c_seg->c_busy_swapping);
1331 	assert(c_seg->c_busy);
1332 
1333 	c_seg->c_busy_swapping = 0;
1334 	lck_mtx_unlock_always(c_list_lock);
1335 
1336 	C_SEG_WAKEUP_DONE(c_seg);
1337 	lck_mtx_unlock_always(&c_seg->c_lock);
1338 
1339 	PAGE_REPLACEMENT_DISALLOWED(FALSE);
1340 }
1341 
1342 
1343 boolean_t
vm_swap_create_file()1344 vm_swap_create_file()
1345 {
1346 	uint64_t        size = 0;
1347 	int             namelen = 0;
1348 	boolean_t       swap_file_created = FALSE;
1349 	boolean_t       swap_file_reuse = FALSE;
1350 	boolean_t       swap_file_pin = FALSE;
1351 	struct swapfile *swf = NULL;
1352 
1353 	/*
1354 	 * make sure we've got all the info we need
1355 	 * to potentially pin a swap file... we could
1356 	 * be swapping out due to hibernation w/o ever
1357 	 * having run vm_pageout_scan, which is normally
1358 	 * the trigger to do the init
1359 	 */
1360 	vm_compaction_swapper_do_init();
1361 
1362 	/*
1363 	 * Any swapfile structure ready for re-use?
1364 	 */
1365 
1366 	lck_mtx_lock(&vm_swap_data_lock);
1367 
1368 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1369 
1370 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1371 		if (swf->swp_flags == SWAP_REUSE) {
1372 			swap_file_reuse = TRUE;
1373 			break;
1374 		}
1375 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1376 	}
1377 
1378 	lck_mtx_unlock(&vm_swap_data_lock);
1379 
1380 	if (swap_file_reuse == FALSE) {
1381 		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1382 
1383 		swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1384 		swf->swp_index = vm_num_swap_files + 1;
1385 		swf->swp_pathlen = namelen;
1386 		swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1387 
1388 		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1389 	}
1390 
1391 	vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1392 
1393 	if (swf->swp_vp == NULL) {
1394 		if (swap_file_reuse == FALSE) {
1395 			kfree_data(swf->swp_path, swf->swp_pathlen);
1396 			kfree_type(struct swapfile, swf);
1397 		}
1398 		return FALSE;
1399 	}
1400 	vm_swapfile_can_be_created = TRUE;
1401 
1402 	size = MAX_SWAP_FILE_SIZE;
1403 
1404 	while (size >= MIN_SWAP_FILE_SIZE) {
1405 		swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1406 
1407 		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1408 			int num_bytes_for_bitmap = 0;
1409 
1410 			swap_file_created = TRUE;
1411 
1412 			swf->swp_size = size;
1413 			swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1414 			swf->swp_nseginuse = 0;
1415 			swf->swp_free_hint = 0;
1416 
1417 			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1418 			/*
1419 			 * Allocate a bitmap that describes the
1420 			 * number of segments held by this swapfile.
1421 			 */
1422 			swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1423 			    Z_WAITOK | Z_ZERO);
1424 
1425 			swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1426 			    Z_WAITOK | Z_ZERO);
1427 
1428 			/*
1429 			 * passing a NULL trim_list into vnode_trim_list
1430 			 * will return ENOTSUP if trim isn't supported
1431 			 * and 0 if it is
1432 			 */
1433 			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1434 				swp_trim_supported = TRUE;
1435 			}
1436 
1437 			lck_mtx_lock(&vm_swap_data_lock);
1438 
1439 			swf->swp_flags = SWAP_READY;
1440 
1441 			if (swap_file_reuse == FALSE) {
1442 				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1443 			}
1444 
1445 			vm_num_swap_files++;
1446 
1447 			vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1448 			if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1449 				vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1450 			}
1451 
1452 			if (swap_file_pin == TRUE) {
1453 				vm_num_pinned_swap_files++;
1454 				swf->swp_flags |= SWAP_PINNED;
1455 				vm_swappin_avail -= swf->swp_size;
1456 			}
1457 
1458 			lck_mtx_unlock(&vm_swap_data_lock);
1459 
1460 			thread_wakeup((event_t) &vm_num_swap_files);
1461 #if !XNU_TARGET_OS_OSX
1462 			if (vm_num_swap_files == 1) {
1463 				c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1464 
1465 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1466 					c_overage_swapped_limit /= 2;
1467 				}
1468 			}
1469 #endif /* !XNU_TARGET_OS_OSX */
1470 			break;
1471 		} else {
1472 			size = size / 2;
1473 		}
1474 	}
1475 	if (swap_file_created == FALSE) {
1476 		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1477 
1478 		swf->swp_vp = NULL;
1479 
1480 		if (swap_file_reuse == FALSE) {
1481 			kfree_data(swf->swp_path, swf->swp_pathlen);
1482 			kfree_type(struct swapfile, swf);
1483 		}
1484 	}
1485 	return swap_file_created;
1486 }
1487 
1488 extern void vnode_put(struct vnode* vp);
1489 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1490 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1491 {
1492 	struct swapfile *swf = NULL;
1493 	uint64_t        file_offset = 0;
1494 	int             retval = 0;
1495 
1496 	assert(c_seg->c_store.c_buffer);
1497 
1498 	lck_mtx_lock(&vm_swap_data_lock);
1499 
1500 	swf = vm_swapfile_for_handle(f_offset);
1501 
1502 	if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1503 		vm_swap_get_failures++;
1504 		retval = 1;
1505 		goto done;
1506 	}
1507 	swf->swp_io_count++;
1508 
1509 	lck_mtx_unlock(&vm_swap_data_lock);
1510 
1511 #if DEVELOPMENT || DEBUG
1512 	C_SEG_MAKE_WRITEABLE(c_seg);
1513 #endif
1514 	file_offset = (f_offset & SWAP_SLOT_MASK);
1515 
1516 	if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1517 		printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1518 	} else {
1519 		retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1520 		vnode_put(swf->swp_vp);
1521 	}
1522 
1523 #if DEVELOPMENT || DEBUG
1524 	C_SEG_WRITE_PROTECT(c_seg);
1525 #endif
1526 	if (retval == 0) {
1527 		counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1528 	} else {
1529 		vm_swap_get_failures++;
1530 	}
1531 
1532 	/*
1533 	 * Free this slot in the swap structure.
1534 	 */
1535 	vm_swap_free(f_offset);
1536 
1537 	lck_mtx_lock(&vm_swap_data_lock);
1538 	swf->swp_io_count--;
1539 
1540 	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1541 		swf->swp_flags &= ~SWAP_WANTED;
1542 		thread_wakeup((event_t) &swf->swp_flags);
1543 	}
1544 done:
1545 	lck_mtx_unlock(&vm_swap_data_lock);
1546 
1547 	if (retval == 0) {
1548 		return KERN_SUCCESS;
1549 	} else {
1550 		return KERN_FAILURE;
1551 	}
1552 }
1553 
1554 kern_return_t
vm_swap_put(vm_offset_t addr,uint64_t * f_offset,uint32_t size,c_segment_t c_seg,struct swapout_io_completion * soc)1555 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1556 {
1557 	unsigned int    segidx = 0;
1558 	struct swapfile *swf = NULL;
1559 	uint64_t        file_offset = 0;
1560 	uint64_t        swapfile_index = 0;
1561 	unsigned int    byte_for_segidx = 0;
1562 	unsigned int    offset_within_byte = 0;
1563 	boolean_t       swf_eligible = FALSE;
1564 	boolean_t       waiting = FALSE;
1565 	boolean_t       retried = FALSE;
1566 	int             error = 0;
1567 	clock_sec_t     sec;
1568 	clock_nsec_t    nsec;
1569 	void            *upl_ctx = NULL;
1570 	boolean_t       drop_iocount = FALSE;
1571 
1572 	if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1573 		return KERN_FAILURE;
1574 	}
1575 retry:
1576 	lck_mtx_lock(&vm_swap_data_lock);
1577 
1578 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1579 
1580 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1581 		segidx = swf->swp_free_hint;
1582 
1583 		swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1584 
1585 		if (swf_eligible) {
1586 			while (segidx < swf->swp_nsegs) {
1587 				byte_for_segidx = segidx >> 3;
1588 				offset_within_byte = segidx % 8;
1589 
1590 				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1591 					segidx++;
1592 					continue;
1593 				}
1594 
1595 				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1596 
1597 				file_offset = segidx * compressed_swap_chunk_size;
1598 				swf->swp_nseginuse++;
1599 				swf->swp_io_count++;
1600 				swf->swp_csegs[segidx] = c_seg;
1601 
1602 				swapfile_index = swf->swp_index;
1603 				vm_swapfile_total_segs_used++;
1604 				if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1605 					vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1606 				}
1607 
1608 				clock_get_system_nanotime(&sec, &nsec);
1609 
1610 				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1611 					thread_wakeup((event_t) &vm_swapfile_create_needed);
1612 				}
1613 
1614 				lck_mtx_unlock(&vm_swap_data_lock);
1615 
1616 				goto issue_io;
1617 			}
1618 		}
1619 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1620 	}
1621 	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1622 
1623 	/*
1624 	 * we've run out of swap segments, but may not
1625 	 * be in a position to immediately create a new swap
1626 	 * file if we've recently failed to create due to a lack
1627 	 * of free space in the root filesystem... we'll try
1628 	 * to kick that create off, but in any event we're going
1629 	 * to take a breather (up to 1 second) so that we're not caught in a tight
1630 	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1631 	 * segments into swap files only to have them immediately put back
1632 	 * on the c_age queue due to vm_swap_put failing.
1633 	 *
1634 	 * if we're doing these puts due to a hibernation flush,
1635 	 * no need to block... setting hibernate_no_swapspace to TRUE,
1636 	 * will cause "vm_compressor_compact_and_swap" to immediately abort
1637 	 */
1638 	clock_get_system_nanotime(&sec, &nsec);
1639 
1640 	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1641 		thread_wakeup((event_t) &vm_swapfile_create_needed);
1642 	}
1643 
1644 	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1645 		waiting = TRUE;
1646 		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1647 	} else {
1648 		hibernate_no_swapspace = TRUE;
1649 	}
1650 
1651 	lck_mtx_unlock(&vm_swap_data_lock);
1652 
1653 	if (waiting == TRUE) {
1654 		thread_block(THREAD_CONTINUE_NULL);
1655 
1656 		if (retried == FALSE && hibernate_flushing == TRUE) {
1657 			retried = TRUE;
1658 			goto retry;
1659 		}
1660 	}
1661 	vm_swap_put_failures_no_swap_file++;
1662 
1663 	return KERN_FAILURE;
1664 
1665 issue_io:
1666 	assert(c_seg->c_busy_swapping);
1667 	assert(c_seg->c_busy);
1668 	assert(!c_seg->c_on_minorcompact_q);
1669 
1670 	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1671 
1672 	if (soc) {
1673 		soc->swp_c_seg = c_seg;
1674 		soc->swp_c_size = size;
1675 
1676 		soc->swp_swf = swf;
1677 
1678 		soc->swp_io_error = 0;
1679 		soc->swp_io_done = 0;
1680 
1681 		upl_ctx = (void *)&soc->swp_upl_ctx;
1682 	}
1683 
1684 	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1685 		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1686 	} else {
1687 		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1688 		drop_iocount = TRUE;
1689 	}
1690 
1691 	if (error || upl_ctx == NULL) {
1692 		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1693 	}
1694 
1695 	return KERN_SUCCESS;
1696 }
1697 
1698 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1699 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1700 {
1701 	if (drop_iocount) {
1702 		vnode_put(swf->swp_vp);
1703 	}
1704 
1705 	lck_mtx_lock(&vm_swap_data_lock);
1706 
1707 	swf->swp_io_count--;
1708 
1709 	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1710 		swf->swp_flags &= ~SWAP_WANTED;
1711 		thread_wakeup((event_t) &swf->swp_flags);
1712 	}
1713 	lck_mtx_unlock(&vm_swap_data_lock);
1714 
1715 	if (error) {
1716 		vm_swap_free(*f_offset);
1717 		vm_swap_put_failures++;
1718 
1719 		return KERN_FAILURE;
1720 	}
1721 	return KERN_SUCCESS;
1722 }
1723 
1724 
1725 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1726 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1727 {
1728 	uint64_t        file_offset = 0;
1729 	unsigned int    segidx = 0;
1730 
1731 
1732 	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1733 		unsigned int byte_for_segidx = 0;
1734 		unsigned int offset_within_byte = 0;
1735 
1736 		file_offset = (f_offset & SWAP_SLOT_MASK);
1737 		segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1738 
1739 		byte_for_segidx = segidx >> 3;
1740 		offset_within_byte = segidx % 8;
1741 
1742 		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1743 			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1744 
1745 			swf->swp_csegs[segidx] = NULL;
1746 
1747 			swf->swp_nseginuse--;
1748 			vm_swapfile_total_segs_used--;
1749 
1750 			if (segidx < swf->swp_free_hint) {
1751 				swf->swp_free_hint = segidx;
1752 			}
1753 		}
1754 		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1755 			thread_wakeup((event_t) &vm_swapfile_gc_needed);
1756 		}
1757 	}
1758 }
1759 
1760 
1761 uint32_t vm_swap_free_now_count = 0;
1762 uint32_t vm_swap_free_delayed_count = 0;
1763 
1764 
1765 void
vm_swap_free(uint64_t f_offset)1766 vm_swap_free(uint64_t f_offset)
1767 {
1768 	struct swapfile *swf = NULL;
1769 	struct trim_list *tl = NULL;
1770 	clock_sec_t     sec;
1771 	clock_nsec_t    nsec;
1772 
1773 	if (swp_trim_supported == TRUE) {
1774 		tl = kalloc_type(struct trim_list, Z_WAITOK);
1775 	}
1776 
1777 	lck_mtx_lock(&vm_swap_data_lock);
1778 
1779 	swf = vm_swapfile_for_handle(f_offset);
1780 
1781 	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1782 		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1783 			/*
1784 			 * don't delay the free if the underlying disk doesn't support
1785 			 * trim, or we're in the midst of reclaiming this swap file since
1786 			 * we don't want to move segments that are technically free
1787 			 * but not yet handled by the delayed free mechanism
1788 			 */
1789 			vm_swap_free_now(swf, f_offset);
1790 
1791 			vm_swap_free_now_count++;
1792 			goto done;
1793 		}
1794 		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1795 		tl->tl_length = compressed_swap_chunk_size;
1796 
1797 		tl->tl_next = swf->swp_delayed_trim_list_head;
1798 		swf->swp_delayed_trim_list_head = tl;
1799 		swf->swp_delayed_trim_count++;
1800 		tl = NULL;
1801 
1802 		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1803 			clock_get_system_nanotime(&sec, &nsec);
1804 
1805 			if (sec > dont_trim_until_ts) {
1806 				thread_wakeup((event_t) &vm_swapfile_create_needed);
1807 			}
1808 		}
1809 		vm_swap_free_delayed_count++;
1810 	}
1811 done:
1812 	lck_mtx_unlock(&vm_swap_data_lock);
1813 
1814 	if (tl != NULL) {
1815 		kfree_type(struct trim_list, tl);
1816 	}
1817 }
1818 
1819 
1820 static void
vm_swap_wait_on_trim_handling_in_progress()1821 vm_swap_wait_on_trim_handling_in_progress()
1822 {
1823 	while (delayed_trim_handling_in_progress == TRUE) {
1824 		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1825 		lck_mtx_unlock(&vm_swap_data_lock);
1826 
1827 		thread_block(THREAD_CONTINUE_NULL);
1828 
1829 		lck_mtx_lock(&vm_swap_data_lock);
1830 	}
1831 }
1832 
1833 
1834 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1835 vm_swap_handle_delayed_trims(boolean_t force_now)
1836 {
1837 	struct swapfile *swf = NULL;
1838 
1839 	/*
1840 	 * serialize the race between us and vm_swap_reclaim...
1841 	 * if vm_swap_reclaim wins it will turn off SWAP_READY
1842 	 * on the victim it has chosen... we can just skip over
1843 	 * that file since vm_swap_reclaim will first process
1844 	 * all of the delayed trims associated with it
1845 	 */
1846 
1847 	if (compressor_store_stop_compaction == TRUE) {
1848 		return;
1849 	}
1850 
1851 	lck_mtx_lock(&vm_swap_data_lock);
1852 
1853 	delayed_trim_handling_in_progress = TRUE;
1854 
1855 	lck_mtx_unlock(&vm_swap_data_lock);
1856 
1857 	/*
1858 	 * no need to hold the lock to walk the swf list since
1859 	 * vm_swap_create (the only place where we add to this list)
1860 	 * is run on the same thread as this function
1861 	 * and vm_swap_reclaim doesn't remove items from this list
1862 	 * instead marking them with SWAP_REUSE for future re-use
1863 	 */
1864 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1865 
1866 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1867 		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1868 			assert(!(swf->swp_flags & SWAP_RECLAIM));
1869 			vm_swap_do_delayed_trim(swf);
1870 		}
1871 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1872 	}
1873 	lck_mtx_lock(&vm_swap_data_lock);
1874 
1875 	delayed_trim_handling_in_progress = FALSE;
1876 	thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1877 
1878 	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1879 		thread_wakeup((event_t) &vm_swapfile_gc_needed);
1880 	}
1881 
1882 	lck_mtx_unlock(&vm_swap_data_lock);
1883 }
1884 
1885 static void
vm_swap_do_delayed_trim(struct swapfile * swf)1886 vm_swap_do_delayed_trim(struct swapfile *swf)
1887 {
1888 	struct trim_list *tl, *tl_head;
1889 	int error;
1890 
1891 	if (compressor_store_stop_compaction == TRUE) {
1892 		return;
1893 	}
1894 
1895 	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1896 		printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1897 		return;
1898 	}
1899 
1900 	lck_mtx_lock(&vm_swap_data_lock);
1901 
1902 	tl_head = swf->swp_delayed_trim_list_head;
1903 	swf->swp_delayed_trim_list_head = NULL;
1904 	swf->swp_delayed_trim_count = 0;
1905 
1906 	lck_mtx_unlock(&vm_swap_data_lock);
1907 
1908 	vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1909 
1910 	(void) vnode_put(swf->swp_vp);
1911 
1912 	while ((tl = tl_head) != NULL) {
1913 		unsigned int    segidx = 0;
1914 		unsigned int    byte_for_segidx = 0;
1915 		unsigned int    offset_within_byte = 0;
1916 
1917 		lck_mtx_lock(&vm_swap_data_lock);
1918 
1919 		segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
1920 
1921 		byte_for_segidx = segidx >> 3;
1922 		offset_within_byte = segidx % 8;
1923 
1924 		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1925 			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1926 
1927 			swf->swp_csegs[segidx] = NULL;
1928 
1929 			swf->swp_nseginuse--;
1930 			vm_swapfile_total_segs_used--;
1931 
1932 			if (segidx < swf->swp_free_hint) {
1933 				swf->swp_free_hint = segidx;
1934 			}
1935 		}
1936 		lck_mtx_unlock(&vm_swap_data_lock);
1937 
1938 		tl_head = tl->tl_next;
1939 
1940 		kfree_type(struct trim_list, tl);
1941 	}
1942 }
1943 
1944 
/*
 * vm_swap_flush:
 *
 * Intentionally empty; performs no work.
 */
void
vm_swap_flush(void)
{
}
1950 
1951 int     vm_swap_reclaim_yielded = 0;
1952 
1953 void
vm_swap_reclaim(void)1954 vm_swap_reclaim(void)
1955 {
1956 	vm_offset_t     addr = 0;
1957 	unsigned int    segidx = 0;
1958 	uint64_t        f_offset = 0;
1959 	struct swapfile *swf = NULL;
1960 	struct swapfile *smallest_swf = NULL;
1961 	unsigned int    min_nsegs = 0;
1962 	unsigned int    byte_for_segidx = 0;
1963 	unsigned int    offset_within_byte = 0;
1964 	uint32_t        c_size = 0;
1965 
1966 	c_segment_t     c_seg = NULL;
1967 
1968 	kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
1969 	    KMA_NOFAIL | KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
1970 
1971 	lck_mtx_lock(&vm_swap_data_lock);
1972 
1973 	/*
1974 	 * if we're running the swapfile list looking for
1975 	 * candidates with delayed trims, we need to
1976 	 * wait before making our decision concerning
1977 	 * the swapfile we want to reclaim
1978 	 */
1979 	vm_swap_wait_on_trim_handling_in_progress();
1980 
1981 	/*
1982 	 * from here until we knock down the SWAP_READY bit,
1983 	 * we need to remain behind the vm_swap_data_lock...
1984 	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1985 	 * will not consider this swapfile for processing
1986 	 */
1987 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1988 	min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
1989 	smallest_swf = NULL;
1990 
1991 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1992 		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1993 			smallest_swf = swf;
1994 			min_nsegs = swf->swp_nseginuse;
1995 		}
1996 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1997 	}
1998 
1999 	if (smallest_swf == NULL) {
2000 		goto done;
2001 	}
2002 
2003 	swf = smallest_swf;
2004 
2005 
2006 	swf->swp_flags &= ~SWAP_READY;
2007 	swf->swp_flags |= SWAP_RECLAIM;
2008 
2009 	if (swf->swp_delayed_trim_count) {
2010 		lck_mtx_unlock(&vm_swap_data_lock);
2011 
2012 		vm_swap_do_delayed_trim(swf);
2013 
2014 		lck_mtx_lock(&vm_swap_data_lock);
2015 	}
2016 	segidx = 0;
2017 
2018 	while (segidx < swf->swp_nsegs) {
2019 ReTry_for_cseg:
2020 		/*
2021 		 * Wait for outgoing I/Os.
2022 		 */
2023 		while (swf->swp_io_count) {
2024 			swf->swp_flags |= SWAP_WANTED;
2025 
2026 			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2027 			lck_mtx_unlock(&vm_swap_data_lock);
2028 
2029 			thread_block(THREAD_CONTINUE_NULL);
2030 
2031 			lck_mtx_lock(&vm_swap_data_lock);
2032 		}
2033 		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2034 			vm_swap_reclaim_yielded++;
2035 			break;
2036 		}
2037 
2038 		byte_for_segidx = segidx >> 3;
2039 		offset_within_byte = segidx % 8;
2040 
2041 		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2042 			segidx++;
2043 			continue;
2044 		}
2045 
2046 		c_seg = swf->swp_csegs[segidx];
2047 		assert(c_seg);
2048 
2049 		lck_mtx_lock_spin_always(&c_seg->c_lock);
2050 
2051 		if (c_seg->c_busy) {
2052 			/*
2053 			 * a swapped out c_segment in the process of being freed will remain in the
2054 			 * busy state until after the vm_swap_free is called on it... vm_swap_free
2055 			 * takes the vm_swap_data_lock, so can't change the swap state until after
2056 			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2057 			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2058 			 * at that point, we re-look up the swap state which will now indicate that
2059 			 * this c_segment no longer exists.
2060 			 */
2061 			c_seg->c_wanted = 1;
2062 
2063 			assert_wait((event_t) (c_seg), THREAD_UNINT);
2064 			lck_mtx_unlock_always(&c_seg->c_lock);
2065 
2066 			lck_mtx_unlock(&vm_swap_data_lock);
2067 
2068 			thread_block(THREAD_CONTINUE_NULL);
2069 
2070 			lck_mtx_lock(&vm_swap_data_lock);
2071 
2072 			goto ReTry_for_cseg;
2073 		}
2074 		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2075 
2076 		f_offset = segidx * compressed_swap_chunk_size;
2077 
2078 		assert(c_seg == swf->swp_csegs[segidx]);
2079 		swf->swp_csegs[segidx] = NULL;
2080 		swf->swp_nseginuse--;
2081 
2082 		vm_swapfile_total_segs_used--;
2083 
2084 		lck_mtx_unlock(&vm_swap_data_lock);
2085 
2086 		assert(C_SEG_IS_ONDISK(c_seg));
2087 
2088 		C_SEG_BUSY(c_seg);
2089 		c_seg->c_busy_swapping = 1;
2090 #if !CHECKSUM_THE_SWAP
2091 		c_seg_trim_tail(c_seg);
2092 #endif
2093 		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2094 
2095 		assert(c_size <= c_seg_bufsize && c_size);
2096 
2097 		lck_mtx_unlock_always(&c_seg->c_lock);
2098 
2099 		if (vnode_getwithref(swf->swp_vp)) {
2100 			printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2101 			vm_swap_get_failures++;
2102 			goto swap_io_failed;
2103 		} else {
2104 			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2105 				/*
2106 				 * reading the data back in failed, so convert c_seg
2107 				 * to a swapped in c_segment that contains no data
2108 				 */
2109 				c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2110 				/*
2111 				 * returns with c_busy_swapping cleared
2112 				 */
2113 				vnode_put(swf->swp_vp);
2114 				vm_swap_get_failures++;
2115 				goto swap_io_failed;
2116 			}
2117 			vnode_put(swf->swp_vp);
2118 		}
2119 
2120 		counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2121 		vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2122 
2123 		if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2124 			vm_offset_t     c_buffer;
2125 
2126 			/*
2127 			 * the put failed, so convert c_seg to a fully swapped in c_segment
2128 			 * with valid data
2129 			 */
2130 			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2131 
2132 			kernel_memory_populate(c_buffer, c_size,
2133 			    KMA_NOFAIL | KMA_COMPRESSOR,
2134 			    VM_KERN_MEMORY_COMPRESSOR);
2135 
2136 			memcpy((char *)c_buffer, (char *)addr, c_size);
2137 
2138 			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2139 #if ENCRYPTED_SWAP
2140 			vm_swap_decrypt(c_seg);
2141 #endif /* ENCRYPTED_SWAP */
2142 			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2143 			/*
2144 			 * returns with c_busy_swapping cleared
2145 			 */
2146 			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2147 
2148 			goto swap_io_failed;
2149 		}
2150 		counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2151 
2152 		lck_mtx_lock_spin_always(&c_seg->c_lock);
2153 
2154 		c_seg->c_swappedin = false;
2155 
2156 		assert(C_SEG_IS_ONDISK(c_seg));
2157 		/*
2158 		 * The c_seg will now know about the new location on disk.
2159 		 */
2160 		c_seg->c_store.c_swap_handle = f_offset;
2161 
2162 		assert(c_seg->c_busy_swapping);
2163 		c_seg->c_busy_swapping = 0;
2164 swap_io_failed:
2165 		assert(c_seg->c_busy);
2166 		C_SEG_WAKEUP_DONE(c_seg);
2167 
2168 		lck_mtx_unlock_always(&c_seg->c_lock);
2169 		lck_mtx_lock(&vm_swap_data_lock);
2170 	}
2171 
2172 	if (swf->swp_nseginuse) {
2173 		swf->swp_flags &= ~SWAP_RECLAIM;
2174 		swf->swp_flags |= SWAP_READY;
2175 
2176 		goto done;
2177 	}
2178 	/*
2179 	 * We don't remove this inactive swf from the queue.
2180 	 * That way, we can re-use it when needed again and
2181 	 * preserve the namespace. The delayed_trim processing
2182 	 * is also dependent on us not removing swfs from the queue.
2183 	 */
2184 	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2185 
2186 	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2187 
2188 	lck_mtx_unlock(&vm_swap_data_lock);
2189 
2190 	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2191 
2192 	kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2193 	kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2194 
2195 	lck_mtx_lock(&vm_swap_data_lock);
2196 
2197 	if (swf->swp_flags & SWAP_PINNED) {
2198 		vm_num_pinned_swap_files--;
2199 		vm_swappin_avail += swf->swp_size;
2200 	}
2201 
2202 	swf->swp_vp = NULL;
2203 	swf->swp_size = 0;
2204 	swf->swp_free_hint = 0;
2205 	swf->swp_nsegs = 0;
2206 	swf->swp_flags = SWAP_REUSE;
2207 
2208 	vm_num_swap_files--;
2209 
2210 done:
2211 	thread_wakeup((event_t) &swf->swp_flags);
2212 	lck_mtx_unlock(&vm_swap_data_lock);
2213 
2214 	kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2215 }
2216 
2217 
2218 uint64_t
vm_swap_get_total_space(void)2219 vm_swap_get_total_space(void)
2220 {
2221 	uint64_t total_space = 0;
2222 
2223 	total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2224 
2225 	return total_space;
2226 }
2227 
2228 uint64_t
vm_swap_get_used_space(void)2229 vm_swap_get_used_space(void)
2230 {
2231 	uint64_t used_space = 0;
2232 
2233 	used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2234 
2235 	return used_space;
2236 }
2237 
/*
 * Allocated-but-unused swap space, in bytes
 * (total space minus used space).
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t total_bytes = vm_swap_get_total_space();
	uint64_t used_bytes = vm_swap_get_used_space();

	return total_bytes - used_bytes;
}
2243 
2244 uint64_t
vm_swap_get_max_configured_space(void)2245 vm_swap_get_max_configured_space(void)
2246 {
2247 	int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2248 	return num_swap_files * MAX_SWAP_FILE_SIZE;
2249 }
2250 
2251 int
vm_swap_low_on_space(void)2252 vm_swap_low_on_space(void)
2253 {
2254 	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2255 		return 0;
2256 	}
2257 
2258 	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2259 		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2260 			return 0;
2261 		}
2262 
2263 		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2264 			return 1;
2265 		}
2266 	}
2267 	return 0;
2268 }
2269 
2270 int
vm_swap_out_of_space(void)2271 vm_swap_out_of_space(void)
2272 {
2273 	if ((vm_num_swap_files == vm_num_swap_files_config) &&
2274 	    ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2275 		/*
2276 		 * Last swapfile and we have only space for the
2277 		 * last few swapouts.
2278 		 */
2279 		return 1;
2280 	}
2281 
2282 	return 0;
2283 }
2284 
2285 boolean_t
vm_swap_files_pinned(void)2286 vm_swap_files_pinned(void)
2287 {
2288 	boolean_t result;
2289 
2290 	if (vm_swappin_enabled == FALSE) {
2291 		return TRUE;
2292 	}
2293 
2294 	result = (vm_num_pinned_swap_files == vm_num_swap_files);
2295 
2296 	return result;
2297 }
2298 
#if CONFIG_FREEZE
/*
 * vm_swap_max_budget:
 *
 * Ask the swap volume for a freeze budget via vm_swap_vol_get_budget(),
 * storing it through "freeze_daily_budget".
 *
 * With swap files present, the first swapfile whose flags are exactly
 * SWAP_READY is queried under vm_swap_data_lock.  With none present, a
 * temporary swap file is opened just to reach the volume; if that open
 * yields no vnode, *freeze_daily_budget is set to 0.
 *
 * Returns TRUE only when vm_swap_vol_get_budget() reported success (0).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       got_device_budget = FALSE;
	struct swapfile *cur = NULL;

	if (vm_num_swap_files == 0) {
		/*
		 * Initial budget value, before any swap files exist:
		 * create a temp swap file to get the budget.
		 */
		struct vnode *probe_vp = NULL;

		vm_swapfile_open(swapfilename, &probe_vp);

		if (probe_vp == NULL) {
			*freeze_daily_budget = 0;
			return got_device_budget;
		}

		if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
			got_device_budget = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, probe_vp);
		probe_vp = NULL;

		return got_device_budget;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	cur = (struct swapfile*) queue_first(&swf_global_queue);

	if (cur) {
		for (; queue_end(&swf_global_queue, (queue_entry_t)cur) == FALSE;
		    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {
			if (cur->swp_flags != SWAP_READY) {
				continue;
			}

			assert(cur->swp_vp);

			if (vm_swap_vol_get_budget(cur->swp_vp, freeze_daily_budget) == 0) {
				got_device_budget = TRUE;
			}
			/* only the first ready swapfile is consulted */
			break;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return got_device_budget;
}
#endif /* CONFIG_FREEZE */
2351 
2352 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2353 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2354 {
2355 	lck_mtx_lock(&vm_swap_data_lock);
2356 
2357 	*alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2358 	*used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2359 
2360 	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2361 	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2362 
2363 	lck_mtx_unlock(&vm_swap_data_lock);
2364 }
2365