xref: /xnu-8019.80.24/osfmk/vm/vm_compressor_backing_store.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32 
33 #include <IOKit/IOHibernatePrivate.h>
34 
35 #include <kern/policy_internal.h>
36 
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39 
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42  * launchd explicitly turns ON swap later during boot on macOS devices.
43  */
44 boolean_t       compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t       compressor_store_stop_compaction = FALSE;
47 #endif
48 
49 boolean_t       vm_swapfile_create_needed = FALSE;
50 boolean_t       vm_swapfile_gc_needed = FALSE;
51 
52 int             vm_swapper_throttle = -1;
53 uint64_t        vm_swapout_thread_id;
54 
55 uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56 uint64_t        vm_swap_get_failures = 0; /* Fatal */
57 uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58 int             vm_num_swap_files_config = 0;
59 int             vm_num_swap_files = 0;
60 int             vm_num_pinned_swap_files = 0;
61 int             vm_swapout_thread_processed_segments = 0;
62 int             vm_swapout_thread_awakened = 0;
63 bool            vm_swapout_thread_running = FALSE;
64 int             vm_swapfile_create_thread_awakened = 0;
65 int             vm_swapfile_create_thread_running = 0;
66 int             vm_swapfile_gc_thread_awakened = 0;
67 int             vm_swapfile_gc_thread_running = 0;
68 
69 int64_t         vm_swappin_avail = 0;
70 boolean_t       vm_swappin_enabled = FALSE;
71 unsigned int    vm_swapfile_total_segs_alloced = 0;
72 unsigned int    vm_swapfile_total_segs_alloced_max = 0;
73 unsigned int    vm_swapfile_total_segs_used = 0;
74 unsigned int    vm_swapfile_total_segs_used_max = 0;
75 
76 char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
77 
78 extern vm_map_t compressor_map;
79 extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
80 
/*
 * Bits stored in swapfile.swp_flags:
 *
 *   SWAP_READY    the swap file is ready to be used
 *   SWAP_RECLAIM  the swap file is marked to be reclaimed
 *   SWAP_WANTED   the swap file has waiters
 *   SWAP_REUSE    the swap file is on the queue and has a name; reuse it after (re)initializing
 *   SWAP_PINNED   the swap file is pinned (FusionDrive)
 */
#define SWAP_READY      0x1
#define SWAP_RECLAIM    0x2
#define SWAP_WANTED     0x4
#define SWAP_REUSE      0x8
#define SWAP_PINNED     0x10
86 
87 
88 struct swapfile {
89 	queue_head_t            swp_queue;      /* list of swap files */
90 	char                    *swp_path;      /* saved pathname of swap file */
91 	struct vnode            *swp_vp;        /* backing vnode */
92 	uint64_t                swp_size;       /* size of this swap file */
93 	uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
94 	unsigned int            swp_pathlen;    /* length of pathname */
95 	unsigned int            swp_nsegs;      /* #segments we can use */
96 	unsigned int            swp_nseginuse;  /* #segments in use */
97 	unsigned int            swp_index;      /* index of this swap file */
98 	unsigned int            swp_flags;      /* state of swap file */
99 	unsigned int            swp_free_hint;  /* offset of 1st free chunk */
100 	unsigned int            swp_io_count;   /* count of outstanding I/Os */
101 	c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */
102 
103 	struct trim_list        *swp_delayed_trim_list_head;
104 	unsigned int            swp_delayed_trim_count;
105 };
106 
107 queue_head_t    swf_global_queue;
108 boolean_t       swp_trim_supported = FALSE;
109 
110 extern clock_sec_t      dont_trim_until_ts;
111 clock_sec_t             vm_swapfile_last_failed_to_create_ts = 0;
112 clock_sec_t             vm_swapfile_last_successful_create_ts = 0;
113 int                     vm_swapfile_can_be_created = FALSE;
114 boolean_t               delayed_trim_handling_in_progress = FALSE;
115 
116 boolean_t               hibernate_in_progress_with_pinned_swap = FALSE;
117 
118 static void vm_swapout_thread_throttle_adjust(void);
119 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
120 static void vm_swapout_thread(void);
121 static void vm_swapfile_create_thread(void);
122 static void vm_swapfile_gc_thread(void);
123 static void vm_swap_defragment(void);
124 static void vm_swap_handle_delayed_trims(boolean_t);
125 static void vm_swap_do_delayed_trim(struct swapfile *);
126 static void vm_swap_wait_on_trim_handling_in_progress(void);
127 static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
128 
129 extern int vnode_getwithref(struct vnode* vp);
130 
131 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
132 
/*
 * Swap policy knobs and predicates.
 *
 * Every predicate evaluates to 1 or 0 (VM_SWAP_SHOULD_PIN yields a boolean
 * expression).  Macro parameters are parenthesized so that any expression
 * can be passed as an argument.
 */

/* Seconds to wait after a failed swap-file creation before trying again. */
#define VM_SWAPFILE_DELAYED_CREATE      15

#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))

#endif /* !XNU_TARGET_OS_OSX */

/*
 * Create another swap file when we are below the configured maximum, the
 * number of allocated-but-unused segments is under the high-water mark, and
 * the last failed attempt is more than VM_SWAPFILE_DELAYED_CREATE seconds
 * before cur_ts.  Identical on every target.
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)   (((vm_num_swap_files < vm_num_swap_files_config) && \
	                                 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE)) ? 1 : 0)

/* Flush a swap file's delayed trims once the batch limit is reached.  Identical on every target. */
#define VM_SWAP_SHOULD_TRIM(swf)        ((((swf)->swp_delayed_trim_count) >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

/* Start reclaiming when forced, or when enough allocated segments sit unused. */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
/* Stop an unforced reclaim once the unused-segment count has dropped to the minimum. */
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)

/* Swapouts are pending and the swapper is running at the TIER0 throttle level. */
#define VM_SWAP_BUSY()  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
169 
170 
#if CHECKSUM_THE_SWAP
/* Hash routine provided elsewhere; only declared when swap checksumming is built in. */
extern unsigned int hash_string(char *cp, int len);
#endif /* CHECKSUM_THE_SWAP */

#if RECORD_THE_COMPRESSED_DATA
/* State for the optional recording of compressed data to a file. */
boolean_t       c_compressed_record_init_done = FALSE;
int             c_compressed_record_write_error = 0;
struct vnode    *c_compressed_record_vp = NULL;
uint64_t        c_compressed_record_file_offset = 0;

void    c_compressed_record_init(void);
void    c_compressed_record_write(char *, int);
#endif /* RECORD_THE_COMPRESSED_DATA */

extern void                     vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);
187 
188 /*
189  * Called with the vm_swap_data_lock held.
190  */
191 
192 static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)193 vm_swapfile_for_handle(uint64_t f_offset)
194 {
195 	uint64_t                file_offset = 0;
196 	unsigned int            swapfile_index = 0;
197 	struct swapfile*        swf = NULL;
198 
199 	file_offset = (f_offset & SWAP_SLOT_MASK);
200 	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
201 
202 	swf = (struct swapfile*) queue_first(&swf_global_queue);
203 
204 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
205 		if (swapfile_index == swf->swp_index) {
206 			break;
207 		}
208 
209 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
210 	}
211 
212 	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
213 		swf = NULL;
214 	}
215 
216 	return swf;
217 }
218 
219 #if ENCRYPTED_SWAP
220 
221 #include <libkern/crypto/aesxts.h>
222 
223 extern int cc_rand_generate(void *, size_t);     /* from libkern/cyrpto/rand.h> */
224 
225 boolean_t       swap_crypt_initialized;
226 void            swap_crypt_initialize(void);
227 
228 symmetric_xts   xts_modectx;
229 uint32_t        swap_crypt_key1[8];   /* big enough for a 256 bit random key */
230 uint32_t        swap_crypt_key2[8];   /* big enough for a 256 bit random key */
231 
232 #if DEVELOPMENT || DEBUG
233 boolean_t       swap_crypt_xts_tested = FALSE;
234 unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
235 unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
236 unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
237 #endif /* DEVELOPMENT || DEBUG */
238 
239 unsigned long   vm_page_encrypt_counter;
240 unsigned long   vm_page_decrypt_counter;
241 
242 
243 void
swap_crypt_initialize(void)244 swap_crypt_initialize(void)
245 {
246 	uint8_t  *enckey1, *enckey2;
247 	int      keylen1, keylen2;
248 	int      error;
249 
250 	assert(swap_crypt_initialized == FALSE);
251 
252 	keylen1 = sizeof(swap_crypt_key1);
253 	enckey1 = (uint8_t *)&swap_crypt_key1;
254 	keylen2 = sizeof(swap_crypt_key2);
255 	enckey2 = (uint8_t *)&swap_crypt_key2;
256 
257 	error = cc_rand_generate((void *)enckey1, keylen1);
258 	assert(!error);
259 
260 	error = cc_rand_generate((void *)enckey2, keylen2);
261 	assert(!error);
262 
263 	error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
264 	assert(!error);
265 
266 	swap_crypt_initialized = TRUE;
267 
268 #if DEVELOPMENT || DEBUG
269 	uint8_t *encptr;
270 	uint8_t *decptr;
271 	uint8_t *refptr;
272 	uint8_t *iv;
273 	uint64_t ivnum[2];
274 	int size = 0;
275 	int i    = 0;
276 	int rc   = 0;
277 
278 	assert(swap_crypt_xts_tested == FALSE);
279 
280 	/*
281 	 * Validate the encryption algorithms.
282 	 *
283 	 * First initialize the test data.
284 	 */
285 	for (i = 0; i < 4096; i++) {
286 		swap_crypt_test_page_ref[i] = (char) i;
287 	}
288 	ivnum[0] = (uint64_t)0xaa;
289 	ivnum[1] = 0;
290 	iv = (uint8_t *)ivnum;
291 
292 	refptr = (uint8_t *)swap_crypt_test_page_ref;
293 	encptr = (uint8_t *)swap_crypt_test_page_encrypt;
294 	decptr = (uint8_t *)swap_crypt_test_page_decrypt;
295 	size = 4096;
296 
297 	/* encrypt */
298 	rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
299 	assert(!rc);
300 
301 	/* compare result with original - should NOT match */
302 	for (i = 0; i < 4096; i++) {
303 		if (swap_crypt_test_page_encrypt[i] !=
304 		    swap_crypt_test_page_ref[i]) {
305 			break;
306 		}
307 	}
308 	assert(i != 4096);
309 
310 	/* decrypt */
311 	rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
312 	assert(!rc);
313 
314 	/* compare result with original */
315 	for (i = 0; i < 4096; i++) {
316 		if (swap_crypt_test_page_decrypt[i] !=
317 		    swap_crypt_test_page_ref[i]) {
318 			panic("encryption test failed");
319 		}
320 	}
321 	/* encrypt in place */
322 	rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
323 	assert(!rc);
324 
325 	/* decrypt in place */
326 	rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
327 	assert(!rc);
328 
329 	for (i = 0; i < 4096; i++) {
330 		if (swap_crypt_test_page_decrypt[i] !=
331 		    swap_crypt_test_page_ref[i]) {
332 			panic("in place encryption test failed");
333 		}
334 	}
335 	swap_crypt_xts_tested = TRUE;
336 #endif /* DEVELOPMENT || DEBUG */
337 }
338 
339 
340 void
vm_swap_encrypt(c_segment_t c_seg)341 vm_swap_encrypt(c_segment_t c_seg)
342 {
343 	uint8_t *ptr;
344 	uint8_t *iv;
345 	uint64_t ivnum[2];
346 	int size = 0;
347 	int rc   = 0;
348 
349 	if (swap_crypt_initialized == FALSE) {
350 		swap_crypt_initialize();
351 	}
352 
353 #if DEVELOPMENT || DEBUG
354 	C_SEG_MAKE_WRITEABLE(c_seg);
355 #endif
356 	ptr = (uint8_t *)c_seg->c_store.c_buffer;
357 	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
358 
359 	ivnum[0] = (uint64_t)c_seg;
360 	ivnum[1] = 0;
361 	iv = (uint8_t *)ivnum;
362 
363 	rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
364 	assert(!rc);
365 
366 	vm_page_encrypt_counter += (size / PAGE_SIZE_64);
367 
368 #if DEVELOPMENT || DEBUG
369 	C_SEG_WRITE_PROTECT(c_seg);
370 #endif
371 }
372 
373 void
vm_swap_decrypt(c_segment_t c_seg)374 vm_swap_decrypt(c_segment_t c_seg)
375 {
376 	uint8_t *ptr;
377 	uint8_t *iv;
378 	uint64_t ivnum[2];
379 	int size = 0;
380 	int rc   = 0;
381 
382 	assert(swap_crypt_initialized);
383 
384 #if DEVELOPMENT || DEBUG
385 	C_SEG_MAKE_WRITEABLE(c_seg);
386 #endif
387 	ptr = (uint8_t *)c_seg->c_store.c_buffer;
388 	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
389 
390 	ivnum[0] = (uint64_t)c_seg;
391 	ivnum[1] = 0;
392 	iv = (uint8_t *)ivnum;
393 
394 	rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
395 	assert(!rc);
396 
397 	vm_page_decrypt_counter += (size / PAGE_SIZE_64);
398 
399 #if DEVELOPMENT || DEBUG
400 	C_SEG_WRITE_PROTECT(c_seg);
401 #endif
402 }
403 #endif /* ENCRYPTED_SWAP */
404 
405 uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
406 void
vm_compressor_swap_init()407 vm_compressor_swap_init()
408 {
409 	thread_t        thread = NULL;
410 
411 	queue_init(&swf_global_queue);
412 
413 	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
414 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
415 		panic("vm_swapout_thread: create failed");
416 	}
417 	thread_set_thread_name(thread, "VM_swapout");
418 	vm_swapout_thread_id = thread->thread_id;
419 
420 	thread_deallocate(thread);
421 
422 	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
423 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
424 		panic("vm_swapfile_create_thread: create failed");
425 	}
426 
427 	thread_set_thread_name(thread, "VM_swapfile_create");
428 	thread_deallocate(thread);
429 
430 	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
431 	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
432 		panic("vm_swapfile_gc_thread: create failed");
433 	}
434 	thread_set_thread_name(thread, "VM_swapfile_gc");
435 
436 	/*
437 	 * Swapfile garbage collection will need to allocate memory
438 	 * to complete its swap reclaim and in-memory compaction.
439 	 * So allow it to dip into the reserved VM page pool.
440 	 */
441 	thread_lock(thread);
442 	thread->options |= TH_OPT_VMPRIV;
443 	thread_unlock(thread);
444 
445 	thread_deallocate(thread);
446 
447 	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
448 	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
449 	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
450 	    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
451 
452 #if !XNU_TARGET_OS_OSX
453 	/*
454 	 * dummy value until the swap file gets created
455 	 * when we drive the first c_segment_t to the
456 	 * swapout queue... at that time we will
457 	 * know the true size we have to work with
458 	 */
459 	c_overage_swapped_limit = 16;
460 #endif /* !XNU_TARGET_OS_OSX */
461 
462 	compressed_swap_chunk_size = c_seg_bufsize;
463 	vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
464 	swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
465 	swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
466 	vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
467 #if DEVELOPMENT || DEBUG
468 	typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
469 	if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
470 		if (parsed_vm_max_num_swap_files > 0) {
471 			vm_num_swap_files_config = parsed_vm_max_num_swap_files;
472 		} else {
473 			printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
474 		}
475 	}
476 #endif
477 	printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
478 
479 	printf("VM Swap Subsystem is ON\n");
480 }
481 
482 
483 #if RECORD_THE_COMPRESSED_DATA
484 
485 void
c_compressed_record_init()486 c_compressed_record_init()
487 {
488 	if (c_compressed_record_init_done == FALSE) {
489 		vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
490 		c_compressed_record_init_done = TRUE;
491 	}
492 }
493 
494 void
c_compressed_record_write(char * buf,int size)495 c_compressed_record_write(char *buf, int size)
496 {
497 	if (c_compressed_record_write_error == 0) {
498 		c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
499 		c_compressed_record_file_offset += size;
500 	}
501 }
502 #endif
503 
504 
505 int             compaction_swapper_inited = 0;
506 
507 void
vm_compaction_swapper_do_init(void)508 vm_compaction_swapper_do_init(void)
509 {
510 	struct  vnode *vp;
511 	char    *pathname;
512 	int     namelen;
513 
514 	if (compaction_swapper_inited) {
515 		return;
516 	}
517 
518 	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
519 		compaction_swapper_inited = 1;
520 		return;
521 	}
522 	lck_mtx_lock(&vm_swap_data_lock);
523 
524 	if (!compaction_swapper_inited) {
525 		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
526 		pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
527 		snprintf(pathname, namelen, "%s%d", swapfilename, 0);
528 
529 		vm_swapfile_open(pathname, &vp);
530 
531 		if (vp) {
532 			if (vnode_pager_isSSD(vp) == FALSE) {
533 				/*
534 				 * swap files live on an HDD, so let's make sure to start swapping
535 				 * much earlier since we're not worried about SSD write-wear and
536 				 * we have so little write bandwidth to work with
537 				 * these values were derived expermentially by running the performance
538 				 * teams stock test for evaluating HDD performance against various
539 				 * combinations and looking and comparing overall results.
540 				 * Note that the > relationship between these 4 values must be maintained
541 				 */
542 				if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
543 					vm_compressor_minorcompact_threshold_divisor = 15;
544 				}
545 				if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
546 					vm_compressor_majorcompact_threshold_divisor = 18;
547 				}
548 				if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
549 					vm_compressor_unthrottle_threshold_divisor = 24;
550 				}
551 				if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
552 					vm_compressor_catchup_threshold_divisor = 30;
553 				}
554 			}
555 #if XNU_TARGET_OS_OSX
556 			vnode_setswapmount(vp);
557 			vm_swappin_avail = vnode_getswappin_avail(vp);
558 
559 			if (vm_swappin_avail) {
560 				vm_swappin_enabled = TRUE;
561 			}
562 #endif /* XNU_TARGET_OS_OSX */
563 			vm_swapfile_close((uint64_t)pathname, vp);
564 		}
565 		kfree_data(pathname, namelen);
566 
567 		compaction_swapper_inited = 1;
568 	}
569 	lck_mtx_unlock(&vm_swap_data_lock);
570 }
571 
572 
573 void
vm_swap_consider_defragmenting(int flags)574 vm_swap_consider_defragmenting(int flags)
575 {
576 	boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
577 	boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
578 
579 	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
580 	    (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
581 		if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
582 			lck_mtx_lock(&vm_swap_data_lock);
583 
584 			if (force_defrag) {
585 				vm_swap_force_defrag = TRUE;
586 			}
587 
588 			if (force_reclaim) {
589 				vm_swap_force_reclaim = TRUE;
590 			}
591 
592 			if (!vm_swapfile_gc_thread_running) {
593 				thread_wakeup((event_t) &vm_swapfile_gc_needed);
594 			}
595 
596 			lck_mtx_unlock(&vm_swap_data_lock);
597 		}
598 	}
599 }
600 
601 
/*
 * Statistics kept by vm_swap_defragment(): how often it bailed out early,
 * swapped a segment back in, freed an empty segment, or had to wait on a
 * busy segment.
 */
int vm_swap_defragment_yielded = 0;
int vm_swap_defragment_swapin = 0;
int vm_swap_defragment_free = 0;
int vm_swap_defragment_busy = 0;

#if CONFIG_FREEZE
/* Compressor accounting consulted by vm_swap_defragment() before a swap-in. */
extern uint32_t c_segment_pages_compressed_incore;
extern uint32_t c_segment_pages_compressed_nearing_limit;
extern uint32_t c_segment_count;
extern uint32_t c_segments_nearing_limit;

boolean_t       memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);

extern bool freezer_incore_cseg_acct;
#endif /* CONFIG_FREEZE */
617 
618 static void
vm_swap_defragment()619 vm_swap_defragment()
620 {
621 	c_segment_t     c_seg;
622 
623 	/*
624 	 * have to grab the master lock w/o holding
625 	 * any locks in spin mode
626 	 */
627 	PAGE_REPLACEMENT_DISALLOWED(TRUE);
628 
629 	lck_mtx_lock_spin_always(c_list_lock);
630 
631 	while (!queue_empty(&c_swappedout_sparse_list_head)) {
632 		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
633 			vm_swap_defragment_yielded++;
634 			break;
635 		}
636 		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
637 
638 		lck_mtx_lock_spin_always(&c_seg->c_lock);
639 
640 		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
641 
642 		if (c_seg->c_busy) {
643 			lck_mtx_unlock_always(c_list_lock);
644 
645 			PAGE_REPLACEMENT_DISALLOWED(FALSE);
646 			/*
647 			 * c_seg_wait_on_busy consumes c_seg->c_lock
648 			 */
649 			c_seg_wait_on_busy(c_seg);
650 
651 			PAGE_REPLACEMENT_DISALLOWED(TRUE);
652 
653 			lck_mtx_lock_spin_always(c_list_lock);
654 
655 			vm_swap_defragment_busy++;
656 			continue;
657 		}
658 		if (c_seg->c_bytes_used == 0) {
659 			/*
660 			 * c_seg_free_locked consumes the c_list_lock
661 			 * and c_seg->c_lock
662 			 */
663 			C_SEG_BUSY(c_seg);
664 			c_seg_free_locked(c_seg);
665 
666 			vm_swap_defragment_free++;
667 		} else {
668 			lck_mtx_unlock_always(c_list_lock);
669 
670 #if CONFIG_FREEZE
671 			if (freezer_incore_cseg_acct) {
672 				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
673 					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
674 				}
675 
676 				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
677 				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
678 					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
679 				}
680 			}
681 #endif /* CONFIG_FREEZE */
682 			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
683 				lck_mtx_unlock_always(&c_seg->c_lock);
684 				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
685 			}
686 
687 			vm_swap_defragment_swapin++;
688 		}
689 		PAGE_REPLACEMENT_DISALLOWED(FALSE);
690 
691 		vm_pageout_io_throttle();
692 
693 		/*
694 		 * because write waiters have privilege over readers,
695 		 * dropping and immediately retaking the master lock will
696 		 * still allow any thread waiting to acquire the
697 		 * master lock exclusively an opportunity to take it
698 		 */
699 		PAGE_REPLACEMENT_DISALLOWED(TRUE);
700 
701 		lck_mtx_lock_spin_always(c_list_lock);
702 	}
703 	lck_mtx_unlock_always(c_list_lock);
704 
705 	PAGE_REPLACEMENT_DISALLOWED(FALSE);
706 }
707 
708 
709 bool vm_swapfile_create_thread_inited = false;
710 static void
vm_swapfile_create_thread(void)711 vm_swapfile_create_thread(void)
712 {
713 	clock_sec_t     sec;
714 	clock_nsec_t    nsec;
715 
716 	if (!vm_swapfile_create_thread_inited) {
717 #if CONFIG_THREAD_GROUPS
718 		thread_group_vm_add();
719 #endif /* CONFIG_THREAD_GROUPS */
720 		current_thread()->options |= TH_OPT_VMPRIV;
721 		vm_swapfile_create_thread_inited = true;
722 	}
723 
724 	vm_swapfile_create_thread_awakened++;
725 	vm_swapfile_create_thread_running = 1;
726 
727 	while (TRUE) {
728 		/*
729 		 * walk through the list of swap files
730 		 * and do the delayed frees/trims for
731 		 * any swap file whose count of delayed
732 		 * frees is above the batch limit
733 		 */
734 		vm_swap_handle_delayed_trims(FALSE);
735 
736 		lck_mtx_lock(&vm_swap_data_lock);
737 
738 		if (hibernate_in_progress_with_pinned_swap == TRUE) {
739 			break;
740 		}
741 
742 		if (compressor_store_stop_compaction == TRUE) {
743 			break;
744 		}
745 
746 		clock_get_system_nanotime(&sec, &nsec);
747 
748 		if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
749 			break;
750 		}
751 
752 		lck_mtx_unlock(&vm_swap_data_lock);
753 
754 		if (vm_swap_create_file() == FALSE) {
755 			vm_swapfile_last_failed_to_create_ts = sec;
756 			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
757 		} else {
758 			vm_swapfile_last_successful_create_ts = sec;
759 		}
760 	}
761 	vm_swapfile_create_thread_running = 0;
762 
763 	if (hibernate_in_progress_with_pinned_swap == TRUE) {
764 		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
765 	}
766 
767 	if (compressor_store_stop_compaction == TRUE) {
768 		thread_wakeup((event_t)&compressor_store_stop_compaction);
769 	}
770 
771 	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
772 
773 	lck_mtx_unlock(&vm_swap_data_lock);
774 
775 	thread_block((thread_continue_t)vm_swapfile_create_thread);
776 
777 	/* NOTREACHED */
778 }
779 
780 
781 #if HIBERNATION
782 
783 kern_return_t
hibernate_pin_swap(boolean_t start)784 hibernate_pin_swap(boolean_t start)
785 {
786 	vm_compaction_swapper_do_init();
787 
788 	if (start == FALSE) {
789 		lck_mtx_lock(&vm_swap_data_lock);
790 		hibernate_in_progress_with_pinned_swap = FALSE;
791 		lck_mtx_unlock(&vm_swap_data_lock);
792 
793 		return KERN_SUCCESS;
794 	}
795 	if (vm_swappin_enabled == FALSE) {
796 		return KERN_SUCCESS;
797 	}
798 
799 	lck_mtx_lock(&vm_swap_data_lock);
800 
801 	hibernate_in_progress_with_pinned_swap = TRUE;
802 
803 	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
804 		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
805 
806 		lck_mtx_unlock(&vm_swap_data_lock);
807 
808 		thread_block(THREAD_CONTINUE_NULL);
809 
810 		lck_mtx_lock(&vm_swap_data_lock);
811 	}
812 	if (vm_num_swap_files > vm_num_pinned_swap_files) {
813 		hibernate_in_progress_with_pinned_swap = FALSE;
814 		lck_mtx_unlock(&vm_swap_data_lock);
815 
816 		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
817 		    vm_num_swap_files, vm_num_pinned_swap_files);
818 		return KERN_FAILURE;
819 	}
820 	lck_mtx_unlock(&vm_swap_data_lock);
821 
822 	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
823 		if (vm_swap_create_file() == FALSE) {
824 			break;
825 		}
826 	}
827 	return KERN_SUCCESS;
828 }
829 #endif
830 bool vm_swapfile_gc_thread_inited = false;
831 static void
vm_swapfile_gc_thread(void)832 vm_swapfile_gc_thread(void)
833 {
834 	boolean_t       need_defragment;
835 	boolean_t       need_reclaim;
836 
837 	if (!vm_swapfile_gc_thread_inited) {
838 #if CONFIG_THREAD_GROUPS
839 		thread_group_vm_add();
840 #endif /* CONFIG_THREAD_GROUPS */
841 		vm_swapfile_gc_thread_inited = true;
842 	}
843 
844 	vm_swapfile_gc_thread_awakened++;
845 	vm_swapfile_gc_thread_running = 1;
846 
847 	while (TRUE) {
848 		lck_mtx_lock(&vm_swap_data_lock);
849 
850 		if (hibernate_in_progress_with_pinned_swap == TRUE) {
851 			break;
852 		}
853 
854 		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
855 			break;
856 		}
857 
858 		need_defragment = FALSE;
859 		need_reclaim = FALSE;
860 
861 		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
862 			need_defragment = TRUE;
863 		}
864 
865 		if (VM_SWAP_SHOULD_RECLAIM()) {
866 			need_defragment = TRUE;
867 			need_reclaim = TRUE;
868 		}
869 		if (need_defragment == FALSE && need_reclaim == FALSE) {
870 			break;
871 		}
872 
873 		vm_swap_force_defrag = FALSE;
874 		vm_swap_force_reclaim = FALSE;
875 
876 		lck_mtx_unlock(&vm_swap_data_lock);
877 
878 		if (need_defragment == TRUE) {
879 			vm_swap_defragment();
880 		}
881 		if (need_reclaim == TRUE) {
882 			vm_swap_reclaim();
883 		}
884 	}
885 	vm_swapfile_gc_thread_running = 0;
886 
887 	if (hibernate_in_progress_with_pinned_swap == TRUE) {
888 		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
889 	}
890 
891 	if (compressor_store_stop_compaction == TRUE) {
892 		thread_wakeup((event_t)&compressor_store_stop_compaction);
893 	}
894 
895 	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
896 
897 	lck_mtx_unlock(&vm_swap_data_lock);
898 
899 	thread_block((thread_continue_t)vm_swapfile_gc_thread);
900 
901 	/* NOTREACHED */
902 }
903 
904 
905 
/* Value assigned to vm_swapout_limit for each throttle state. */
#define   VM_SWAPOUT_LIMIT_T2P  4
#define   VM_SWAPOUT_LIMIT_T1P  4
#define   VM_SWAPOUT_LIMIT_T0P  6
#define   VM_SWAPOUT_LIMIT_T0   8
#define   VM_SWAPOUT_LIMIT_MAX  8

/* Values taken by vm_swapout_state. */
#define   VM_SWAPOUT_START      0
#define   VM_SWAPOUT_T2_PASSIVE 1
#define   VM_SWAPOUT_T1_PASSIVE 2
#define   VM_SWAPOUT_T0_PASSIVE 3
#define   VM_SWAPOUT_T0         4

/* Current throttle state of the swapout thread and the limit that goes with it. */
int vm_swapout_state = VM_SWAPOUT_START;
int vm_swapout_limit = 1;

/* How many times each throttle tier has been entered. */
int vm_swapper_entered_T0  = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
925 
926 
927 static void
vm_swapout_thread_throttle_adjust(void)928 vm_swapout_thread_throttle_adjust(void)
929 {
930 	switch (vm_swapout_state) {
931 	case VM_SWAPOUT_START:
932 
933 		vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
934 		vm_swapper_entered_T2P++;
935 
936 		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
937 		    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
938 		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
939 		    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
940 		vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
941 		vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
942 
943 		break;
944 
945 	case VM_SWAPOUT_T2_PASSIVE:
946 
947 		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
948 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
949 			vm_swapper_entered_T0P++;
950 
951 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
952 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
953 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
954 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
955 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
956 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
957 
958 			break;
959 		}
960 		if (swapout_target_age || hibernate_flushing == TRUE) {
961 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
962 			vm_swapper_entered_T1P++;
963 
964 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
965 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
966 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
967 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
968 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
969 			vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
970 		}
971 		break;
972 
973 	case VM_SWAPOUT_T1_PASSIVE:
974 
975 		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
976 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
977 			vm_swapper_entered_T0P++;
978 
979 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
980 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
981 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
982 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
983 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
984 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
985 
986 			break;
987 		}
988 		if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
989 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
990 			vm_swapper_entered_T2P++;
991 
992 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
993 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
994 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
995 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
996 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
997 			vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
998 		}
999 		break;
1000 
1001 	case VM_SWAPOUT_T0_PASSIVE:
1002 
1003 		if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1004 			vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1005 			vm_swapper_entered_T2P++;
1006 
1007 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1008 			    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
1009 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1010 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1011 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1012 			vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1013 
1014 			break;
1015 		}
1016 		if (SWAPPER_NEEDS_TO_CATCHUP()) {
1017 			vm_swapper_entered_T0++;
1018 
1019 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1020 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1021 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1022 			vm_swapout_state = VM_SWAPOUT_T0;
1023 		}
1024 		break;
1025 
1026 	case VM_SWAPOUT_T0:
1027 
1028 		if (SWAPPER_HAS_CAUGHTUP()) {
1029 			vm_swapper_entered_T0P++;
1030 
1031 			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1032 			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1033 			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1034 			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1035 		}
1036 		break;
1037 	}
1038 }
1039 
1040 int vm_swapout_found_empty = 0;
1041 
1042 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1043 
1044 int vm_swapout_soc_busy = 0;
1045 int vm_swapout_soc_done = 0;
1046 
1047 
1048 static struct swapout_io_completion *
vm_swapout_find_free_soc(void)1049 vm_swapout_find_free_soc(void)
1050 {
1051 	int      i;
1052 
1053 	for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1054 		if (vm_swapout_ctx[i].swp_io_busy == 0) {
1055 			return &vm_swapout_ctx[i];
1056 		}
1057 	}
1058 	assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1059 
1060 	return NULL;
1061 }
1062 
1063 static struct swapout_io_completion *
vm_swapout_find_done_soc(void)1064 vm_swapout_find_done_soc(void)
1065 {
1066 	int      i;
1067 
1068 	if (vm_swapout_soc_done) {
1069 		for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1070 			if (vm_swapout_ctx[i].swp_io_done) {
1071 				return &vm_swapout_ctx[i];
1072 			}
1073 		}
1074 	}
1075 	return NULL;
1076 }
1077 
1078 static void
vm_swapout_complete_soc(struct swapout_io_completion * soc)1079 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1080 {
1081 	kern_return_t  kr;
1082 
1083 	if (soc->swp_io_error) {
1084 		kr = KERN_FAILURE;
1085 	} else {
1086 		kr = KERN_SUCCESS;
1087 	}
1088 
1089 	lck_mtx_unlock_always(c_list_lock);
1090 
1091 	vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1092 	vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1093 
1094 	lck_mtx_lock_spin_always(c_list_lock);
1095 
1096 	soc->swp_io_done = 0;
1097 	soc->swp_io_busy = 0;
1098 
1099 	vm_swapout_soc_busy--;
1100 	vm_swapout_soc_done--;
1101 }
1102 
1103 bool vm_swapout_thread_inited = false;
1104 static void
vm_swapout_thread(void)1105 vm_swapout_thread(void)
1106 {
1107 	uint32_t        size = 0;
1108 	c_segment_t     c_seg = NULL;
1109 	kern_return_t   kr = KERN_SUCCESS;
1110 	struct swapout_io_completion *soc;
1111 
1112 	if (!vm_swapout_thread_inited) {
1113 #if CONFIG_THREAD_GROUPS
1114 		thread_group_vm_add();
1115 #endif /* CONFIG_THREAD_GROUPS */
1116 		current_thread()->options |= TH_OPT_VMPRIV;
1117 		vm_swapout_thread_inited = true;
1118 	}
1119 
1120 	vm_swapout_thread_awakened++;
1121 
1122 	lck_mtx_lock_spin_always(c_list_lock);
1123 
1124 	vm_swapout_thread_running = TRUE;
1125 again:
1126 	while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
1127 		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1128 
1129 		lck_mtx_lock_spin_always(&c_seg->c_lock);
1130 
1131 		assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1132 
1133 		if (c_seg->c_busy) {
1134 			lck_mtx_unlock_always(c_list_lock);
1135 
1136 			c_seg_wait_on_busy(c_seg);
1137 
1138 			lck_mtx_lock_spin_always(c_list_lock);
1139 
1140 			continue;
1141 		}
1142 		vm_swapout_thread_processed_segments++;
1143 
1144 		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1145 
1146 		if (size == 0) {
1147 			assert(c_seg->c_bytes_used == 0);
1148 
1149 			if (!c_seg->c_on_minorcompact_q) {
1150 				c_seg_need_delayed_compaction(c_seg, TRUE);
1151 			}
1152 
1153 			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1154 			lck_mtx_unlock_always(&c_seg->c_lock);
1155 			lck_mtx_unlock_always(c_list_lock);
1156 
1157 			vm_swapout_found_empty++;
1158 			goto c_seg_is_empty;
1159 		}
1160 		C_SEG_BUSY(c_seg);
1161 		c_seg->c_busy_swapping = 1;
1162 
1163 		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1164 
1165 		lck_mtx_unlock_always(c_list_lock);
1166 		lck_mtx_unlock_always(&c_seg->c_lock);
1167 
1168 #if CHECKSUM_THE_SWAP
1169 		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1170 		c_seg->cseg_swap_size = size;
1171 #endif /* CHECKSUM_THE_SWAP */
1172 
1173 #if ENCRYPTED_SWAP
1174 		vm_swap_encrypt(c_seg);
1175 #endif /* ENCRYPTED_SWAP */
1176 
1177 		soc = vm_swapout_find_free_soc();
1178 		assert(soc);
1179 
1180 		soc->swp_upl_ctx.io_context = (void *)soc;
1181 		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1182 		soc->swp_upl_ctx.io_error = 0;
1183 
1184 		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1185 
1186 		if (kr != KERN_SUCCESS) {
1187 			if (soc->swp_io_done) {
1188 				lck_mtx_lock_spin_always(c_list_lock);
1189 
1190 				soc->swp_io_done = 0;
1191 				vm_swapout_soc_done--;
1192 
1193 				lck_mtx_unlock_always(c_list_lock);
1194 			}
1195 			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1196 		} else {
1197 			soc->swp_io_busy = 1;
1198 			vm_swapout_soc_busy++;
1199 		}
1200 
1201 c_seg_is_empty:
1202 		if (c_swapout_count == 0) {
1203 			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1204 		}
1205 
1206 		lck_mtx_lock_spin_always(c_list_lock);
1207 
1208 		while ((soc = vm_swapout_find_done_soc())) {
1209 			vm_swapout_complete_soc(soc);
1210 		}
1211 		lck_mtx_unlock_always(c_list_lock);
1212 
1213 		vm_swapout_thread_throttle_adjust();
1214 
1215 		lck_mtx_lock_spin_always(c_list_lock);
1216 	}
1217 	while ((soc = vm_swapout_find_done_soc())) {
1218 		vm_swapout_complete_soc(soc);
1219 	}
1220 	lck_mtx_unlock_always(c_list_lock);
1221 
1222 	vm_pageout_io_throttle();
1223 
1224 	lck_mtx_lock_spin_always(c_list_lock);
1225 
1226 	/*
1227 	 * Recheck if we have some c_segs to wakeup
1228 	 * post throttle. And, check to see if we
1229 	 * have any more swapouts needed.
1230 	 */
1231 	if (vm_swapout_soc_done) {
1232 		goto again;
1233 	}
1234 
1235 	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1236 
1237 	vm_swapout_thread_running = FALSE;
1238 
1239 	lck_mtx_unlock_always(c_list_lock);
1240 
1241 	thread_block((thread_continue_t)vm_swapout_thread);
1242 
1243 	/* NOTREACHED */
1244 }
1245 
1246 
1247 void
vm_swapout_iodone(void * io_context,int error)1248 vm_swapout_iodone(void *io_context, int error)
1249 {
1250 	struct swapout_io_completion *soc;
1251 
1252 	soc = (struct swapout_io_completion *)io_context;
1253 
1254 	lck_mtx_lock_spin_always(c_list_lock);
1255 
1256 	soc->swp_io_done = 1;
1257 	soc->swp_io_error = error;
1258 	vm_swapout_soc_done++;
1259 
1260 	if (!vm_swapout_thread_running) {
1261 		thread_wakeup((event_t)&c_swapout_list_head);
1262 	}
1263 
1264 	lck_mtx_unlock_always(c_list_lock);
1265 }
1266 
1267 
1268 static void
vm_swapout_finish(c_segment_t c_seg,uint64_t f_offset,uint32_t size,kern_return_t kr)1269 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1270 {
1271 	PAGE_REPLACEMENT_DISALLOWED(TRUE);
1272 
1273 	if (kr == KERN_SUCCESS) {
1274 		kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size,
1275 		    KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1276 	}
1277 #if ENCRYPTED_SWAP
1278 	else {
1279 		vm_swap_decrypt(c_seg);
1280 	}
1281 #endif /* ENCRYPTED_SWAP */
1282 	lck_mtx_lock_spin_always(c_list_lock);
1283 	lck_mtx_lock_spin_always(&c_seg->c_lock);
1284 
1285 	if (kr == KERN_SUCCESS) {
1286 		int             new_state = C_ON_SWAPPEDOUT_Q;
1287 		boolean_t       insert_head = FALSE;
1288 
1289 		if (hibernate_flushing == TRUE) {
1290 			if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1291 			    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1292 				insert_head = TRUE;
1293 			}
1294 		} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1295 			new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1296 		}
1297 
1298 		c_seg_switch_state(c_seg, new_state, insert_head);
1299 
1300 		c_seg->c_store.c_swap_handle = f_offset;
1301 
1302 		counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1303 
1304 		c_seg->c_swappedin = false;
1305 
1306 		if (c_seg->c_bytes_used) {
1307 			OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1308 		}
1309 
1310 #if CONFIG_FREEZE
1311 		/*
1312 		 * Successful swapout. Decrement the in-core compressed pages count.
1313 		 */
1314 		OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1315 		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1316 #endif /* CONFIG_FREEZE */
1317 	} else {
1318 		if (c_seg->c_overage_swap == TRUE) {
1319 			c_seg->c_overage_swap = FALSE;
1320 			c_overage_swapped_count--;
1321 		}
1322 
1323 #if CONFIG_FREEZE
1324 		if (c_seg->c_task_owner) {
1325 			c_seg_update_task_owner(c_seg, NULL);
1326 		}
1327 #endif /* CONFIG_FREEZE */
1328 
1329 		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1330 
1331 		if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1332 			c_seg_need_delayed_compaction(c_seg, TRUE);
1333 		}
1334 	}
1335 	assert(c_seg->c_busy_swapping);
1336 	assert(c_seg->c_busy);
1337 
1338 	c_seg->c_busy_swapping = 0;
1339 	lck_mtx_unlock_always(c_list_lock);
1340 
1341 	C_SEG_WAKEUP_DONE(c_seg);
1342 	lck_mtx_unlock_always(&c_seg->c_lock);
1343 
1344 	PAGE_REPLACEMENT_DISALLOWED(FALSE);
1345 }
1346 
1347 
1348 boolean_t
vm_swap_create_file()1349 vm_swap_create_file()
1350 {
1351 	uint64_t        size = 0;
1352 	int             namelen = 0;
1353 	boolean_t       swap_file_created = FALSE;
1354 	boolean_t       swap_file_reuse = FALSE;
1355 	boolean_t       swap_file_pin = FALSE;
1356 	struct swapfile *swf = NULL;
1357 
1358 	/*
1359 	 * make sure we've got all the info we need
1360 	 * to potentially pin a swap file... we could
1361 	 * be swapping out due to hibernation w/o ever
1362 	 * having run vm_pageout_scan, which is normally
1363 	 * the trigger to do the init
1364 	 */
1365 	vm_compaction_swapper_do_init();
1366 
1367 	/*
1368 	 * Any swapfile structure ready for re-use?
1369 	 */
1370 
1371 	lck_mtx_lock(&vm_swap_data_lock);
1372 
1373 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1374 
1375 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1376 		if (swf->swp_flags == SWAP_REUSE) {
1377 			swap_file_reuse = TRUE;
1378 			break;
1379 		}
1380 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1381 	}
1382 
1383 	lck_mtx_unlock(&vm_swap_data_lock);
1384 
1385 	if (swap_file_reuse == FALSE) {
1386 		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1387 
1388 		swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1389 		swf->swp_index = vm_num_swap_files + 1;
1390 		swf->swp_pathlen = namelen;
1391 		swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1392 
1393 		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1394 	}
1395 
1396 	vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1397 
1398 	if (swf->swp_vp == NULL) {
1399 		if (swap_file_reuse == FALSE) {
1400 			kfree_data(swf->swp_path, swf->swp_pathlen);
1401 			kfree_type(struct swapfile, swf);
1402 		}
1403 		return FALSE;
1404 	}
1405 	vm_swapfile_can_be_created = TRUE;
1406 
1407 	size = MAX_SWAP_FILE_SIZE;
1408 
1409 	while (size >= MIN_SWAP_FILE_SIZE) {
1410 		swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1411 
1412 		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1413 			int num_bytes_for_bitmap = 0;
1414 
1415 			swap_file_created = TRUE;
1416 
1417 			swf->swp_size = size;
1418 			swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1419 			swf->swp_nseginuse = 0;
1420 			swf->swp_free_hint = 0;
1421 
1422 			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1423 			/*
1424 			 * Allocate a bitmap that describes the
1425 			 * number of segments held by this swapfile.
1426 			 */
1427 			swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1428 			    Z_WAITOK | Z_ZERO);
1429 
1430 			swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1431 			    Z_WAITOK | Z_ZERO);
1432 
1433 			/*
1434 			 * passing a NULL trim_list into vnode_trim_list
1435 			 * will return ENOTSUP if trim isn't supported
1436 			 * and 0 if it is
1437 			 */
1438 			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1439 				swp_trim_supported = TRUE;
1440 			}
1441 
1442 			lck_mtx_lock(&vm_swap_data_lock);
1443 
1444 			swf->swp_flags = SWAP_READY;
1445 
1446 			if (swap_file_reuse == FALSE) {
1447 				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1448 			}
1449 
1450 			vm_num_swap_files++;
1451 
1452 			vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1453 			if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1454 				vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1455 			}
1456 
1457 			if (swap_file_pin == TRUE) {
1458 				vm_num_pinned_swap_files++;
1459 				swf->swp_flags |= SWAP_PINNED;
1460 				vm_swappin_avail -= swf->swp_size;
1461 			}
1462 
1463 			lck_mtx_unlock(&vm_swap_data_lock);
1464 
1465 			thread_wakeup((event_t) &vm_num_swap_files);
1466 #if !XNU_TARGET_OS_OSX
1467 			if (vm_num_swap_files == 1) {
1468 				c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1469 
1470 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1471 					c_overage_swapped_limit /= 2;
1472 				}
1473 			}
1474 #endif /* !XNU_TARGET_OS_OSX */
1475 			break;
1476 		} else {
1477 			size = size / 2;
1478 		}
1479 	}
1480 	if (swap_file_created == FALSE) {
1481 		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1482 
1483 		swf->swp_vp = NULL;
1484 
1485 		if (swap_file_reuse == FALSE) {
1486 			kfree_data(swf->swp_path, swf->swp_pathlen);
1487 			kfree_type(struct swapfile, swf);
1488 		}
1489 	}
1490 	return swap_file_created;
1491 }
1492 
1493 extern void vnode_put(struct vnode* vp);
1494 kern_return_t
vm_swap_get(c_segment_t c_seg,uint64_t f_offset,uint64_t size)1495 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1496 {
1497 	struct swapfile *swf = NULL;
1498 	uint64_t        file_offset = 0;
1499 	int             retval = 0;
1500 
1501 	assert(c_seg->c_store.c_buffer);
1502 
1503 	lck_mtx_lock(&vm_swap_data_lock);
1504 
1505 	swf = vm_swapfile_for_handle(f_offset);
1506 
1507 	if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1508 		vm_swap_get_failures++;
1509 		retval = 1;
1510 		goto done;
1511 	}
1512 	swf->swp_io_count++;
1513 
1514 	lck_mtx_unlock(&vm_swap_data_lock);
1515 
1516 #if DEVELOPMENT || DEBUG
1517 	C_SEG_MAKE_WRITEABLE(c_seg);
1518 #endif
1519 	file_offset = (f_offset & SWAP_SLOT_MASK);
1520 
1521 	if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1522 		printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1523 	} else {
1524 		retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1525 		vnode_put(swf->swp_vp);
1526 	}
1527 
1528 #if DEVELOPMENT || DEBUG
1529 	C_SEG_WRITE_PROTECT(c_seg);
1530 #endif
1531 	if (retval == 0) {
1532 		counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1533 	} else {
1534 		vm_swap_get_failures++;
1535 	}
1536 
1537 	/*
1538 	 * Free this slot in the swap structure.
1539 	 */
1540 	vm_swap_free(f_offset);
1541 
1542 	lck_mtx_lock(&vm_swap_data_lock);
1543 	swf->swp_io_count--;
1544 
1545 	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1546 		swf->swp_flags &= ~SWAP_WANTED;
1547 		thread_wakeup((event_t) &swf->swp_flags);
1548 	}
1549 done:
1550 	lck_mtx_unlock(&vm_swap_data_lock);
1551 
1552 	if (retval == 0) {
1553 		return KERN_SUCCESS;
1554 	} else {
1555 		return KERN_FAILURE;
1556 	}
1557 }
1558 
1559 kern_return_t
vm_swap_put(vm_offset_t addr,uint64_t * f_offset,uint32_t size,c_segment_t c_seg,struct swapout_io_completion * soc)1560 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1561 {
1562 	unsigned int    segidx = 0;
1563 	struct swapfile *swf = NULL;
1564 	uint64_t        file_offset = 0;
1565 	uint64_t        swapfile_index = 0;
1566 	unsigned int    byte_for_segidx = 0;
1567 	unsigned int    offset_within_byte = 0;
1568 	boolean_t       swf_eligible = FALSE;
1569 	boolean_t       waiting = FALSE;
1570 	boolean_t       retried = FALSE;
1571 	int             error = 0;
1572 	clock_sec_t     sec;
1573 	clock_nsec_t    nsec;
1574 	void            *upl_ctx = NULL;
1575 	boolean_t       drop_iocount = FALSE;
1576 
1577 	if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1578 		return KERN_FAILURE;
1579 	}
1580 retry:
1581 	lck_mtx_lock(&vm_swap_data_lock);
1582 
1583 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1584 
1585 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1586 		segidx = swf->swp_free_hint;
1587 
1588 		swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1589 
1590 		if (swf_eligible) {
1591 			while (segidx < swf->swp_nsegs) {
1592 				byte_for_segidx = segidx >> 3;
1593 				offset_within_byte = segidx % 8;
1594 
1595 				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1596 					segidx++;
1597 					continue;
1598 				}
1599 
1600 				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1601 
1602 				file_offset = segidx * compressed_swap_chunk_size;
1603 				swf->swp_nseginuse++;
1604 				swf->swp_io_count++;
1605 				swf->swp_csegs[segidx] = c_seg;
1606 
1607 				swapfile_index = swf->swp_index;
1608 				vm_swapfile_total_segs_used++;
1609 				if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1610 					vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1611 				}
1612 
1613 				clock_get_system_nanotime(&sec, &nsec);
1614 
1615 				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1616 					thread_wakeup((event_t) &vm_swapfile_create_needed);
1617 				}
1618 
1619 				lck_mtx_unlock(&vm_swap_data_lock);
1620 
1621 				goto issue_io;
1622 			}
1623 		}
1624 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1625 	}
1626 	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1627 
1628 	/*
1629 	 * we've run out of swap segments, but may not
1630 	 * be in a position to immediately create a new swap
1631 	 * file if we've recently failed to create due to a lack
1632 	 * of free space in the root filesystem... we'll try
1633 	 * to kick that create off, but in any event we're going
1634 	 * to take a breather (up to 1 second) so that we're not caught in a tight
1635 	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1636 	 * segments into swap files only to have them immediately put back
1637 	 * on the c_age queue due to vm_swap_put failing.
1638 	 *
1639 	 * if we're doing these puts due to a hibernation flush,
1640 	 * no need to block... setting hibernate_no_swapspace to TRUE,
1641 	 * will cause "vm_compressor_compact_and_swap" to immediately abort
1642 	 */
1643 	clock_get_system_nanotime(&sec, &nsec);
1644 
1645 	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1646 		thread_wakeup((event_t) &vm_swapfile_create_needed);
1647 	}
1648 
1649 	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1650 		waiting = TRUE;
1651 		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1652 	} else {
1653 		hibernate_no_swapspace = TRUE;
1654 	}
1655 
1656 	lck_mtx_unlock(&vm_swap_data_lock);
1657 
1658 	if (waiting == TRUE) {
1659 		thread_block(THREAD_CONTINUE_NULL);
1660 
1661 		if (retried == FALSE && hibernate_flushing == TRUE) {
1662 			retried = TRUE;
1663 			goto retry;
1664 		}
1665 	}
1666 	vm_swap_put_failures_no_swap_file++;
1667 
1668 	return KERN_FAILURE;
1669 
1670 issue_io:
1671 	assert(c_seg->c_busy_swapping);
1672 	assert(c_seg->c_busy);
1673 	assert(!c_seg->c_on_minorcompact_q);
1674 
1675 	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1676 
1677 	if (soc) {
1678 		soc->swp_c_seg = c_seg;
1679 		soc->swp_c_size = size;
1680 
1681 		soc->swp_swf = swf;
1682 
1683 		soc->swp_io_error = 0;
1684 		soc->swp_io_done = 0;
1685 
1686 		upl_ctx = (void *)&soc->swp_upl_ctx;
1687 	}
1688 
1689 	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1690 		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1691 	} else {
1692 		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1693 		drop_iocount = TRUE;
1694 	}
1695 
1696 	if (error || upl_ctx == NULL) {
1697 		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1698 	}
1699 
1700 	return KERN_SUCCESS;
1701 }
1702 
1703 kern_return_t
vm_swap_put_finish(struct swapfile * swf,uint64_t * f_offset,int error,boolean_t drop_iocount)1704 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1705 {
1706 	if (drop_iocount) {
1707 		vnode_put(swf->swp_vp);
1708 	}
1709 
1710 	lck_mtx_lock(&vm_swap_data_lock);
1711 
1712 	swf->swp_io_count--;
1713 
1714 	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1715 		swf->swp_flags &= ~SWAP_WANTED;
1716 		thread_wakeup((event_t) &swf->swp_flags);
1717 	}
1718 	lck_mtx_unlock(&vm_swap_data_lock);
1719 
1720 	if (error) {
1721 		vm_swap_free(*f_offset);
1722 		vm_swap_put_failures++;
1723 
1724 		return KERN_FAILURE;
1725 	}
1726 	return KERN_SUCCESS;
1727 }
1728 
1729 
1730 static void
vm_swap_free_now(struct swapfile * swf,uint64_t f_offset)1731 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1732 {
1733 	uint64_t        file_offset = 0;
1734 	unsigned int    segidx = 0;
1735 
1736 
1737 	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1738 		unsigned int byte_for_segidx = 0;
1739 		unsigned int offset_within_byte = 0;
1740 
1741 		file_offset = (f_offset & SWAP_SLOT_MASK);
1742 		segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1743 
1744 		byte_for_segidx = segidx >> 3;
1745 		offset_within_byte = segidx % 8;
1746 
1747 		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1748 			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1749 
1750 			swf->swp_csegs[segidx] = NULL;
1751 
1752 			swf->swp_nseginuse--;
1753 			vm_swapfile_total_segs_used--;
1754 
1755 			if (segidx < swf->swp_free_hint) {
1756 				swf->swp_free_hint = segidx;
1757 			}
1758 		}
1759 		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1760 			thread_wakeup((event_t) &vm_swapfile_gc_needed);
1761 		}
1762 	}
1763 }
1764 
1765 
1766 uint32_t vm_swap_free_now_count = 0;
1767 uint32_t vm_swap_free_delayed_count = 0;
1768 
1769 
1770 void
vm_swap_free(uint64_t f_offset)1771 vm_swap_free(uint64_t f_offset)
1772 {
1773 	struct swapfile *swf = NULL;
1774 	struct trim_list *tl = NULL;
1775 	clock_sec_t     sec;
1776 	clock_nsec_t    nsec;
1777 
1778 	if (swp_trim_supported == TRUE) {
1779 		tl = kalloc_type(struct trim_list, Z_WAITOK);
1780 	}
1781 
1782 	lck_mtx_lock(&vm_swap_data_lock);
1783 
1784 	swf = vm_swapfile_for_handle(f_offset);
1785 
1786 	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1787 		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1788 			/*
1789 			 * don't delay the free if the underlying disk doesn't support
1790 			 * trim, or we're in the midst of reclaiming this swap file since
1791 			 * we don't want to move segments that are technically free
1792 			 * but not yet handled by the delayed free mechanism
1793 			 */
1794 			vm_swap_free_now(swf, f_offset);
1795 
1796 			vm_swap_free_now_count++;
1797 			goto done;
1798 		}
1799 		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1800 		tl->tl_length = compressed_swap_chunk_size;
1801 
1802 		tl->tl_next = swf->swp_delayed_trim_list_head;
1803 		swf->swp_delayed_trim_list_head = tl;
1804 		swf->swp_delayed_trim_count++;
1805 		tl = NULL;
1806 
1807 		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1808 			clock_get_system_nanotime(&sec, &nsec);
1809 
1810 			if (sec > dont_trim_until_ts) {
1811 				thread_wakeup((event_t) &vm_swapfile_create_needed);
1812 			}
1813 		}
1814 		vm_swap_free_delayed_count++;
1815 	}
1816 done:
1817 	lck_mtx_unlock(&vm_swap_data_lock);
1818 
1819 	if (tl != NULL) {
1820 		kfree_type(struct trim_list, tl);
1821 	}
1822 }
1823 
1824 
1825 static void
vm_swap_wait_on_trim_handling_in_progress()1826 vm_swap_wait_on_trim_handling_in_progress()
1827 {
1828 	while (delayed_trim_handling_in_progress == TRUE) {
1829 		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1830 		lck_mtx_unlock(&vm_swap_data_lock);
1831 
1832 		thread_block(THREAD_CONTINUE_NULL);
1833 
1834 		lck_mtx_lock(&vm_swap_data_lock);
1835 	}
1836 }
1837 
1838 
1839 static void
vm_swap_handle_delayed_trims(boolean_t force_now)1840 vm_swap_handle_delayed_trims(boolean_t force_now)
1841 {
1842 	struct swapfile *swf = NULL;
1843 
1844 	/*
1845 	 * serialize the race between us and vm_swap_reclaim...
1846 	 * if vm_swap_reclaim wins it will turn off SWAP_READY
1847 	 * on the victim it has chosen... we can just skip over
1848 	 * that file since vm_swap_reclaim will first process
1849 	 * all of the delayed trims associated with it
1850 	 */
1851 
1852 	if (compressor_store_stop_compaction == TRUE) {
1853 		return;
1854 	}
1855 
1856 	lck_mtx_lock(&vm_swap_data_lock);
1857 
1858 	delayed_trim_handling_in_progress = TRUE;
1859 
1860 	lck_mtx_unlock(&vm_swap_data_lock);
1861 
1862 	/*
1863 	 * no need to hold the lock to walk the swf list since
1864 	 * vm_swap_create (the only place where we add to this list)
1865 	 * is run on the same thread as this function
1866 	 * and vm_swap_reclaim doesn't remove items from this list
1867 	 * instead marking them with SWAP_REUSE for future re-use
1868 	 */
1869 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1870 
1871 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1872 		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1873 			assert(!(swf->swp_flags & SWAP_RECLAIM));
1874 			vm_swap_do_delayed_trim(swf);
1875 		}
1876 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
1877 	}
1878 	lck_mtx_lock(&vm_swap_data_lock);
1879 
1880 	delayed_trim_handling_in_progress = FALSE;
1881 	thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1882 
1883 	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1884 		thread_wakeup((event_t) &vm_swapfile_gc_needed);
1885 	}
1886 
1887 	lck_mtx_unlock(&vm_swap_data_lock);
1888 }
1889 
1890 static void
vm_swap_do_delayed_trim(struct swapfile * swf)1891 vm_swap_do_delayed_trim(struct swapfile *swf)
1892 {
1893 	struct trim_list *tl, *tl_head;
1894 	int error;
1895 
1896 	if (compressor_store_stop_compaction == TRUE) {
1897 		return;
1898 	}
1899 
1900 	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1901 		printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1902 		return;
1903 	}
1904 
1905 	lck_mtx_lock(&vm_swap_data_lock);
1906 
1907 	tl_head = swf->swp_delayed_trim_list_head;
1908 	swf->swp_delayed_trim_list_head = NULL;
1909 	swf->swp_delayed_trim_count = 0;
1910 
1911 	lck_mtx_unlock(&vm_swap_data_lock);
1912 
1913 	vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1914 
1915 	(void) vnode_put(swf->swp_vp);
1916 
1917 	while ((tl = tl_head) != NULL) {
1918 		unsigned int    segidx = 0;
1919 		unsigned int    byte_for_segidx = 0;
1920 		unsigned int    offset_within_byte = 0;
1921 
1922 		lck_mtx_lock(&vm_swap_data_lock);
1923 
1924 		segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
1925 
1926 		byte_for_segidx = segidx >> 3;
1927 		offset_within_byte = segidx % 8;
1928 
1929 		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1930 			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1931 
1932 			swf->swp_csegs[segidx] = NULL;
1933 
1934 			swf->swp_nseginuse--;
1935 			vm_swapfile_total_segs_used--;
1936 
1937 			if (segidx < swf->swp_free_hint) {
1938 				swf->swp_free_hint = segidx;
1939 			}
1940 		}
1941 		lck_mtx_unlock(&vm_swap_data_lock);
1942 
1943 		tl_head = tl->tl_next;
1944 
1945 		kfree_type(struct trim_list, tl);
1946 	}
1947 }
1948 
1949 
/*
 * vm_swap_flush:
 *
 * Intentionally a no-op in this implementation; it takes no
 * arguments and has no effect.
 */
void
vm_swap_flush(void)
{
}
1955 
/*
 * Number of times vm_swap_reclaim() broke out of its segment loop early
 * (compaction stopped, reclaim should abort, or swap busy).
 */
int     vm_swap_reclaim_yielded = 0;
1957 
1958 void
vm_swap_reclaim(void)1959 vm_swap_reclaim(void)
1960 {
1961 	vm_offset_t     addr = 0;
1962 	unsigned int    segidx = 0;
1963 	uint64_t        f_offset = 0;
1964 	struct swapfile *swf = NULL;
1965 	struct swapfile *smallest_swf = NULL;
1966 	unsigned int    min_nsegs = 0;
1967 	unsigned int    byte_for_segidx = 0;
1968 	unsigned int    offset_within_byte = 0;
1969 	uint32_t        c_size = 0;
1970 
1971 	c_segment_t     c_seg = NULL;
1972 
1973 	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), c_seg_bufsize, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1974 		panic("vm_swap_reclaim: kernel_memory_allocate failed");
1975 	}
1976 
1977 	lck_mtx_lock(&vm_swap_data_lock);
1978 
1979 	/*
1980 	 * if we're running the swapfile list looking for
1981 	 * candidates with delayed trims, we need to
1982 	 * wait before making our decision concerning
1983 	 * the swapfile we want to reclaim
1984 	 */
1985 	vm_swap_wait_on_trim_handling_in_progress();
1986 
1987 	/*
1988 	 * from here until we knock down the SWAP_READY bit,
1989 	 * we need to remain behind the vm_swap_data_lock...
1990 	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1991 	 * will not consider this swapfile for processing
1992 	 */
1993 	swf = (struct swapfile*) queue_first(&swf_global_queue);
1994 	min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
1995 	smallest_swf = NULL;
1996 
1997 	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1998 		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1999 			smallest_swf = swf;
2000 			min_nsegs = swf->swp_nseginuse;
2001 		}
2002 		swf = (struct swapfile*) queue_next(&swf->swp_queue);
2003 	}
2004 
2005 	if (smallest_swf == NULL) {
2006 		goto done;
2007 	}
2008 
2009 	swf = smallest_swf;
2010 
2011 
2012 	swf->swp_flags &= ~SWAP_READY;
2013 	swf->swp_flags |= SWAP_RECLAIM;
2014 
2015 	if (swf->swp_delayed_trim_count) {
2016 		lck_mtx_unlock(&vm_swap_data_lock);
2017 
2018 		vm_swap_do_delayed_trim(swf);
2019 
2020 		lck_mtx_lock(&vm_swap_data_lock);
2021 	}
2022 	segidx = 0;
2023 
2024 	while (segidx < swf->swp_nsegs) {
2025 ReTry_for_cseg:
2026 		/*
2027 		 * Wait for outgoing I/Os.
2028 		 */
2029 		while (swf->swp_io_count) {
2030 			swf->swp_flags |= SWAP_WANTED;
2031 
2032 			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
2033 			lck_mtx_unlock(&vm_swap_data_lock);
2034 
2035 			thread_block(THREAD_CONTINUE_NULL);
2036 
2037 			lck_mtx_lock(&vm_swap_data_lock);
2038 		}
2039 		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2040 			vm_swap_reclaim_yielded++;
2041 			break;
2042 		}
2043 
2044 		byte_for_segidx = segidx >> 3;
2045 		offset_within_byte = segidx % 8;
2046 
2047 		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2048 			segidx++;
2049 			continue;
2050 		}
2051 
2052 		c_seg = swf->swp_csegs[segidx];
2053 		assert(c_seg);
2054 
2055 		lck_mtx_lock_spin_always(&c_seg->c_lock);
2056 
2057 		if (c_seg->c_busy) {
2058 			/*
2059 			 * a swapped out c_segment in the process of being freed will remain in the
2060 			 * busy state until after the vm_swap_free is called on it... vm_swap_free
2061 			 * takes the vm_swap_data_lock, so can't change the swap state until after
2062 			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2063 			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
2064 			 * at that point, we re-look up the swap state which will now indicate that
2065 			 * this c_segment no longer exists.
2066 			 */
2067 			c_seg->c_wanted = 1;
2068 
2069 			assert_wait((event_t) (c_seg), THREAD_UNINT);
2070 			lck_mtx_unlock_always(&c_seg->c_lock);
2071 
2072 			lck_mtx_unlock(&vm_swap_data_lock);
2073 
2074 			thread_block(THREAD_CONTINUE_NULL);
2075 
2076 			lck_mtx_lock(&vm_swap_data_lock);
2077 
2078 			goto ReTry_for_cseg;
2079 		}
2080 		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2081 
2082 		f_offset = segidx * compressed_swap_chunk_size;
2083 
2084 		assert(c_seg == swf->swp_csegs[segidx]);
2085 		swf->swp_csegs[segidx] = NULL;
2086 		swf->swp_nseginuse--;
2087 
2088 		vm_swapfile_total_segs_used--;
2089 
2090 		lck_mtx_unlock(&vm_swap_data_lock);
2091 
2092 		assert(C_SEG_IS_ONDISK(c_seg));
2093 
2094 		C_SEG_BUSY(c_seg);
2095 		c_seg->c_busy_swapping = 1;
2096 #if !CHECKSUM_THE_SWAP
2097 		c_seg_trim_tail(c_seg);
2098 #endif
2099 		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2100 
2101 		assert(c_size <= c_seg_bufsize && c_size);
2102 
2103 		lck_mtx_unlock_always(&c_seg->c_lock);
2104 
2105 		if (vnode_getwithref(swf->swp_vp)) {
2106 			printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2107 			vm_swap_get_failures++;
2108 			goto swap_io_failed;
2109 		} else {
2110 			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2111 				/*
2112 				 * reading the data back in failed, so convert c_seg
2113 				 * to a swapped in c_segment that contains no data
2114 				 */
2115 				c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2116 				/*
2117 				 * returns with c_busy_swapping cleared
2118 				 */
2119 				vnode_put(swf->swp_vp);
2120 				vm_swap_get_failures++;
2121 				goto swap_io_failed;
2122 			}
2123 			vnode_put(swf->swp_vp);
2124 		}
2125 
2126 		counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
2127 		vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2128 
2129 		if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2130 			vm_offset_t     c_buffer;
2131 
2132 			/*
2133 			 * the put failed, so convert c_seg to a fully swapped in c_segment
2134 			 * with valid data
2135 			 */
2136 			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2137 
2138 			kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
2139 
2140 			memcpy((char *)c_buffer, (char *)addr, c_size);
2141 
2142 			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2143 #if ENCRYPTED_SWAP
2144 			vm_swap_decrypt(c_seg);
2145 #endif /* ENCRYPTED_SWAP */
2146 			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2147 			/*
2148 			 * returns with c_busy_swapping cleared
2149 			 */
2150 			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2151 
2152 			goto swap_io_failed;
2153 		}
2154 		counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
2155 
2156 		lck_mtx_lock_spin_always(&c_seg->c_lock);
2157 
2158 		c_seg->c_swappedin = false;
2159 
2160 		assert(C_SEG_IS_ONDISK(c_seg));
2161 		/*
2162 		 * The c_seg will now know about the new location on disk.
2163 		 */
2164 		c_seg->c_store.c_swap_handle = f_offset;
2165 
2166 		assert(c_seg->c_busy_swapping);
2167 		c_seg->c_busy_swapping = 0;
2168 swap_io_failed:
2169 		assert(c_seg->c_busy);
2170 		C_SEG_WAKEUP_DONE(c_seg);
2171 
2172 		lck_mtx_unlock_always(&c_seg->c_lock);
2173 		lck_mtx_lock(&vm_swap_data_lock);
2174 	}
2175 
2176 	if (swf->swp_nseginuse) {
2177 		swf->swp_flags &= ~SWAP_RECLAIM;
2178 		swf->swp_flags |= SWAP_READY;
2179 
2180 		goto done;
2181 	}
2182 	/*
2183 	 * We don't remove this inactive swf from the queue.
2184 	 * That way, we can re-use it when needed again and
2185 	 * preserve the namespace. The delayed_trim processing
2186 	 * is also dependent on us not removing swfs from the queue.
2187 	 */
2188 	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2189 
2190 	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2191 
2192 	lck_mtx_unlock(&vm_swap_data_lock);
2193 
2194 	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2195 
2196 	kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2197 	kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2198 
2199 	lck_mtx_lock(&vm_swap_data_lock);
2200 
2201 	if (swf->swp_flags & SWAP_PINNED) {
2202 		vm_num_pinned_swap_files--;
2203 		vm_swappin_avail += swf->swp_size;
2204 	}
2205 
2206 	swf->swp_vp = NULL;
2207 	swf->swp_size = 0;
2208 	swf->swp_free_hint = 0;
2209 	swf->swp_nsegs = 0;
2210 	swf->swp_flags = SWAP_REUSE;
2211 
2212 	vm_num_swap_files--;
2213 
2214 done:
2215 	thread_wakeup((event_t) &swf->swp_flags);
2216 	lck_mtx_unlock(&vm_swap_data_lock);
2217 
2218 	kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
2219 }
2220 
2221 
2222 uint64_t
vm_swap_get_total_space(void)2223 vm_swap_get_total_space(void)
2224 {
2225 	uint64_t total_space = 0;
2226 
2227 	total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2228 
2229 	return total_space;
2230 }
2231 
2232 uint64_t
vm_swap_get_used_space(void)2233 vm_swap_get_used_space(void)
2234 {
2235 	uint64_t used_space = 0;
2236 
2237 	used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2238 
2239 	return used_space;
2240 }
2241 
/*
 * vm_swap_get_free_space:
 *
 * Returns the allocated-but-unused swap space in bytes, i.e.
 * vm_swap_get_total_space() minus vm_swap_get_used_space().
 * The two values are sampled separately and without a lock.
 */
uint64_t
vm_swap_get_free_space(void)
{
	return vm_swap_get_total_space() - vm_swap_get_used_space();
}
2247 
2248 uint64_t
vm_swap_get_max_configured_space(void)2249 vm_swap_get_max_configured_space(void)
2250 {
2251 	int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2252 	return num_swap_files * MAX_SWAP_FILE_SIZE;
2253 }
2254 
2255 int
vm_swap_low_on_space(void)2256 vm_swap_low_on_space(void)
2257 {
2258 	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2259 		return 0;
2260 	}
2261 
2262 	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
2263 		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2264 			return 0;
2265 		}
2266 
2267 		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2268 			return 1;
2269 		}
2270 	}
2271 	return 0;
2272 }
2273 
2274 int
vm_swap_out_of_space(void)2275 vm_swap_out_of_space(void)
2276 {
2277 	if ((vm_num_swap_files == vm_num_swap_files_config) &&
2278 	    ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2279 		/*
2280 		 * Last swapfile and we have only space for the
2281 		 * last few swapouts.
2282 		 */
2283 		return 1;
2284 	}
2285 
2286 	return 0;
2287 }
2288 
2289 boolean_t
vm_swap_files_pinned(void)2290 vm_swap_files_pinned(void)
2291 {
2292 	boolean_t result;
2293 
2294 	if (vm_swappin_enabled == FALSE) {
2295 		return TRUE;
2296 	}
2297 
2298 	result = (vm_num_pinned_swap_files == vm_num_swap_files);
2299 
2300 	return result;
2301 }
2302 
#if CONFIG_FREEZE
/*
 * vm_swap_max_budget:
 *
 * Ask the volume backing the swapfiles for a freezer budget via
 * vm_swap_vol_get_budget().
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget();
 *                      set to 0 here if no swapfile exists yet and a
 *                      temporary one can't be opened.
 *
 * Returns TRUE if vm_swap_vol_get_budget() reported success (returned 0),
 * FALSE otherwise... in particular FALSE when swapfiles exist but none
 * of them matched the SWAP_READY test below.
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       use_device_value = FALSE;
	struct swapfile *swf = NULL;

	if (vm_num_swap_files) {
		lck_mtx_lock(&vm_swap_data_lock);

		swf = (struct swapfile*) queue_first(&swf_global_queue);

		if (swf) {
			while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
				/*
				 * NOTE(review): this compares the whole flags word, so a
				 * swapfile with SWAP_READY plus any other bit set is skipped,
				 * whereas vm_swap_reclaim tests (swp_flags & SWAP_READY)...
				 * confirm whether the exact match is intended.
				 */
				if (swf->swp_flags == SWAP_READY) {
					assert(swf->swp_vp);

					if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
						use_device_value = TRUE;
					}
					/*
					 * only the first matching swapfile is consulted
					 */
					break;
				}
				swf = (struct swapfile*) queue_next(&swf->swp_queue);
			}
		}

		lck_mtx_unlock(&vm_swap_data_lock);
	} else {
		/*
		 * This block is used for the initial budget value before any swap files
		 * are created. We create a temp swap file to get the budget.
		 */

		struct vnode *temp_vp = NULL;

		vm_swapfile_open(swapfilename, &temp_vp);

		if (temp_vp) {
			if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
				use_device_value = TRUE;
			}

			vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
			temp_vp = NULL;
		} else {
			*freeze_daily_budget = 0;
		}
	}

	return use_device_value;
}
#endif /* CONFIG_FREEZE */
2355 
2356 void
vm_swap_reset_max_segs_tracking(uint64_t * alloced_max,uint64_t * used_max)2357 vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
2358 {
2359 	lck_mtx_lock(&vm_swap_data_lock);
2360 
2361 	*alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
2362 	*used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
2363 
2364 	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2365 	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2366 
2367 	lck_mtx_unlock(&vm_swap_data_lock);
2368 }
2369