/* vm_compressor_backing_store.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796) - OpenGrok cross reference for /xnu-12377.81.4/osfmk/vm/vm_compressor_backing_store.c */

/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "vm_compressor_backing_store_internal.h"
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_protos_internal.h>
#include <vm/vm_kern_xnu.h>
#include <vm/vm_map_xnu.h>
#include <vm/vm_compressor_internal.h>
#include <vm/vm_iokit.h>
#include <vm/vm_map_internal.h>

#include <IOKit/IOHibernatePrivate.h>
#include <kern/policy_internal.h>
#include <sys/kern_memorystatus_xnu.h>

/*
 * Lock group and mutex for the swap bookkeeping in this file.
 * vm_swap_data_lock is taken around the swapper's one-time init, the
 * force-defrag/force-reclaim requests, and the checks the create/gc
 * threads make before they go back to sleep.
 */
LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);

#if defined(XNU_TARGET_OS_OSX)
/*
 * launchd explicitly turns ON swap later during boot on macOS devices.
 */
boolean_t       compressor_store_stop_compaction = TRUE;
#else
boolean_t       compressor_store_stop_compaction = FALSE;
#endif

/* The addresses of these two are the wait events of the create and gc threads. */
boolean_t       vm_swapfile_create_needed = FALSE;
boolean_t       vm_swapfile_gc_needed = FALSE;

/* I/O throttle tier last applied to the swapout thread; -1 before any adjustment. */
int             vm_swapper_throttle = -1;
/* thread_id of the VM_swapout thread, recorded by vm_compressor_swap_init(). */
uint64_t        vm_swapout_thread_id;

uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
uint64_t        vm_swap_get_failures = 0; /* Fatal */
uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
/* Configured ceiling on swap files; set by vm_compressor_swap_init_swap_file_limit(). */
int             vm_num_swap_files_config = 0;
int             vm_num_swap_files = 0;
int             vm_num_pinned_swap_files = 0;
uint64_t        vm_swap_volume_capacity = 0;
int             vm_swapout_thread_processed_segments = 0;
int             vm_swapout_thread_awakened = 0;
bool            vm_swapout_thread_running = FALSE;
_Atomic bool    vm_swapout_wake_pending = false;
/* Wakeup counters and "currently running" flags for the create and gc threads. */
int             vm_swapfile_create_thread_awakened = 0;
int             vm_swapfile_create_thread_running = 0;
int             vm_swapfile_gc_thread_awakened = 0;
int             vm_swapfile_gc_thread_running = 0;

/* Pinned-swap budget; consulted by VM_SWAP_SHOULD_PIN() on macOS. */
int64_t         vm_swappin_avail = 0;
boolean_t       vm_swappin_enabled = FALSE;
/* Segment accounting across all swap files, with high-water marks. */
unsigned int    vm_swapfile_total_segs_alloced = 0;
unsigned int    vm_swapfile_total_segs_alloced_max = 0;
unsigned int    vm_swapfile_total_segs_used = 0;
unsigned int    vm_swapfile_total_segs_used_max = 0;

/* Path prefix of the swap files; a numeric index is appended to form each name. */
char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;

/* Compressor state defined outside this file. */
extern vm_map_t compressor_map;
extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;

/* Swap file state bits (presumably the values kept in swapfile.swp_flags). */
#define SWAP_READY      0x1     /* Swap file is ready to be used */
#define SWAP_RECLAIM    0x2     /* Swap file is marked to be reclaimed */
#define SWAP_WANTED     0x4     /* Swap file has waiters */
#define SWAP_REUSE      0x8     /* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED     0x10    /* Swap file is pinned (FusionDrive) */


/*
 * Bookkeeping record for one swap file. Records are linked on
 * swf_global_queue through swp_queue and are located by swp_index
 * (see vm_swapfile_for_handle()).
 */
struct swapfile {
	queue_head_t            swp_queue;      /* list of swap files */
	char                    *swp_path;      /* saved pathname of swap file */
	struct vnode            *swp_vp;        /* backing vnode */
	uint64_t                swp_size;       /* size of this swap file */
	uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
	unsigned int            swp_pathlen;    /* length of pathname */
	unsigned int            swp_nsegs;      /* #segments we can use */
	unsigned int            swp_nseginuse;  /* #segments in use */
	unsigned int            swp_index;      /* index of this swap file */
	unsigned int            swp_flags;      /* state of swap file */
	unsigned int            swp_free_hint;  /* offset of 1st free chunk */
	unsigned int            swp_io_count;   /* count of outstanding I/Os */
	c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */

	/* frees whose trim has been deferred, and how many are queued */
	struct trim_list        *swp_delayed_trim_list_head;
	unsigned int            swp_delayed_trim_count;
};

/* Head of the list of all struct swapfile records. */
queue_head_t    swf_global_queue;
boolean_t       swp_trim_supported = FALSE;

extern uint64_t         dont_trim_until_ts;
/* mach_absolute_time() stamps of the most recent swapfile creation attempts. */
uint64_t                vm_swapfile_last_failed_to_create_ts = 0;
uint64_t                vm_swapfile_last_successful_create_ts = 0;
static bool             vm_swapfile_can_be_created = false;
static bool             delayed_trim_handling_in_progress = false;

/*
 * Set by hibernate_pin_swap(TRUE); the create and gc threads stop working
 * while it is set and wake any thread waiting on its address.
 */
boolean_t               hibernate_in_progress_with_pinned_swap = FALSE;

/* Forward declarations of file-local helpers. */
static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);
static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);

extern int vnode_getwithref(struct vnode* vp);

/* One-shot requests consumed (reset to FALSE) by the gc thread. */
boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;

/*
 * Per-platform swap limits and the predicates used by the swap threads.
 */
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5
#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
/* 64GB / 16GB worth of swap files when app swap is enabled. */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/*
 * We reserve compressor pool VA at boot for the max # of swap files. If someone
 * has enabled app swap but we're not an arm large memory device we can't hog
 * all of the VA so we only go up to 4GB.
 */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/* Volume capacity (128GB) that maps to VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM swap files. */
#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

/* Defragment when forced, or when sparse segments exceed 1/16 of the used segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
/* Pinning is never requested on this configuration. */
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

/* Defragment when forced, or when sparse segments exceed 1/4 of the used segments. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
/* Pin only while the remaining pinned budget covers the requested size. */
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

/*
 * Reclaim starts once the allocated-but-unused segment count reaches
 * swapfile_reclaim_threshold_segs, and is abandoned when it falls to
 * swapfile_reclam_minimum_segs (sic - that is the variable's declared name),
 * unless a reclaim was forced.
 */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)

/* True while swapouts are queued and the swapper is running at throttle tier 0. */
#define VM_SWAP_BUSY()  (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)


#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

/* Optional facility that records compressed data to a file. */
#if RECORD_THE_COMPRESSED_DATA
boolean_t       c_compressed_record_init_done = FALSE;  /* was the record file opened? */
int             c_compressed_record_write_error = 0;    /* result of the last write; non-zero stops further writes */
struct vnode    *c_compressed_record_vp = NULL;         /* the file opened for record write */
uint64_t        c_compressed_record_file_offset = 0;    /* next write offset */
void    c_compressed_record_init(void);
void    c_compressed_record_write(char *, int);
#endif

extern void                     vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);

/*
 * Called with the vm_swap_data_lock held.
 */

static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t                file_offset = 0;
	unsigned int            swapfile_index = 0;
	struct swapfile*        swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swapfile_index == swf->swp_index) {
			break;
		}

		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}

	return swf;
}

#if ENCRYPTED_SWAP

#include <libkern/crypto/aesxts.h>

extern int cc_rand_generate(void *, size_t);     /* from <libkern/crypto/rand.h> */

/* Set to TRUE by swap_crypt_initialize() once the XTS context is usable. */
boolean_t       swap_crypt_initialized;
void            swap_crypt_initialize(void);

/* AES-XTS context and key material shared by vm_swap_encrypt()/vm_swap_decrypt(). */
symmetric_xts   xts_modectx;
uint32_t        swap_crypt_key1[8];   /* big enough for a 256 bit random key */
uint32_t        swap_crypt_key2[8];   /* big enough for a 256 bit random key */

#if DEVELOPMENT || DEBUG
/* Scratch pages for the round-trip self-test run by swap_crypt_initialize(). */
boolean_t       swap_crypt_xts_tested = FALSE;
unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
#endif /* DEVELOPMENT || DEBUG */

/* Running totals of pages passed through vm_swap_encrypt()/vm_swap_decrypt(). */
unsigned long   vm_page_encrypt_counter;
unsigned long   vm_page_decrypt_counter;


/*
 * One-time setup of swap encryption.
 *
 * Fills swap_crypt_key1/swap_crypt_key2 with random bytes, starts the XTS
 * context in xts_modectx and sets swap_crypt_initialized. Must not be
 * called twice (asserted).
 *
 * On DEVELOPMENT/DEBUG kernels a self-test follows: a 4096-byte reference
 * page is encrypted and decrypted, both out of place and in place, and the
 * kernel panics if the round trip does not reproduce the reference.
 */
void
swap_crypt_initialize(void)
{
	uint8_t  *key1_bytes = (uint8_t *)&swap_crypt_key1;
	uint8_t  *key2_bytes = (uint8_t *)&swap_crypt_key2;
	int      key1_len = sizeof(swap_crypt_key1);
	int      key2_len = sizeof(swap_crypt_key2);
	int      err;

	assert(swap_crypt_initialized == FALSE);

	err = cc_rand_generate((void *)key1_bytes, key1_len);
	assert(!err);

	err = cc_rand_generate((void *)key2_bytes, key2_len);
	assert(!err);

	err = xts_start(0, NULL, key1_bytes, key1_len, key2_bytes, key2_len, 0, 0, &xts_modectx);
	assert(!err);

	swap_crypt_initialized = TRUE;

#if DEVELOPMENT || DEBUG
	uint8_t *plain = (uint8_t *)swap_crypt_test_page_ref;
	uint8_t *cipher = (uint8_t *)swap_crypt_test_page_encrypt;
	uint8_t *scratch = (uint8_t *)swap_crypt_test_page_decrypt;
	uint8_t *iv;
	uint64_t tweak[2];
	int page_bytes = 4096;
	int pos = 0;
	int rc = 0;

	assert(swap_crypt_xts_tested == FALSE);

	/*
	 * Validate the encryption algorithms, starting from a reference
	 * page holding a repeating byte ramp.
	 */
	for (pos = 0; pos < page_bytes; pos++) {
		plain[pos] = (char) pos;
	}
	tweak[0] = (uint64_t)0xaa;
	tweak[1] = 0;
	iv = (uint8_t *)tweak;

	/* reference -> encrypt page */
	rc = xts_encrypt(plain, page_bytes, cipher, iv, &xts_modectx);
	assert(!rc);

	/* the ciphertext must differ from the reference in at least one byte */
	pos = 0;
	while (pos < page_bytes && cipher[pos] == plain[pos]) {
		pos++;
	}
	assert(pos != page_bytes);

	/* encrypt page -> decrypt page */
	rc = xts_decrypt(cipher, page_bytes, scratch, iv, &xts_modectx);
	assert(!rc);

	/* the decrypted page must equal the reference */
	for (pos = 0; pos < page_bytes; pos++) {
		if (scratch[pos] != plain[pos]) {
			panic("encryption test failed");
		}
	}

	/* repeat the round trip with source and destination being the same page */
	rc = xts_encrypt(scratch, page_bytes, scratch, iv, &xts_modectx);
	assert(!rc);

	rc = xts_decrypt(scratch, page_bytes, scratch, iv, &xts_modectx);
	assert(!rc);

	for (pos = 0; pos < page_bytes; pos++) {
		if (scratch[pos] != plain[pos]) {
			panic("in place encryption test failed");
		}
	}
	swap_crypt_xts_tested = TRUE;
#endif /* DEVELOPMENT || DEBUG */
}


/*
 * Encrypt a compressed segment's buffer in place.
 *
 * The populated part of c_seg's buffer, rounded up to a page multiple, is
 * run through XTS encryption using a tweak built from the segment's
 * address. The swap crypto context is initialized on first use, and
 * vm_page_encrypt_counter is advanced by the number of pages processed.
 */
void
vm_swap_encrypt(c_segment_t c_seg)
{
	uint64_t tweak[2];
	uint8_t *buf;
	int nbytes = 0;
	int rc   = 0;

	if (swap_crypt_initialized == FALSE) {
		swap_crypt_initialize();
	}

	/*
	 * Data stored in the compressor should never need to be faulted in.
	 * Keep the pages we are encrypting from being stolen out from under
	 * us in the off chance that the mapping gets disconnected while the
	 * encryption is running.
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);
#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	buf = (uint8_t *)c_seg->c_store.c_buffer;
	nbytes = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/* vm_swap_decrypt() builds the same tweak from the same segment */
	tweak[0] = (uint64_t)c_seg;
	tweak[1] = 0;

	rc = xts_encrypt(buf, nbytes, buf, (uint8_t *)tweak, &xts_modectx);
	assert(!rc);

	vm_page_encrypt_counter += (nbytes / PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}

/*
 * Decrypt a compressed segment's buffer in place.
 *
 * Mirror of vm_swap_encrypt(): the populated part of c_seg's buffer,
 * rounded up to a page multiple, is decrypted with a tweak built from the
 * segment's address, and vm_page_decrypt_counter is advanced by the number
 * of pages processed. The swap crypto context must already be initialized.
 *
 * disallow_page_replacement selects whether this function brackets the
 * work with PAGE_REPLACEMENT_DISALLOWED(TRUE/FALSE) itself.
 */
void
vm_swap_decrypt(c_segment_t c_seg, bool disallow_page_replacement)
{
	uint64_t tweak[2];
	uint8_t *buf;
	int nbytes = 0;
	int rc   = 0;

	assert(swap_crypt_initialized);

	/*
	 * See comment in vm_swap_encrypt().
	 * The master lock may already be held by the caller, so taking it
	 * here is left to the caller's choice.
	 * NOTE(review): presumably callers that already hold it pass false -
	 * confirm against the call sites.
	 */
	if (disallow_page_replacement) {
		PAGE_REPLACEMENT_DISALLOWED(TRUE);
	}

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	buf = (uint8_t *)c_seg->c_store.c_buffer;
	nbytes = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/* same tweak construction as vm_swap_encrypt() */
	tweak[0] = (uint64_t)c_seg;
	tweak[1] = 0;

	rc = xts_decrypt(buf, nbytes, buf, (uint8_t *)tweak, &xts_modectx);
	assert(!rc);

	vm_page_decrypt_counter += (nbytes / PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	if (disallow_page_replacement) {
		PAGE_REPLACEMENT_DISALLOWED(FALSE);
	}
}
#endif /* ENCRYPTED_SWAP */

uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
extern bool memorystatus_swap_all_apps;

/*
 * Compute vm_num_swap_files_config, the maximum number of swap files.
 *
 * The default is VM_MAX_SWAP_FILE_NUM. On CONFIG_JETSAM kernels with app
 * swap enabled the limit is instead derived from the swap volume capacity,
 * clamped to [VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM,
 * VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM]. On DEVELOPMENT/DEBUG kernels a
 * positive "vm_max_num_swap_files" boot-arg overrides the result.
 * The final value is printed.
 */
void
vm_compressor_swap_init_swap_file_limit(void)
{
	vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
#if CONFIG_JETSAM
	if (memorystatus_swap_all_apps) {
		if (vm_swap_volume_capacity != 0) {
			static uint64_t fixed_point_scale = 100;
			/*
			 * Scale the max number of swap files linearly with the
			 * volume capacity, never leaving the MIN..MAX range.
			 */
			vm_num_swap_files_config = vm_swap_volume_capacity * fixed_point_scale / VM_SWAP_MIN_VOLUME_CAPACITY
			    * VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / fixed_point_scale;
			vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
			vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
		} else {
			/*
			 * Early in boot the swap volume capacity is not known yet.
			 * Reserve space for the maximum config; the limit is
			 * lowered later in boot once the capacity is available.
			 */
			vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
		}
	}
#endif /* CONFIG_JETSAM */
#if DEVELOPMENT || DEBUG
	typeof(vm_num_swap_files_config) bootarg_limit = 0;
	if (PE_parse_boot_argn("vm_max_num_swap_files", &bootarg_limit, sizeof(bootarg_limit))) {
		if (bootarg_limit <= 0) {
			printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", bootarg_limit);
		} else {
			vm_num_swap_files_config = bootarg_limit;
		}
	}
#endif
	printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
}

int vm_swap_enabled = 0;

/*
 * Bring up the swap subsystem.
 *
 * Initializes the global swap file queue, derives the segment-count
 * thresholds from the compressor segment buffer size, and starts the three
 * kernel threads that drive swap: VM_swapout, VM_swapfile_create and
 * VM_swapfile_gc. Panics if any thread cannot be created. On return
 * vm_swap_enabled is 1.
 */
void
vm_compressor_swap_init(void)
{
	thread_t        thread = NULL;

	queue_init(&swf_global_queue);

#if !XNU_TARGET_OS_OSX
	/*
	 * dummy value until the swap file gets created
	 * when we drive the first c_segment_t to the
	 * swapout queue... at that time we will
	 * know the true size we have to work with
	 */
	c_overage_swapped_limit = 16;
#endif /* !XNU_TARGET_OS_OSX */

	/* one swap chunk holds exactly one compressor segment buffer */
	compressed_swap_chunk_size = c_seg_bufsize;
	vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
	/* reclaim kicks in at 1.7 swap files' worth of free segments, stops at 1.3 */
	swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
	swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	thread_set_thread_name(thread, "VM_swapout");
	/* remembered so the throttle policy can be adjusted by thread id later */
	vm_swapout_thread_id = thread->thread_id;
	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}
	thread_set_thread_name(thread, "VM_swapfile_create");
	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	thread_set_thread_name(thread, "VM_swapfile_gc");
	/*
	 * Swapfile garbage collection will need to allocate memory
	 * to complete its swap reclaim and in-memory compaction.
	 * So allow it to dip into the reserved VM page pool.
	 */
	thread_lock(thread);
	thread->options |= TH_OPT_VMPRIV;
	thread_unlock(thread);
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
	    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
	/*
	 * Drop our reference only after the last use of 'thread'; the
	 * policy calls above read thread->thread_id.
	 */
	thread_deallocate(thread);

	vm_swap_enabled = 1;
	printf("VM Swap Subsystem is ON\n");
}


#if RECORD_THE_COMPRESSED_DATA

/*
 * Open the compressed-data record file the first time this is called.
 * Later calls are no-ops. The init-done flag is set whether or not the
 * open produced a vnode.
 */
void
c_compressed_record_init()
{
	if (c_compressed_record_init_done != FALSE) {
		return;
	}

	vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
	c_compressed_record_init_done = TRUE;
}

/*
 * Append 'size' bytes from 'buf' to the record file at the current offset.
 * Once a write has reported an error, all further writes are skipped.
 * The offset advances by 'size' after every attempted write, including the
 * one that fails.
 */
void
c_compressed_record_write(char *buf, int size)
{
	if (c_compressed_record_write_error != 0) {
		return;
	}

	c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
	c_compressed_record_file_offset += size;
}
#endif


int             compaction_swapper_inited = 0;

/*
 * One-time, lazily executed setup of the compaction swapper.
 *
 * When the compressor runs with swap, swap file "<swapfilename>0" is
 * opened as a probe: if it does not live on an SSD the compaction
 * thresholds are retuned (unless overridden), and on macOS the vnode's
 * mount is marked as the swap mount and the pinned-swap budget is read.
 * The probe file is closed again before returning.
 *
 * compaction_swapper_inited is checked once without the lock as a fast
 * path and again under vm_swap_data_lock before doing the work.
 */
void
vm_compaction_swapper_do_init(void)
{
	struct  vnode *probe_vp;
	char    *probe_path;
	int     probe_path_len;

	if (compaction_swapper_inited) {
		return;
	}

	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
		/* nothing to set up when the compressor runs without swap */
		compaction_swapper_inited = 1;
		return;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	if (compaction_swapper_inited) {
		/* another thread finished the init while we waited for the lock */
		lck_mtx_unlock(&vm_swap_data_lock);
		return;
	}

	probe_path_len = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
	probe_path = kalloc_data(probe_path_len, Z_WAITOK | Z_ZERO);
	snprintf(probe_path, probe_path_len, "%s%d", swapfilename, 0);

	vm_swapfile_open(probe_path, &probe_vp);

	if (probe_vp) {
		if (!vnode_pager_isSSD(probe_vp)) {
			/*
			 * Swap files live on an HDD, so start swapping much
			 * earlier: SSD write-wear is not a concern and there is
			 * very little write bandwidth to work with.
			 * These values were derived experimentally by running the
			 * performance team's stock test for evaluating HDD
			 * performance against various combinations and comparing
			 * the overall results.
			 * Note that the > relationship between these 4 values
			 * must be maintained.
			 */
			if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
				vm_compressor_minorcompact_threshold_divisor = 15;
			}
			if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
				vm_compressor_majorcompact_threshold_divisor = 18;
			}
			if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
				vm_compressor_unthrottle_threshold_divisor = 24;
			}
			if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
				vm_compressor_catchup_threshold_divisor = 30;
			}
		}
#if XNU_TARGET_OS_OSX
		vnode_setswapmount(probe_vp);
		vm_swappin_avail = vnode_getswappin_avail(probe_vp);

		if (vm_swappin_avail) {
			vm_swappin_enabled = TRUE;
		}
#endif /* XNU_TARGET_OS_OSX */
		vm_swapfile_close((uint64_t)probe_path, probe_vp);
	}
	kfree_data(probe_path, probe_path_len);

	compaction_swapper_inited = 1;

	lck_mtx_unlock(&vm_swap_data_lock);
}


/*
 * Wake the swapfile gc thread if defragmentation or reclaim is warranted.
 *
 * flags may carry VM_SWAP_FLAGS_FORCE_DEFRAG and/or
 * VM_SWAP_FLAGS_FORCE_RECLAIM; a forced request is latched into
 * vm_swap_force_defrag / vm_swap_force_reclaim under vm_swap_data_lock
 * even when the gc thread is already running. Nothing happens while
 * compaction is stopped or the swapper is busy.
 */
void
vm_swap_consider_defragmenting(int flags)
{
	boolean_t want_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
	boolean_t want_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);

	if (compressor_store_stop_compaction != FALSE || VM_SWAP_BUSY()) {
		return;
	}

	if (!(want_defrag || want_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
		return;
	}

	/* an unforced request has nothing to add while gc is already running */
	if (vm_swapfile_gc_thread_running && !want_defrag && !want_reclaim) {
		return;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	if (want_defrag) {
		vm_swap_force_defrag = TRUE;
	}

	if (want_reclaim) {
		vm_swap_force_reclaim = TRUE;
	}

	/* re-checked under the lock before issuing the wakeup */
	if (!vm_swapfile_gc_thread_running) {
		thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}

	lck_mtx_unlock(&vm_swap_data_lock);
}


/* Statistics for vm_swap_defragment(): one counter per loop outcome. */
int vm_swap_defragment_yielded = 0;
int vm_swap_defragment_swapin = 0;
int vm_swap_defragment_free = 0;
int vm_swap_defragment_busy = 0;

/*
 * Drain the swapped-out-sparse list.
 *
 * Each segment at the head of c_swappedout_sparse_list_head is either
 * freed (when it holds no bytes) or swapped back in. The loop yields early
 * when compaction is stopped or the swapper is busy. The master lock
 * (PAGE_REPLACEMENT_DISALLOWED) and c_list_lock are dropped and retaken on
 * every iteration so other threads can make progress.
 */
static void
vm_swap_defragment()
{
	c_segment_t     c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			/* someone else owns the segment: drop everything, wait, then retry from the list head */
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

#if CONFIG_FREEZE
			if (freezer_incore_cseg_acct) {
				/*
				 * TODO(jason): These two are tricky because they're pre-emptive jetsams.
				 * The system is not unhealthy, but we know that it's about to become unhealthy once
				 * we do this swapin.
				 * So we're waking up the memorystatus thread to make space
				 * (hopefully) before this segment comes in.
				 *
				 * I think the compressor_backing_store needs to keep track of
				 * two new globals that will track the number of segments
				 * being swapped in due to defrag and the number of slots used
				 * in those segments.
				 * Then the health check below can be called from the memorystatus
				 * thread.
				 */
				if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}

				uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
				if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
					memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
				}
			}
#endif /* CONFIG_FREEZE */
			/*
			 * A zero return leaves c_seg->c_lock held, so it is released here.
			 * NOTE(review): c_populated_offset is read after the unlock - confirm
			 * the segment cannot change underneath this statistic.
			 */
			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
				lck_mtx_unlock_always(&c_seg->c_lock);
				vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
			}

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}

TUNABLE(uint64_t, vm_swapfile_creation_delay_ns, "vm_swapfile_creation_delay_ns", 15 * NSEC_PER_SEC);

/*
 * Decide whether another swap file should be created right now.
 *
 * 'now' is a mach_absolute_time() value. Creation is warranted only when
 * all of these hold:
 *   - the number of swap files is below the configured limit,
 *   - the count of allocated-but-unused segments is under the hiwater mark,
 *   - more than vm_swapfile_creation_delay_ns has passed since the last
 *     failed creation attempt.
 */
static inline bool
vm_swapfile_should_create(uint64_t now)
{
	uint64_t ns_since_failure;
	unsigned int spare_segs;

	absolutetime_to_nanoseconds(now - vm_swapfile_last_failed_to_create_ts, &ns_since_failure);

	if (vm_num_swap_files >= vm_num_swap_files_config) {
		return false;
	}

	spare_segs = vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used;
	if (spare_segs >= (unsigned int)vm_swapfile_hiwater_segs) {
		return false;
	}

	return ns_since_failure > vm_swapfile_creation_delay_ns;
}

/* Set once the create thread has performed its first-run setup. */
bool vm_swapfile_create_thread_inited = false;

/*
 * Body of the VM_swapfile_create kernel thread.
 *
 * The thread runs as its own continuation: after finishing a pass it
 * blocks on &vm_swapfile_create_needed and re-enters this function from
 * the top on the next wakeup. Each pass handles delayed trims and creates
 * swap files for as long as vm_swapfile_should_create() says so and
 * neither hibernation-with-pinned-swap nor a compaction stop is in effect.
 */
static void
vm_swapfile_create_thread(void)
{
	uint64_t now;

	if (!vm_swapfile_create_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;

		vm_swapfile_create_thread_inited = true;
	}

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		/* every 'break' below leaves the loop with vm_swap_data_lock still held */
		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (compressor_store_stop_compaction == TRUE) {
			break;
		}

		now = mach_absolute_time();

		if (!vm_swapfile_should_create(now)) {
			break;
		}

		lck_mtx_unlock(&vm_swap_data_lock);

		/* the creation attempt itself runs without the lock; stamp the outcome */
		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = now;
			HIBLOG("low swap: failed to create swapfile\n");
		} else {
			vm_swapfile_last_successful_create_ts = now;
		}
	}
	vm_swapfile_create_thread_running = 0;

	/* let hibernate_pin_swap() re-check the running flags now that this thread is idle */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	/* register the wait before dropping the lock, then park with this function as continuation */
	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}


#if HIBERNATION

/*
 * Enter or leave the "hibernating with pinned swap" state.
 *
 * start == FALSE clears hibernate_in_progress_with_pinned_swap and
 * returns KERN_SUCCESS.
 *
 * start == TRUE is a no-op success when pinned swap is not enabled.
 * Otherwise the flag is set, the caller waits for the swapfile create and
 * gc threads to go idle, and then:
 *   - if unpinned swap files exist, the flag is cleared again and
 *     KERN_FAILURE is returned;
 *   - else swap files are created for as long as VM_SWAP_SHOULD_PIN()
 *     holds and creation succeeds, and KERN_SUCCESS is returned.
 */
kern_return_t
hibernate_pin_swap(boolean_t start)
{
	boolean_t unpinned_files_exist;

	vm_compaction_swapper_do_init();

	if (start == FALSE) {
		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return KERN_SUCCESS;
	}
	if (vm_swappin_enabled == FALSE) {
		return KERN_SUCCESS;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	/* both threads issue a wakeup on this event when they see the flag and go idle */
	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}

	unpinned_files_exist = (vm_num_swap_files > vm_num_pinned_swap_files);
	if (unpinned_files_exist) {
		hibernate_in_progress_with_pinned_swap = FALSE;
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (unpinned_files_exist) {
		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		    vm_num_swap_files, vm_num_pinned_swap_files);
		return KERN_FAILURE;
	}

	/* keep creating swap files until the pin budget or a creation failure stops us */
	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE) && vm_swap_create_file() != FALSE) {
		continue;
	}
	return KERN_SUCCESS;
}
#endif
/* Set once the gc thread has performed its first-run setup. */
bool vm_swapfile_gc_thread_inited = false;
/*
 * Body of the VM_swapfile_gc kernel thread.
 *
 * The thread runs as its own continuation: after finishing a pass it
 * blocks on &vm_swapfile_gc_needed and re-enters this function from the
 * top on the next wakeup. Each pass repeatedly defragments and/or reclaims
 * swap space until neither is needed, the swapper becomes busy, compaction
 * is stopped, or hibernation with pinned swap begins.
 */
static void
vm_swapfile_gc_thread(void)
{
	boolean_t       need_defragment;
	boolean_t       need_reclaim;

	if (!vm_swapfile_gc_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		vm_swapfile_gc_thread_inited = true;
	}

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {
		lck_mtx_lock(&vm_swap_data_lock);

		/* every 'break' below leaves the loop with vm_swap_data_lock still held */
		if (hibernate_in_progress_with_pinned_swap == TRUE) {
			break;
		}

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
			break;
		}

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT()) {
			need_defragment = TRUE;
		}

		/* a reclaim pass is always preceded by a defragment pass */
		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE) {
			break;
		}

		/* forced requests are one-shot: consume them before doing the work */
		vm_swap_force_defrag = FALSE;
		vm_swap_force_reclaim = FALSE;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE) {
			vm_swap_defragment();
		}
		if (need_reclaim == TRUE) {
			vm_swap_reclaim();
		}
	}
	vm_swapfile_gc_thread_running = 0;

	/* let hibernate_pin_swap() re-check the running flags now that this thread is idle */
	if (hibernate_in_progress_with_pinned_swap == TRUE) {
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
	}

	if (compressor_store_stop_compaction == TRUE) {
		thread_wakeup((event_t)&compressor_store_stop_compaction);
	}

	/* register the wait before dropping the lock, then park with this function as continuation */
	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}


/*
 * Values assigned to vm_swapout_limit for each throttle state
 * (T2P/T1P/T0P = tier 2/1/0 with passive I/O, T0 = tier 0).
 */
#define   VM_SWAPOUT_LIMIT_T2P  4
#define   VM_SWAPOUT_LIMIT_T1P  4
#define   VM_SWAPOUT_LIMIT_T0P  6
#define   VM_SWAPOUT_LIMIT_T0   8
#define   VM_SWAPOUT_LIMIT_MAX  8

/* States of the swapout throttle state machine (see vm_swapout_thread_throttle_adjust()). */
#define   VM_SWAPOUT_START      0
#define   VM_SWAPOUT_T2_PASSIVE 1
#define   VM_SWAPOUT_T1_PASSIVE 2
#define   VM_SWAPOUT_T0_PASSIVE 3
#define   VM_SWAPOUT_T0         4

/* Current state and the limit that goes with it. */
int vm_swapout_state = VM_SWAPOUT_START;
int vm_swapout_limit = 1;

/* Counts of how many times each throttle state has been entered. */
int vm_swapper_entered_T0  = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;


/*
 * Common tail of every transition that (re)programs the I/O tier of the
 * swapout thread and turns passive I/O on: record the new tier, bump the
 * supplied "entered" counter, push both policies to the thread, then
 * install the new in-flight limit and state.
 */
static void
vm_swapout_enter_passive_tier(int io_tier, int *entered_count, int io_limit, int next_state)
{
	vm_swapper_throttle = io_tier;
	(*entered_count)++;

	proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
	    TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
	proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
	    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

	vm_swapout_limit = io_limit;
	vm_swapout_state = next_state;
}

/*
 * Advance the swapout throttle state machine by at most one transition.
 *
 * The current state (vm_swapout_state) together with the SWAPPER_* predicates,
 * swapout_target_age and hibernate_flushing selects the next state.  Each
 * transition updates the swapout thread's I/O policy, vm_swapout_limit and
 * the matching vm_swapper_entered_* counter.  When no predicate fires the
 * state is left untouched.
 */
static void
vm_swapout_thread_throttle_adjust(void)
{
	switch (vm_swapout_state) {
	case VM_SWAPOUT_START:
		/* First call: always start out in the most throttled tier. */
		vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER2,
		    &vm_swapper_entered_T2P, VM_SWAPOUT_LIMIT_T2P, VM_SWAPOUT_T2_PASSIVE);
		break;

	case VM_SWAPOUT_T2_PASSIVE:
		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
			vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER0,
			    &vm_swapper_entered_T0P, VM_SWAPOUT_LIMIT_T0P, VM_SWAPOUT_T0_PASSIVE);
		} else if (swapout_target_age || hibernate_flushing == TRUE) {
			vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER1,
			    &vm_swapper_entered_T1P, VM_SWAPOUT_LIMIT_T1P, VM_SWAPOUT_T1_PASSIVE);
		}
		break;

	case VM_SWAPOUT_T1_PASSIVE:
		if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
			vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER0,
			    &vm_swapper_entered_T0P, VM_SWAPOUT_LIMIT_T0P, VM_SWAPOUT_T0_PASSIVE);
		} else if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
			vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER2,
			    &vm_swapper_entered_T2P, VM_SWAPOUT_LIMIT_T2P, VM_SWAPOUT_T2_PASSIVE);
		}
		break;

	case VM_SWAPOUT_T0_PASSIVE:
		if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
			vm_swapout_enter_passive_tier(THROTTLE_LEVEL_COMPRESSOR_TIER2,
			    &vm_swapper_entered_T2P, VM_SWAPOUT_LIMIT_T2P, VM_SWAPOUT_T2_PASSIVE);
		} else if (SWAPPER_NEEDS_TO_CATCHUP()) {
			/* Same I/O tier, but passive I/O is switched off. */
			vm_swapper_entered_T0++;

			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);

			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
			vm_swapout_state = VM_SWAPOUT_T0;
		}
		break;

	case VM_SWAPOUT_T0:
		if (SWAPPER_HAS_CAUGHTUP()) {
			/* Only the passive bit changes; the I/O tier is not reprogrammed. */
			vm_swapper_entered_T0P++;

			proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
			    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

			vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
			vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
		}
		break;
	}
}

/* Incremented by vm_swapout_thread() when it dequeues a segment whose size is 0. */
int vm_swapout_found_empty = 0;

/* Pool of swapout I/O completion contexts, one per possible in-flight I/O. */
struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];

/*
 * vm_swapout_soc_busy: contexts with an I/O issued and not yet completed
 *                      by vm_swapout_complete_soc().
 * vm_swapout_soc_done: contexts flagged done by vm_swapout_iodone() and
 *                      waiting to be processed.
 */
int vm_swapout_soc_busy = 0;
int vm_swapout_soc_done = 0;


static struct swapout_io_completion *
vm_swapout_find_free_soc(void)
{
	int      i;

	for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
		if (vm_swapout_ctx[i].swp_io_busy == 0) {
			return &vm_swapout_ctx[i];
		}
	}
	assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);

	return NULL;
}

static struct swapout_io_completion *
vm_swapout_find_done_soc(void)
{
	int      i;

	if (vm_swapout_soc_done) {
		for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
			if (vm_swapout_ctx[i].swp_io_done) {
				return &vm_swapout_ctx[i];
			}
		}
	}
	return NULL;
}

/*
 * Finish processing a completed swapout I/O and return its context to
 * the free pool.
 *
 * Entered and exited with c_list_lock held (spin); the lock is dropped
 * around the calls to vm_swap_put_finish() and vm_swapout_finish().
 */
static void
vm_swapout_complete_soc(struct swapout_io_completion *soc)
{
	/* Translate the raw I/O error into the status handed to vm_swapout_finish(). */
	kern_return_t  io_status = soc->swp_io_error ? KERN_FAILURE : KERN_SUCCESS;

	lck_mtx_unlock_always(c_list_lock);

	vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
	vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, io_status);

	lck_mtx_lock_spin_always(c_list_lock);

	/* Context is idle again. */
	soc->swp_io_done = 0;
	soc->swp_io_busy = 0;

	vm_swapout_soc_busy--;
	vm_swapout_soc_done--;
}

/* Set by vm_swapout_thread() once its one-time setup has run. */
bool vm_swapout_thread_inited = false;
extern uint32_t c_donate_swapout_count;
#if CONFIG_JETSAM
bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
/*
 * swapout_sleep_threshold sets the percentage of the swapout threshold at which
 * the swap thread will stop processing the swapout queue.
 * By default this is 90, which means we will swap until the
 * swapout queue size is at 90% of the threshold to wake the swap thread.
 * By definition the queue length must be >= 100% of the threshold when the
 * swap thread is woken up. On development builds this can be adjusted with
 * the vm.swapout_sleep_threshold sysctl.
 */
uint32_t swapout_sleep_threshold = 90;
#endif /* CONFIG_JETSAM */
static bool
should_process_swapout_queue(const queue_head_t *swapout_list_head)
{
	bool process_queue = !queue_empty(swapout_list_head) &&
	    vm_swapout_soc_busy < vm_swapout_limit &&
	    !compressor_store_stop_compaction;
#if CONFIG_JETSAM
	if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
		process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
	}
#endif /* CONFIG_JETSAM */
	return process_queue;
}

/*
 * vm_swapout_thread
 *
 * Body of the swapout thread.  It never returns: after draining whatever
 * work is available it blocks on the &vm_swapout_thread event with itself
 * as the continuation, so every wakeup restarts the function from the top.
 *
 * Each pass:
 *   - pulls segments off the current swapout queue (starting with the
 *     early queue) for as long as should_process_swapout_queue() allows,
 *     and issues a vm_swap_put() for each, using a completion context
 *     from vm_swapout_ctx[];
 *   - reaps completion contexts flagged done by vm_swapout_iodone();
 *   - re-evaluates the I/O throttle state after each segment;
 *   - before sleeping, picks the next non-empty queue and loops again
 *     if should_process_swapout_queue() accepts it.
 *
 * c_list_lock is held (spin) at every loop head and is dropped around the
 * I/O submission and the completion handling.
 */
void
vm_swapout_thread(void)
{
	uint32_t        size = 0;
	c_segment_t     c_seg = NULL;
	kern_return_t   kr = KERN_SUCCESS;
	struct swapout_io_completion *soc;
	queue_head_t    *swapout_list_head;
	bool            queues_empty = false;

	/* One-time setup on the first run of the thread. */
	if (!vm_swapout_thread_inited) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
		current_thread()->options |= TH_OPT_VMPRIV;
		vm_swapout_thread_inited = true;
	}

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	swapout_list_head = &c_early_swapout_list_head;
	vm_swapout_thread_running = TRUE;
	os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
again:
	while (should_process_swapout_queue(swapout_list_head)) {
		c_seg = (c_segment_t)queue_first(swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			/*
			 * Someone else owns the segment: wait for it and then
			 * re-evaluate the queue head from scratch.
			 * NOTE(review): c_seg_wait_on_busy() presumably returns
			 * with c_seg->c_lock dropped, since the lock is taken
			 * again at the top of the loop -- confirm.
			 */
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		/* Number of bytes to write: the populated part, rounded up to a page. */
		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			/*
			 * c_seg_free_locked will drop the c_list_lock and
			 * the c_seg->c_lock.
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);
			c_seg = NULL;

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		/*
		 * Claim the segment for swapout and move it to the
		 * swap-I/O queue before dropping both locks.
		 */
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

		lck_mtx_unlock_always(c_list_lock);
		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		/*
		 * should_process_swapout_queue() only lets us get here while
		 * vm_swapout_soc_busy < vm_swapout_limit, so a free context
		 * is expected to exist.
		 */
		soc = vm_swapout_find_free_soc();
		assert(soc);

		soc->swp_upl_ctx.io_context = (void *)soc;
		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
		soc->swp_upl_ctx.io_error = 0;

		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

		if (kr != KERN_SUCCESS) {
			/*
			 * The put failed.  If the completion callback already
			 * flagged this context as done, undo that accounting,
			 * then finish the segment here with the failure code.
			 */
			if (soc->swp_io_done) {
				lck_mtx_lock_spin_always(c_list_lock);

				soc->swp_io_done = 0;
				vm_swapout_soc_done--;

				lck_mtx_unlock_always(c_list_lock);
			}
			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
		} else {
			/* I/O is in flight; the context stays busy until it is reaped. */
			soc->swp_io_busy = 1;
			vm_swapout_soc_busy++;
		}

c_seg_is_empty:
		/* All swapout queues drained: see whether defragmenting is worthwhile. */
		if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
		}

		lck_mtx_lock_spin_always(c_list_lock);

		/* Reap every I/O that has completed so far. */
		while ((soc = vm_swapout_find_done_soc())) {
			vm_swapout_complete_soc(soc);
		}
		lck_mtx_unlock_always(c_list_lock);

		vm_swapout_thread_throttle_adjust();

		lck_mtx_lock_spin_always(c_list_lock);
	}
	while ((soc = vm_swapout_find_done_soc())) {
		vm_swapout_complete_soc(soc);
	}
	lck_mtx_unlock_always(c_list_lock);

	vm_pageout_io_throttle();

	lck_mtx_lock_spin_always(c_list_lock);

	/*
	 * Recheck if we have some c_segs to wakeup
	 * post throttle. And, check to see if we
	 * have any more swapouts needed.
	 */
	if (vm_swapout_soc_done) {
		goto again;
	}

#if XNU_TARGET_OS_OSX
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#else /* XNU_TARGET_OS_OSX */
	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
#endif /* XNU_TARGET_OS_OSX */

	if (!queues_empty) {
		/* Pick the next queue to service: early first, then the others. */
		swapout_list_head = NULL;
		if (!queue_empty(&c_early_swapout_list_head)) {
			swapout_list_head = &c_early_swapout_list_head;
		} else {
#if XNU_TARGET_OS_OSX
			/*
			 * On macOS we _always_ process all swapout queues.
			 */
			if (!queue_empty(&c_regular_swapout_list_head)) {
				swapout_list_head = &c_regular_swapout_list_head;
			} else {
				swapout_list_head = &c_late_swapout_list_head;
			}
#else /* XNU_TARGET_OS_OSX */
			/*
			 * On non-macOS swap-capable platforms, we might want to
			 * process just the early queue (Freezer) or process both
			 * early and late queues (app swap). We processed the early
			 * queue up above. The late Q will only be processed if the
			 * checks in should_process_swapout_queue give the go-ahead.
			 */
			swapout_list_head = &c_late_swapout_list_head;
#endif /* XNU_TARGET_OS_OSX */
		}
		if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
			goto again;
		}
	}

	/*
	 * Nothing more to do right now.  The wait is asserted and the
	 * running flag cleared while c_list_lock is still held;
	 * vm_swapout_iodone() tests that flag under the same lock before
	 * issuing its wakeup.
	 */
	assert_wait((event_t)&vm_swapout_thread, THREAD_UNINT);

	vm_swapout_thread_running = FALSE;

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}


void
vm_swapout_iodone(void *io_context, int error)
{
	struct swapout_io_completion *soc;

	soc = (struct swapout_io_completion *)io_context;

	lck_mtx_lock_spin_always(c_list_lock);

	soc->swp_io_done = 1;
	soc->swp_io_error = error;
	vm_swapout_soc_done++;

	if (!vm_swapout_thread_running) {
		thread_wakeup((event_t)&vm_swapout_thread);
	}

	lck_mtx_unlock_always(c_list_lock);
}


/*
 * vm_swapout_finish
 *
 * Post-process a segment after its swapout attempt.
 *
 *   c_seg     segment that was being swapped out; expected to have both
 *             c_busy and c_busy_swapping set (asserted below).
 *   f_offset  swap handle to record in the segment on success.
 *   size      number of bytes that were written (used to depopulate the
 *             buffer and for the swapout statistics).
 *   kr        KERN_SUCCESS if the write succeeded, anything else otherwise.
 *
 * On success the segment's buffer is depopulated, the segment moves to a
 * swapped-out queue and the accounting is updated.  On failure the
 * segment is decrypted again (ENCRYPTED_SWAP) and put back on an in-core
 * queue.  In both cases c_busy_swapping is cleared and
 * C_SEG_WAKEUP_DONE() is invoked on the segment.
 *
 * Takes c_list_lock and then c_seg->c_lock; both are released before
 * returning.
 */
static void
vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
{
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	if (kr == KERN_SUCCESS) {
		/* The data is on disk now; give back the in-core pages. */
		kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
		    KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
	}
#if ENCRYPTED_SWAP
	else {
		/* The buffer was encrypted before the write; undo that. */
		vm_swap_decrypt(c_seg, false);
	}
#endif /* ENCRYPTED_SWAP */
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	if (kr == KERN_SUCCESS) {
		int             new_state = C_ON_SWAPPEDOUT_Q;
		boolean_t       insert_head = FALSE;

		if (hibernate_flushing == TRUE) {
			/*
			 * Segments whose generation id falls inside the
			 * "to warm" window are inserted at the head of the queue.
			 */
			if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
			    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
				insert_head = TRUE;
			}
		} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
			new_state = C_ON_SWAPPEDOUTSPARSE_Q;
		}

		c_seg_switch_state(c_seg, new_state, insert_head);

		/* From here on the store union holds the swap handle. */
		c_seg->c_store.c_swap_handle = f_offset;

		counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
		__assert_only unsigned int new_swapped_count = os_atomic_add(
			&vm_page_swapped_count, c_seg->c_slots_used, relaxed);
		/* Detect overflow */
		assert3u(new_swapped_count, >=, c_seg->c_slots_used);

		c_seg->c_swappedin = false;

		if (c_seg->c_bytes_used) {
			os_atomic_sub(&compressor_bytes_used, c_seg->c_bytes_used, relaxed);
		}

#if CONFIG_FREEZE
		/*
		 * Successful swapout. Decrement the in-core compressed pages count.
		 */
		os_atomic_sub(&c_segment_pages_compressed_incore, c_seg->c_slots_used, relaxed);
		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
		if (c_seg->c_has_donated_pages) {
			os_atomic_sub(&c_segment_pages_compressed_incore_late_swapout, (c_seg->c_slots_used), relaxed);
		}
#endif /* CONFIG_FREEZE */
	} else {
		/* Swapout failed: undo the overage accounting if it applied. */
		if (c_seg->c_overage_swap == TRUE) {
			c_seg->c_overage_swap = FALSE;
			c_overage_swapped_count--;
		}

#if CONFIG_FREEZE
		if (c_seg->c_has_freezer_pages) {
			if (c_seg->c_task_owner) {
				c_seg_update_task_owner(c_seg, NULL);
			}
			/*
			 * We failed to swapout a frozen cseg. We need
			 * to put it back in the queues, specifically the
			 * AGE_Q. So clear the donated bit otherwise it'll
			 * land on the swapped_in Q.
			 */
			c_seg->c_has_donated_pages = 0;
			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
		} else
#endif /* CONFIG_FREEZE */
		{
			/* Donated segments go to the swapped-in queue, others to the age queue. */
			if (c_seg->c_has_donated_pages) {
				c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
			} else {
				c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
			}
		}

		/* At least a page of unused space: schedule a delayed compaction. */
		if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
			c_seg_need_delayed_compaction(c_seg, TRUE);
		}
	}
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);

	c_seg->c_busy_swapping = 0;
	lck_mtx_unlock_always(c_list_lock);

	/* NOTE(review): presumably clears c_busy and wakes waiters -- confirm against the macro. */
	C_SEG_WAKEUP_DONE(c_seg);
	lck_mtx_unlock_always(&c_seg->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}


/*
 * vm_swap_create_file
 *
 * Create (or re-use) a swapfile and make it available for swapouts.
 *
 * A swapfile structure already on swf_global_queue whose flags are
 * exactly SWAP_REUSE is recycled; otherwise a new structure and path
 * are allocated.  The file is opened and preallocated, starting at
 * MAX_SWAP_FILE_SIZE and halving the size on each failure until it drops
 * below MIN_SWAP_FILE_SIZE.  On success the per-file bitmap and segment
 * table are allocated, the file is marked SWAP_READY (and SWAP_PINNED if
 * it was pinned), the global counters are updated and waiters on
 * &vm_num_swap_files are woken.
 *
 * Returns TRUE if a swapfile was made ready, FALSE otherwise.  On failure
 * a newly allocated structure is freed again; a re-used one stays on the
 * queue.
 */
boolean_t
vm_swap_create_file()
{
	uint64_t        size = 0;
	int             namelen = 0;
	boolean_t       swap_file_created = FALSE;
	boolean_t       swap_file_reuse = FALSE;
	boolean_t       swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {
		/* Room for the base name, the numeric suffix and the terminating NUL. */
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
		/* The index is 1-based while the file name suffix below is 0-based. */
		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		/* Open failed: only a freshly allocated structure is ours to free. */
		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = true;

	size = MAX_SWAP_FILE_SIZE;

	/* Try progressively smaller files until preallocation succeeds. */
	while (size >= MIN_SWAP_FILE_SIZE) {
		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			/*
			 * One bit per segment, at least one byte.
			 * NOTE(review): the shift rounds down, so this presumably
			 * relies on swp_nsegs being a multiple of 8 -- confirm.
			 */
			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
			    Z_WAITOK | Z_ZERO);

			/* Per-segment back pointers to the c_segment stored in each slot. */
			swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
			    Z_WAITOK | Z_ZERO);

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
				swp_trim_supported = TRUE;
			}

			lck_mtx_lock(&vm_swap_data_lock);

			/* Publish the file: from here on it can be picked for swapouts. */
			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;
			if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
				vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
			}

			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}

			lck_mtx_unlock(&vm_swap_data_lock);

			/* vm_swap_put() waits on this event when it runs out of swap space. */
			thread_wakeup((event_t) &vm_num_swap_files);
#if !XNU_TARGET_OS_OSX
			/* First swapfile: derive the overage limit from its size. */
			if (vm_num_swap_files == 1) {
				c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;

				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					c_overage_swapped_limit /= 2;
				}
			}
#endif /* !XNU_TARGET_OS_OSX */
			break;
		} else {
			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {
		/* Preallocation failed at every size: close the file again. */
		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree_data(swf->swp_path, swf->swp_pathlen);
			kfree_type(struct swapfile, swf);
		}
	}
	return swap_file_created;
}

extern void vnode_put(struct vnode* vp);

/*
 * vm_swap_get
 *
 * Read a swapped-out segment back into c_seg's buffer.
 *
 *   c_seg     destination segment; its c_store.c_buffer must be set.
 *   f_offset  swap handle identifying the swapfile and the slot.
 *   size      number of bytes to read.
 *
 * Fails without doing I/O if no swapfile matches the handle or the
 * swapfile is neither SWAP_READY nor SWAP_RECLAIM.  Otherwise the slot is
 * released with vm_swap_free() whether or not the read succeeded, and a
 * thread waiting for the file's I/O count to drain is woken when the
 * count reaches zero.
 *
 * Returns KERN_SUCCESS if the read succeeded, KERN_FAILURE otherwise.
 */
kern_return_t
vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
{
	struct swapfile *swapfile;
	uint64_t        slot_offset;
	int             io_status;

	assert(c_seg->c_store.c_buffer);

	lck_mtx_lock(&vm_swap_data_lock);

	swapfile = vm_swapfile_for_handle(f_offset);

	if (swapfile == NULL || !(swapfile->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
		vm_swap_get_failures++;
		lck_mtx_unlock(&vm_swap_data_lock);

		return KERN_FAILURE;
	}
	/* Hold an I/O reference on the swapfile for the duration of the read. */
	swapfile->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	slot_offset = (f_offset & SWAP_SLOT_MASK);

	io_status = vnode_getwithref(swapfile->swp_vp);

	if (io_status != 0) {
		printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", io_status);
	} else {
		io_status = vm_swapfile_io(swapfile->swp_vp, slot_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
		vnode_put(swapfile->swp_vp);
	}

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	if (io_status != 0) {
		vm_swap_get_failures++;
	} else {
		counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
	}

	/*
	 * The slot is given back in either case.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);

	swapfile->swp_io_count--;

	if ((swapfile->swp_flags & SWAP_WANTED) && swapfile->swp_io_count == 0) {
		swapfile->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swapfile->swp_flags);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	return (io_status == 0) ? KERN_SUCCESS : KERN_FAILURE;
}

/*
 * vm_swap_put
 *
 * Allocate a free slot in some ready swapfile and write a segment to it.
 *
 *   addr      kernel address of the data to write; must be non-zero.
 *   f_offset  out: swap handle of the slot (swapfile index combined with
 *             the offset inside the file); must be non-NULL.
 *   size      number of bytes to write.
 *   c_seg     segment being written; recorded in the swapfile's segment
 *             table and expected to be busy and busy-swapping.
 *   soc       optional completion context.  When non-NULL, its
 *             swp_upl_ctx is handed to vm_swapfile_io() and, if the I/O
 *             was issued without error, this function returns
 *             KERN_SUCCESS without calling vm_swap_put_finish() -- the
 *             swapout thread calls it later from vm_swapout_complete_soc().
 *             When NULL, vm_swap_put_finish() is called before returning.
 *
 * Returns KERN_SUCCESS or KERN_FAILURE.  Failure cases: bad arguments,
 * compaction stopped, no free slot in any ready swapfile, or an I/O error.
 * When no slot is available the caller may be blocked for up to one
 * second waiting for a new swapfile to appear.
 */
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
{
	unsigned int    segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t        file_offset = 0;
	uint64_t        swapfile_index = 0;
	unsigned int    byte_for_segidx = 0;
	unsigned int    offset_within_byte = 0;
	boolean_t       swf_eligible = FALSE;
	boolean_t       waiting = FALSE;
	boolean_t       retried = FALSE;
	int             error = 0;
	uint64_t        now;
	void            *upl_ctx = NULL;
	boolean_t       drop_iocount = FALSE;

	if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	/* Walk every swapfile looking for one that is ready and not full. */
	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		/* Start the bitmap scan at the file's free hint. */
		segidx = swf->swp_free_hint;

		swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {
			while (segidx < swf->swp_nsegs) {
				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					/* Slot in use, try the next one. */
					segidx++;
					continue;
				}

				/* Free slot found: claim it and account for it. */
				(swf->swp_bitmap)[byte_for_segidx] |= (uint8_t)(1 << offset_within_byte);

				file_offset = segidx * compressed_swap_chunk_size;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swf->swp_csegs[segidx] = c_seg;

				swapfile_index = swf->swp_index;
				vm_swapfile_total_segs_used++;
				if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
					vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
				}

				now = mach_absolute_time();

				/* Kick the swapfile-create thread if another file should be created. */
				if (vm_swapfile_should_create(now) && !vm_swapfile_create_thread_running) {
					thread_wakeup((event_t) &vm_swapfile_create_needed);
				}

				lck_mtx_unlock(&vm_swap_data_lock);

				goto issue_io;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	now = mach_absolute_time();

	if (vm_swapfile_should_create(now)) {
		if (!vm_swapfile_create_thread_running) {
			thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		waiting = TRUE;
		/* vm_swap_create_file() issues a wakeup on this event when a file becomes ready. */
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
	} else {
		if (hibernate_flushing) {
			hibernate_no_swapspace = TRUE;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		/* During a hibernation flush, retry the slot search exactly once. */
		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures_no_swap_file++;

	return KERN_FAILURE;

issue_io:
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);

	/* Build the handle: swapfile index in the high bits, file offset below. */
	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if (soc) {
		/* Record what the completion path needs to finish this swapout. */
		soc->swp_c_seg = c_seg;
		soc->swp_c_size = size;

		soc->swp_swf = swf;

		soc->swp_io_error = 0;
		soc->swp_io_done = 0;

		upl_ctx = (void *)&soc->swp_upl_ctx;
	}

	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
	} else {
		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
		/* A vnode reference is held; vm_swap_put_finish() must drop it. */
		drop_iocount = TRUE;
	}

	/*
	 * Finish here on any error, or when no completion context was
	 * supplied.  Otherwise the caller finishes via the context.
	 */
	if (error || upl_ctx == NULL) {
		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
	}

	return KERN_SUCCESS;
}

/*
 * vm_swap_put_finish
 *
 * Complete a swap write started by vm_swap_put().
 *
 *   swf           swapfile the write went to.
 *   f_offset      pointer to the swap handle of the slot that was written.
 *   error         I/O result; non-zero means the write failed.
 *   drop_iocount  when set, the vnode reference on the swapfile is dropped.
 *
 * Drops the swapfile's I/O count and wakes a SWAP_WANTED waiter when the
 * count reaches zero.  On error the slot is released again and the
 * failure counter is bumped.
 *
 * Returns KERN_SUCCESS when error is zero, KERN_FAILURE otherwise.
 */
kern_return_t
vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
{
	if (drop_iocount) {
		vnode_put(swf->swp_vp);
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_io_count--;

	if (swf->swp_io_count == 0) {
		if (swf->swp_flags & SWAP_WANTED) {
			swf->swp_flags &= ~SWAP_WANTED;
			thread_wakeup((event_t) &swf->swp_flags);
		}
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (!error) {
		return KERN_SUCCESS;
	}

	vm_swap_free(*f_offset);
	vm_swap_put_failures++;

	return KERN_FAILURE;
}


/*
 * vm_swap_free_now
 *
 * Immediately release the slot identified by f_offset in swapfile swf.
 * Does nothing unless the swapfile is SWAP_READY or SWAP_RECLAIM.  If the
 * slot's bit is set it is cleared, the segment back pointer is reset, the
 * usage counters are decremented and the free hint is lowered if needed.
 * Afterwards the swapfile GC thread is woken when a reclaim is warranted
 * and the thread is not already running.
 *
 * The caller in this file (vm_swap_free) holds vm_swap_data_lock.
 */
static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	unsigned int    slot;
	unsigned int    bitmap_byte;
	int             bitmap_bit;

	if (!(swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
		return;
	}

	slot = (unsigned int) ((f_offset & SWAP_SLOT_MASK) / compressed_swap_chunk_size);
	bitmap_byte = slot >> 3;
	bitmap_bit = 1 << (slot % 8);

	if ((swf->swp_bitmap)[bitmap_byte] & bitmap_bit) {
		(swf->swp_bitmap)[bitmap_byte] &= ~bitmap_bit;

		swf->swp_csegs[slot] = NULL;

		swf->swp_nseginuse--;
		vm_swapfile_total_segs_used--;

		/* Keep the hint pointing at the lowest known free slot. */
		if (slot < swf->swp_free_hint) {
			swf->swp_free_hint = slot;
		}
	}

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
		thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}


/*
 * Statistics maintained by vm_swap_free(): number of slots released
 * immediately versus queued on a swapfile's delayed trim list.
 */
uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;


/*
 * vm_swap_free
 *
 * Release the swap slot identified by the handle f_offset.
 *
 * Nothing happens unless the handle maps to a swapfile that is
 * SWAP_READY or SWAP_RECLAIM.  The slot is then either
 *   - released immediately (vm_swap_free_now), when trim is not supported,
 *     when the swapfile is being reclaimed, or when no trim_list entry
 *     is available, or
 *   - queued on the swapfile's delayed trim list, in which case the
 *     swapfile-create thread is woken once enough trims have piled up
 *     and the dont_trim_until_ts deadline has passed.
 *
 * The trim_list entry is allocated before vm_swap_data_lock is taken,
 * based on an unlocked read of swp_trim_supported.  That flag is set
 * by vm_swap_create_file() without the lock, so it can flip to TRUE
 * between the unlocked read and the locked re-check; in that case there
 * is no entry to queue and the slot is simply freed right away instead
 * of dereferencing a NULL pointer.
 */
void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;

	if (swp_trim_supported == TRUE) {
		tl = kalloc_type(struct trim_list, Z_WAITOK);
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
		if (swp_trim_supported == FALSE || tl == NULL || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism...
			 * also free right away if we have no trim_list entry to
			 * queue (see the header comment)
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
			goto done;
		}
		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
		tl->tl_length = compressed_swap_chunk_size;

		/* Push onto the head of the swapfile's delayed trim list. */
		tl->tl_next = swf->swp_delayed_trim_list_head;
		swf->swp_delayed_trim_list_head = tl;
		swf->swp_delayed_trim_count++;
		/* Ownership moved to the list; don't free it below. */
		tl = NULL;

		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
			uint64_t now = mach_absolute_time();

			if (now > dont_trim_until_ts) {
				thread_wakeup((event_t) &vm_swapfile_create_needed);
			}
		}
		vm_swap_free_delayed_count++;
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	/* Entry was allocated but not queued. */
	if (tl != NULL) {
		kfree_type(struct trim_list, tl);
	}
}


/*
 * Block until delayed_trim_handling_in_progress is clear.
 *
 * vm_swap_data_lock is dropped while blocked and re-taken before the flag
 * is tested again, so the caller is expected to hold it on entry and
 * holds it again on return.
 */
static void
vm_swap_wait_on_trim_handling_in_progress()
{
	for (;;) {
		if (!delayed_trim_handling_in_progress) {
			break;
		}

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}


/*
 * vm_swap_handle_delayed_trims
 *
 * Process the delayed trim lists of all ready swapfiles.
 *
 *   force_now  when TRUE every SWAP_READY swapfile is processed;
 *              otherwise only those for which VM_SWAP_SHOULD_TRIM() holds.
 *
 * Returns immediately if compaction has been stopped.  While the walk is
 * in progress delayed_trim_handling_in_progress is set; waiters on it are
 * woken at the end, and the swapfile GC thread is kicked if a reclaim is
 * warranted and it is not already running.
 */
static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *cur;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */

	if (compressor_store_stop_compaction == TRUE) {
		return;
	}

	lck_mtx_lock(&vm_swap_data_lock);
	delayed_trim_handling_in_progress = true;
	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	for (cur = (struct swapfile*) queue_first(&swf_global_queue);
	    queue_end(&swf_global_queue, (queue_entry_t)cur) == FALSE;
	    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {
		if (!(cur->swp_flags & SWAP_READY)) {
			continue;
		}
		if (force_now != TRUE && !VM_SWAP_SHOULD_TRIM(cur)) {
			continue;
		}
		assert(!(cur->swp_flags & SWAP_RECLAIM));
		vm_swap_do_delayed_trim(cur);
	}

	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = false;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
		thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}

	lck_mtx_unlock(&vm_swap_data_lock);
}

/*
 * vm_swap_do_delayed_trim
 *
 * Issue the pending trims of swapfile swf and release the corresponding
 * slots.
 *
 * Returns without doing anything if compaction has been stopped or a
 * reference on the swapfile's vnode cannot be obtained.  Otherwise the
 * delayed trim list is detached under vm_swap_data_lock, handed to
 * vnode_trim_list(), and then walked entry by entry: each slot's bitmap
 * bit is cleared (with the usage counters and the free hint updated) under
 * the lock, and the entry is freed.
 */
static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *pending;
	struct trim_list *entry;
	struct trim_list *next;
	int rc;

	if (compressor_store_stop_compaction == TRUE) {
		return;
	}

	rc = vnode_getwithref(swf->swp_vp);

	if (rc != 0) {
		printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", rc);
		return;
	}

	/* Detach the whole list so it can be processed without the lock. */
	lck_mtx_lock(&vm_swap_data_lock);

	pending = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, pending, TRUE);

	(void) vnode_put(swf->swp_vp);

	for (entry = pending; entry != NULL; entry = next) {
		unsigned int    slot;
		unsigned int    bitmap_byte;
		int             bitmap_bit;

		lck_mtx_lock(&vm_swap_data_lock);

		slot = (unsigned int) (entry->tl_offset / compressed_swap_chunk_size);
		bitmap_byte = slot >> 3;
		bitmap_bit = 1 << (slot % 8);

		if ((swf->swp_bitmap)[bitmap_byte] & bitmap_bit) {
			(swf->swp_bitmap)[bitmap_byte] &= ~bitmap_bit;

			swf->swp_csegs[slot] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (slot < swf->swp_free_hint) {
				swf->swp_free_hint = slot;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		/* Fetch the successor before the entry is freed. */
		next = entry->tl_next;

		kfree_type(struct trim_list, entry);
	}
}


/*
 * vm_swap_flush: intentionally a no-op.
 */
void
vm_swap_flush()
{
}

int     vm_swap_reclaim_yielded = 0;

void
vm_swap_reclaim(void)
{
	vm_offset_t     addr = 0;
	unsigned int    segidx = 0;
	uint64_t        f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int    min_nsegs = 0;
	unsigned int    byte_for_segidx = 0;
	unsigned int    offset_within_byte = 0;
	uint32_t        c_size = 0;

	c_segment_t     c_seg = NULL;

	kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
	    KMA_NOFAIL | KMA_KOBJECT | KMA_DATA_SHARED, VM_KERN_MEMORY_COMPRESSOR);

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL) {
		goto done;
	}

	swf = smallest_swf;


	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {
		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {
ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {
			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];
		assert(c_seg);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * compressed_swap_chunk_size;

		assert(c_seg == swf->swp_csegs[segidx]);
		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		assert(C_SEG_IS_ONDISK(c_seg));

		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
		c_seg_trim_tail(c_seg);
#endif
		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		assert(c_size <= c_seg_bufsize && c_size);

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (vnode_getwithref(swf->swp_vp)) {
			printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
			vm_swap_get_failures++;
			goto swap_io_failed;
		} else {
			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
				/*
				 * reading the data back in failed, so convert c_seg
				 * to a swapped in c_segment that contains no data
				 */
				c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
				/*
				 * returns with c_busy_swapping cleared
				 */
				vnode_put(swf->swp_vp);
				vm_swap_get_failures++;
				goto swap_io_failed;
			}
			vnode_put(swf->swp_vp);
		}

		counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
		vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;

		if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
			vm_offset_t     c_buffer;

			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 * with valid data
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

			kernel_memory_populate(c_buffer, c_size,
			    KMA_NOFAIL | KMA_COMPRESSOR,
			    VM_KERN_MEMORY_COMPRESSOR);

			memcpy((char *)c_buffer, (char *)addr, c_size);

			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg, true);
#endif /* ENCRYPTED_SWAP */
			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

			goto swap_io_failed;
		}
		counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		c_seg->c_swappedin = false;

		assert(C_SEG_IS_ONDISK(c_seg));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;

		assert(c_seg->c_busy_swapping);
		c_seg->c_busy_swapping = 0;
swap_io_failed:
		assert(c_seg->c_busy);
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {
		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
	kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	if (swf->swp_flags & SWAP_PINNED) {
		vm_num_pinned_swap_files--;
		vm_swappin_avail += swf->swp_size;
	}

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

	vm_num_swap_files--;

done:
	thread_wakeup((event_t) &swf->swp_flags);
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
}


/*
 * Report the total swap space currently allocated: the number of
 * allocated swapfile segments scaled by the swap chunk size.
 */
uint64_t
vm_swap_get_total_space(void)
{
	return (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
}

/*
 * Report the swap space currently in use: the number of swapfile
 * segments in use scaled by the swap chunk size.
 */
uint64_t
vm_swap_get_used_space(void)
{
	return (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
}

/*
 * Report the allocated-but-unused swap space, i.e. total space minus
 * used space as computed by the two accessors above.
 */
uint64_t
vm_swap_get_free_space(void)
{
	const uint64_t total = vm_swap_get_total_space();
	const uint64_t used = vm_swap_get_used_space();

	return total - used;
}

/*
 * Report the maximum swap space the configuration allows: the swapfile
 * count limit multiplied by MAX_SWAP_FILE_SIZE.
 *
 * When vm_num_swap_files_config is zero, VM_MAX_SWAP_FILE_NUM is used
 * as the swapfile count instead.
 */
uint64_t
vm_swap_get_max_configured_space(void)
{
	int num_swap_files = vm_num_swap_files_config;

	if (num_swap_files == 0) {
		/* No configured limit: fall back to the compile-time maximum. */
		num_swap_files = VM_MAX_SWAP_FILE_NUM;
	}

	return num_swap_files * MAX_SWAP_FILE_SIZE;
}

/*
 * Returns true when free swapfile segments have dropped below one
 * eighth of vm_swapfile_hiwater_segs AND more space cannot be expected:
 * either the most recent swapfile creation attempt failed, or the
 * number of swapfiles has reached vm_num_swap_files_config.
 *
 * Returns false otherwise, including the case where no swapfile exists
 * yet and creation is either not possible or not currently wanted.
 */
bool
vm_swap_low_on_space(void)
{
	bool segs_running_low;

	if (vm_num_swap_files == 0) {
		if (!vm_swapfile_can_be_created || !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
			/* We haven't started creating swap files yet */
			return false;
		}
	}

	segs_running_low =
	    (vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used <
	    (unsigned int)vm_swapfile_hiwater_segs / 8);

	if (!segs_running_low) {
		return false;
	}

	/*
	 * We're running low on swapfile segments
	 */
	if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
		/*
		 * We've recently failed to create a new swapfile, likely due to disk
		 * space exhaustion
		 */
		return true;
	}

	if (vm_num_swap_files == vm_num_swap_files_config) {
		/* We've reached the swapfile limit */
		return true;
	}

	return false;
}

/*
 * Returns true when fewer than VM_SWAPOUT_LIMIT_MAX swapfile segments
 * remain free AND the number of swapfiles has reached
 * vm_num_swap_files_config, so no further swapfile can be added.
 *
 * Returns false otherwise, including the case where no swapfile exists
 * yet and creation is either not possible or not currently wanted.
 */
bool
vm_swap_out_of_space(void)
{
	bool segs_exhausted;

	if (vm_num_swap_files == 0) {
		if (!vm_swapfile_can_be_created || !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
			/* We haven't started creating swap files yet */
			return false;
		}
	}

	segs_exhausted =
	    (vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used <
	    VM_SWAPOUT_LIMIT_MAX);

	if (!segs_exhausted) {
		return false;
	}

	/*
	 * We have run out of swapfile segments; this only counts as being
	 * out of space if we also can't create any more swapfiles.
	 */
	if (vm_num_swap_files == vm_num_swap_files_config) {
		return true;
	}

	return false;
}

/*
 * Returns TRUE when swap pinning is disabled, or when the count of
 * pinned swapfiles equals the count of swapfiles; FALSE otherwise.
 */
boolean_t
vm_swap_files_pinned(void)
{
	if (vm_swappin_enabled == FALSE) {
		/* Pinning is off, so report TRUE unconditionally. */
		return TRUE;
	}

	if (vm_num_pinned_swap_files == vm_num_swap_files) {
		return TRUE;
	}

	return FALSE;
}

#if CONFIG_FREEZE
/*
 * Query the freezer's daily budget from the volume backing swap.
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget().
 *     It is set to 0 here only when no swapfile exists and the
 *     temporary swapfile could not be opened.
 *
 * Returns TRUE when vm_swap_vol_get_budget() returned 0 for the vnode
 * that was queried, FALSE in every other case (no swapfile whose flags
 * equal SWAP_READY, the query returned non-zero, or the temporary
 * swapfile could not be opened).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       got_device_budget = FALSE;
	struct swapfile *cur_swf = NULL;

	if (vm_num_swap_files == 0) {
		/*
		 * This path is used for the initial budget value before any swap
		 * files are created. We create a temp swap file to get the budget.
		 */
		struct vnode *probe_vp = NULL;

		vm_swapfile_open(swapfilename, &probe_vp);

		if (probe_vp == NULL) {
			*freeze_daily_budget = 0;
			return got_device_budget;
		}

		if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
			got_device_budget = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, probe_vp);
		probe_vp = NULL;

		return got_device_budget;
	}

	/*
	 * At least one swapfile exists: walk the global swapfile queue under
	 * vm_swap_data_lock and query the first one whose flags are exactly
	 * SWAP_READY. Only that one swapfile is consulted.
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	for (cur_swf = (struct swapfile *) queue_first(&swf_global_queue);
	    cur_swf && !queue_end(&swf_global_queue, (queue_entry_t)cur_swf);
	    cur_swf = (struct swapfile *) queue_next(&cur_swf->swp_queue)) {
		if (cur_swf->swp_flags != SWAP_READY) {
			continue;
		}

		assert(cur_swf->swp_vp);

		if (vm_swap_vol_get_budget(cur_swf->swp_vp, freeze_daily_budget) == 0) {
			got_device_budget = TRUE;
		}
		break;
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return got_device_budget;
}
#endif /* CONFIG_FREEZE */

/*
 * Report the recorded maxima for allocated and used swapfile segments,
 * then restart tracking from the current values.
 *
 * alloced_max: receives vm_swapfile_total_segs_alloced_max scaled by
 *     the swap chunk size.
 * used_max: receives vm_swapfile_total_segs_used_max scaled by the
 *     swap chunk size.
 *
 * Both the reads and the resets happen while holding vm_swap_data_lock.
 */
void
vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
{
	lck_mtx_lock(&vm_swap_data_lock);

	/* Allocated segments: hand back the old maximum, then re-seed it. */
	*alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;

	/* Used segments: same treatment. */
	*used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;
	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;

	lck_mtx_unlock(&vm_swap_data_lock);
}