xref: /xnu-12377.41.6/bsd/vm/vm_compressor_backing_file.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <stdint.h>
30 #include <sys/fcntl.h>
31 #include <sys/vnode_internal.h>
32 #include <sys/vnode.h>
33 #include <sys/kauth.h>
34 #include <sys/mount_internal.h>
35 #include <sys/buf_internal.h>
36 #include <kern/debug.h>
37 #include <kern/kalloc.h>
38 #include <sys/cprotect.h>
39 #include <sys/disk.h>
40 #include <vm/vm_protos_internal.h>
41 #include <vm/vm_pageout_xnu.h>
42 #include <sys/content_protection.h>
43 #include <vm/vm_ubc.h>
44 #include <vm/vm_compressor_backing_store_internal.h>
45 
46 void
vm_swapfile_open(const char * path,vnode_t * vp)47 vm_swapfile_open(const char *path, vnode_t *vp)
48 {
49 	int error = 0;
50 	vfs_context_t   ctx = vfs_context_kernel();
51 
52 	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
53 		printf("Failed to open swap file %d\n", error);
54 		*vp = NULL;
55 		return;
56 	}
57 
58 	/*
59 	 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
60 	 * To avoid a race on the mount we only make this check after creating the
61 	 * vnode.
62 	 */
63 	if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
64 		vnode_put(*vp);
65 		vm_swapfile_close((uint64_t)path, *vp);
66 		*vp = NULL;
67 		return;
68 	}
69 
70 	vnode_put(*vp);
71 }
72 
73 uint64_t
vm_swapfile_get_blksize(vnode_t vp)74 vm_swapfile_get_blksize(vnode_t vp)
75 {
76 	return (uint64_t)vfs_devblocksize(vnode_mount(vp));
77 }
78 
79 uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)80 vm_swapfile_get_transfer_size(vnode_t vp)
81 {
82 	return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
83 }
84 
85 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
86 
87 void
vm_swapfile_close(uint64_t path_addr,vnode_t vp)88 vm_swapfile_close(uint64_t path_addr, vnode_t vp)
89 {
90 	vfs_context_t context = vfs_context_kernel();
91 	int error;
92 
93 	vnode_getwithref(vp);
94 	vnode_close(vp, 0, context);
95 
96 	error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
97 	    UIO_SYSSPACE, 0);
98 
99 #if DEVELOPMENT || DEBUG
100 	if (error) {
101 		printf("%s : unlink of %s failed with error %d", __FUNCTION__,
102 		    (char *)path_addr, error);
103 	}
104 #endif
105 }
106 
107 int
vm_swapfile_preallocate(vnode_t vp,uint64_t * size,boolean_t * pin)108 vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
109 {
110 	int             error = 0;
111 	uint64_t        file_size = 0;
112 	vfs_context_t   ctx = NULL;
113 #if CONFIG_FREEZE
114 	struct vnode_attr va;
115 #endif /* CONFIG_FREEZE */
116 
117 	ctx = vfs_context_kernel();
118 
119 	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
120 
121 	if (error) {
122 		printf("vnode_setsize for swap files failed: %d\n", error);
123 		goto done;
124 	}
125 
126 	error = vnode_size(vp, (off_t*) &file_size, ctx);
127 
128 	if (error) {
129 		printf("vnode_size (new file) for swap file failed: %d\n", error);
130 		goto done;
131 	}
132 	assert(file_size == *size);
133 
134 	if (pin != NULL && *pin != FALSE) {
135 		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
136 
137 		if (error) {
138 			printf("pin for swap files failed: %d,  file_size = %lld\n", error, file_size);
139 			/* this is not fatal, carry on with files wherever they landed */
140 			*pin = FALSE;
141 			error = 0;
142 		}
143 	}
144 
145 	vnode_lock_spin(vp);
146 	SET(vp->v_flag, VSWAP);
147 	vnode_unlock(vp);
148 
149 #if CONFIG_FREEZE
150 	VATTR_INIT(&va);
151 	VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
152 	error = VNOP_SETATTR(vp, &va, ctx);
153 
154 	if (error) {
155 		printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
156 		goto done;
157 	}
158 #endif /* CONFIG_FREEZE */
159 
160 done:
161 	return error;
162 }
163 
164 
165 int
vm_record_file_write(vnode_t vp,uint64_t offset,char * buf,int size)166 vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
167 {
168 	int error = 0;
169 	vfs_context_t ctx;
170 
171 	ctx = vfs_context_kernel();
172 
173 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
174 	    UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
175 
176 	return error;
177 }
178 
179 
180 
181 int
vm_swapfile_io(vnode_t vp,uint64_t offset,uint64_t start,int npages,int flags,void * upl_iodone)182 vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
183 {
184 	int error = 0;
185 	upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
186 #if 1
187 	kern_return_t   kr = KERN_SUCCESS;
188 	upl_t           upl = NULL;
189 	unsigned int    count = 0;
190 	upl_control_flags_t upl_create_flags = 0;
191 	int             upl_control_flags = 0;
192 	upl_size_t      upl_size = 0;
193 
194 	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
195 
196 	if (upl_iodone == NULL) {
197 		upl_control_flags = UPL_IOSYNC;
198 	}
199 
200 #if ENCRYPTED_SWAP
201 	upl_control_flags |= UPL_PAGING_ENCRYPTED;
202 #endif
203 
204 	if ((flags & SWAP_READ) == FALSE) {
205 		upl_create_flags |= UPL_COPYOUT_FROM;
206 	}
207 
208 	upl_size = io_size;
209 	kr = vm_map_create_upl( kernel_map,
210 	    start,
211 	    &upl_size,
212 	    &upl,
213 	    NULL,
214 	    &count,
215 	    &upl_create_flags,
216 	    VM_KERN_MEMORY_OSFMK);
217 
218 	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
219 		panic("vm_map_create_upl failed with %d", kr);
220 	}
221 
222 	if (flags & SWAP_READ) {
223 		vnode_pagein(vp,
224 		    upl,
225 		    0,
226 		    offset,
227 		    io_size,
228 		    upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
229 		    &error);
230 		if (error) {
231 #if DEBUG
232 			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
233 #else /* DEBUG */
234 			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
235 #endif /* DEBUG */
236 		}
237 	} else {
238 		upl_set_iodone(upl, upl_iodone);
239 
240 		vnode_pageout(vp,
241 		    upl,
242 		    0,
243 		    offset,
244 		    io_size,
245 		    upl_control_flags,
246 		    &error);
247 		if (error) {
248 #if DEBUG
249 			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
250 #else /* DEBUG */
251 			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
252 #endif /* DEBUG */
253 		}
254 	}
255 
256 	return error;
257 
258 #else /* 1 */
259 	vfs_context_t ctx;
260 	ctx = vfs_context_kernel();
261 
262 	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
263 	    UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
264 
265 	if (error) {
266 		printf("vn_rdwr: Swap I/O failed with %d\n", error);
267 	}
268 	return error;
269 #endif /* 1 */
270 }
271 
272 
273 #define MAX_BATCH_TO_TRIM       256
274 
275 #define ROUTE_ONLY              0x10            /* if corestorage is present, tell it to just pass */
276                                                 /* the DKIOUNMAP command through w/o acting on it */
277                                                 /* this is used by the compressed swap system to reclaim empty space */
278 
279 
280 u_int32_t
vnode_trim_list(vnode_t vp,struct trim_list * tl,boolean_t route_only)281 vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
282 {
283 	int             error = 0;
284 	int             trim_index = 0;
285 	u_int32_t       blocksize = 0;
286 	struct vnode    *devvp;
287 	dk_extent_t     *extents;
288 	dk_unmap_t      unmap;
289 	_dk_cs_unmap_t  cs_unmap;
290 
291 	if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
292 		return ENOTSUP;
293 	}
294 
295 	if (tl == NULL) {
296 		return 0;
297 	}
298 
299 	/*
300 	 * Get the underlying device vnode and physical block size
301 	 */
302 	devvp = vp->v_mount->mnt_devvp;
303 	blocksize = vp->v_mount->mnt_devblocksize;
304 
305 	extents = kalloc_data(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);
306 
307 	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
308 		memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
309 		cs_unmap.extents = extents;
310 
311 		if (route_only == TRUE) {
312 			cs_unmap.options = ROUTE_ONLY;
313 		}
314 	} else {
315 		memset(&unmap, 0, sizeof(dk_unmap_t));
316 		unmap.extents = extents;
317 	}
318 
319 	while (tl) {
320 		daddr64_t       io_blockno;     /* Block number corresponding to the start of the extent */
321 		size_t          io_bytecount;   /* Number of bytes in current extent for the specified range */
322 		size_t          trimmed;
323 		size_t          remaining_length;
324 		off_t           current_offset;
325 
326 		current_offset = tl->tl_offset;
327 		remaining_length = tl->tl_length;
328 		trimmed = 0;
329 
330 		/*
331 		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
332 		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
333 		 * the whole range or if we encounter an error.
334 		 */
335 		while (trimmed < tl->tl_length) {
336 			/*
337 			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
338 			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
339 			 * broken into multiple extents, it must be called multiple times, increasing the offset
340 			 * in each call to ensure that the entire range is covered.
341 			 */
342 			error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
343 			    &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);
344 
345 			if (error) {
346 				goto trim_exit;
347 			}
348 			if (io_blockno != -1) {
349 				extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
350 				extents[trim_index].length = io_bytecount;
351 
352 				trim_index++;
353 			}
354 			if (trim_index == MAX_BATCH_TO_TRIM) {
355 				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
356 					cs_unmap.extentsCount = trim_index;
357 					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
358 				} else {
359 					unmap.extentsCount = trim_index;
360 					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
361 				}
362 				if (error) {
363 					goto trim_exit;
364 				}
365 				trim_index = 0;
366 			}
367 			trimmed += io_bytecount;
368 			current_offset += io_bytecount;
369 			remaining_length -= io_bytecount;
370 		}
371 		tl = tl->tl_next;
372 	}
373 	if (trim_index) {
374 		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
375 			cs_unmap.extentsCount = trim_index;
376 			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
377 		} else {
378 			unmap.extentsCount = trim_index;
379 			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
380 		}
381 	}
382 trim_exit:
383 	kfree_data(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
384 
385 	return error;
386 }
387 
388 #if CONFIG_FREEZE
389 int
vm_swap_vol_get_budget(vnode_t vp,uint64_t * freeze_daily_budget)390 vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
391 {
392 	vnode_t         devvp = NULL;
393 	vfs_context_t   ctx = vfs_context_kernel();
394 	errno_t         err = 0;
395 
396 	err = vnode_getwithref(vp);
397 	if (err == 0) {
398 		if (vp->v_mount && vp->v_mount->mnt_devvp) {
399 			devvp = vp->v_mount->mnt_devvp;
400 			err = VNOP_IOCTL(devvp, DKIOCGETMAXSWAPWRITE, (caddr_t)freeze_daily_budget, 0, ctx);
401 		} else {
402 			err = ENODEV;
403 		}
404 		vnode_put(vp);
405 	}
406 
407 	return err;
408 }
409 #endif /* CONFIG_FREEZE */
410 
411 int
vm_swap_vol_get_capacity(const char * volume_name,uint64_t * capacity)412 vm_swap_vol_get_capacity(const char *volume_name, uint64_t *capacity)
413 {
414 	vfs_context_t   ctx = vfs_context_kernel();
415 	vnode_t vp = NULL, devvp = NULL;
416 	uint64_t block_size = 0;
417 	uint64_t block_count = 0;
418 	int error = 0;
419 	*capacity = 0;
420 
421 	if ((error = vnode_open(volume_name, FREAD, 0, 0, &vp, ctx))) {
422 		printf("Unable to open swap volume\n");
423 		return error;
424 	}
425 
426 	devvp = vp->v_mount->mnt_devvp;
427 	if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&block_size, 0, ctx))) {
428 		printf("Unable to get swap volume block size\n");
429 		goto out;
430 	}
431 	if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&block_count, 0, ctx))) {
432 		printf("Unable to get swap volume block count\n");
433 		goto out;
434 	}
435 
436 	*capacity = block_count * block_size;
437 out:
438 	error = vnode_close(vp, 0, ctx);
439 	return error;
440 }
441