xref: /xnu-8020.140.41/bsd/vm/vm_compressor_backing_file.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <stdint.h>
30 #include <sys/fcntl.h>
31 #include <sys/vnode_internal.h>
32 #include <sys/vnode.h>
33 #include <sys/kauth.h>
34 #include <sys/mount_internal.h>
35 #include <sys/buf_internal.h>
36 #include <kern/debug.h>
37 #include <kern/kalloc.h>
38 #include <sys/cprotect.h>
39 #include <sys/disk.h>
40 #include <vm/vm_protos.h>
41 #include <vm/vm_pageout.h>
42 #include <sys/content_protection.h>
43 
44 void vm_swapfile_open(const char *path, vnode_t *vp);
45 void vm_swapfile_close(uint64_t path, vnode_t vp);
46 int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin);
47 uint64_t vm_swapfile_get_blksize(vnode_t vp);
48 uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
49 int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *);
50 int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);
51 
52 #if CONFIG_FREEZE
53 int vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget);
54 #endif /* CONFIG_FREEZE */
55 
56 
57 void
vm_swapfile_open(const char * path,vnode_t * vp)58 vm_swapfile_open(const char *path, vnode_t *vp)
59 {
60 	int error = 0;
61 	vfs_context_t   ctx = vfs_context_kernel();
62 
63 	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
64 		printf("Failed to open swap file %d\n", error);
65 		*vp = NULL;
66 		return;
67 	}
68 
69 	/*
70 	 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
71 	 * To avoid a race on the mount we only make this check after creating the
72 	 * vnode.
73 	 */
74 	if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
75 		vnode_put(*vp);
76 		vm_swapfile_close((uint64_t)path, *vp);
77 		*vp = NULL;
78 		return;
79 	}
80 
81 	vnode_put(*vp);
82 }
83 
84 uint64_t
vm_swapfile_get_blksize(vnode_t vp)85 vm_swapfile_get_blksize(vnode_t vp)
86 {
87 	return (uint64_t)vfs_devblocksize(vnode_mount(vp));
88 }
89 
90 uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)91 vm_swapfile_get_transfer_size(vnode_t vp)
92 {
93 	return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
94 }
95 
96 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
97 
98 void
vm_swapfile_close(uint64_t path_addr,vnode_t vp)99 vm_swapfile_close(uint64_t path_addr, vnode_t vp)
100 {
101 	vfs_context_t context = vfs_context_kernel();
102 	int error;
103 
104 	vnode_getwithref(vp);
105 	vnode_close(vp, 0, context);
106 
107 	error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
108 	    UIO_SYSSPACE, 0);
109 
110 #if DEVELOPMENT || DEBUG
111 	if (error) {
112 		printf("%s : unlink of %s failed with error %d", __FUNCTION__,
113 		    (char *)path_addr, error);
114 	}
115 #endif
116 }
117 
118 int
vm_swapfile_preallocate(vnode_t vp,uint64_t * size,boolean_t * pin)119 vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
120 {
121 	int             error = 0;
122 	uint64_t        file_size = 0;
123 	vfs_context_t   ctx = NULL;
124 #if CONFIG_FREEZE
125 	struct vnode_attr va;
126 #endif /* CONFIG_FREEZE */
127 
128 	ctx = vfs_context_kernel();
129 
130 	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
131 
132 	if (error) {
133 		printf("vnode_setsize for swap files failed: %d\n", error);
134 		goto done;
135 	}
136 
137 	error = vnode_size(vp, (off_t*) &file_size, ctx);
138 
139 	if (error) {
140 		printf("vnode_size (new file) for swap file failed: %d\n", error);
141 		goto done;
142 	}
143 	assert(file_size == *size);
144 
145 	if (pin != NULL && *pin != FALSE) {
146 		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
147 
148 		if (error) {
149 			printf("pin for swap files failed: %d,  file_size = %lld\n", error, file_size);
150 			/* this is not fatal, carry on with files wherever they landed */
151 			*pin = FALSE;
152 			error = 0;
153 		}
154 	}
155 
156 	vnode_lock_spin(vp);
157 	SET(vp->v_flag, VSWAP);
158 	vnode_unlock(vp);
159 
160 #if CONFIG_FREEZE
161 	VATTR_INIT(&va);
162 	VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
163 	error = VNOP_SETATTR(vp, &va, ctx);
164 
165 	if (error) {
166 		printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
167 		goto done;
168 	}
169 #endif /* CONFIG_FREEZE */
170 
171 done:
172 	return error;
173 }
174 
175 
176 int
vm_record_file_write(vnode_t vp,uint64_t offset,char * buf,int size)177 vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
178 {
179 	int error = 0;
180 	vfs_context_t ctx;
181 
182 	ctx = vfs_context_kernel();
183 
184 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
185 	    UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
186 
187 	return error;
188 }
189 
190 
191 
/*
 * Perform swap I/O for 'npages' pages between the compressor buffer at
 * kernel VA 'start' and byte offset 'offset' in swap file 'vp'.
 * 'flags' selects the direction (SWAP_READ set => page in).  A non-NULL
 * 'upl_iodone' is attached to the UPL so a pageout can complete
 * asynchronously; NULL forces synchronous I/O (UPL_IOSYNC).
 * Returns 0 on success or the vnode_pagein/vnode_pageout error.
 */
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
{
	int error = 0;
	upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
#if 1
	/* Active path: build a UPL over the buffer and go through the pager. */
	kern_return_t   kr = KERN_SUCCESS;
	upl_t           upl = NULL;
	unsigned int    count = 0;
	upl_control_flags_t upl_create_flags = 0;
	int             upl_control_flags = 0;
	upl_size_t      upl_size = 0;

	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;

	/* No iodone context => the caller expects synchronous completion. */
	if (upl_iodone == NULL) {
		upl_control_flags = UPL_IOSYNC;
	}

#if ENCRYPTED_SWAP
	upl_control_flags |= UPL_PAGING_ENCRYPTED;
#endif

	if ((flags & SWAP_READ) == FALSE) {
		/* Writing to swap: pages are copied out of the kernel map. */
		upl_create_flags |= UPL_COPYOUT_FROM;
	}

	upl_size = io_size;
	kr = vm_map_create_upl( kernel_map,
	    start,
	    &upl_size,
	    &upl,
	    NULL,
	    &count,
	    &upl_create_flags,
	    VM_KERN_MEMORY_OSFMK);

	/* A short/failed UPL would silently corrupt swap; treat as fatal. */
	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
		panic("vm_map_create_upl failed with %d", kr);
	}

	if (flags & SWAP_READ) {
		vnode_pagein(vp,
		    upl,
		    0,
		    offset,
		    io_size,
		    upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
		    &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
		}
	} else {
		/* Attach the caller's iodone context (may be NULL). */
		upl_set_iodone(upl, upl_iodone);

		vnode_pageout(vp,
		    upl,
		    0,
		    offset,
		    io_size,
		    upl_control_flags,
		    &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
	}

	return error;

#else /* 1 */
	/*
	 * Alternate (compiled-out) path: plain synchronous vn_rdwr() directly
	 * on the compressor buffer, bypassing the UPL/pager machinery.
	 */
	vfs_context_t ctx;
	ctx = vfs_context_kernel();

	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
	    UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}
282 
283 
284 #define MAX_BATCH_TO_TRIM       256
285 
286 #define ROUTE_ONLY              0x10            /* if corestorage is present, tell it to just pass */
287                                                 /* the DKIOUNMAP command through w/o acting on it */
288                                                 /* this is used by the compressed swap system to reclaim empty space */
289 
290 
/*
 * Issue TRIM/UNMAP requests for every byte range in the trim list 'tl'
 * of file 'vp'.  Each range is mapped to device extents with
 * VNOP_BLOCKMAP and the extents are batched (up to MAX_BATCH_TO_TRIM
 * per ioctl) into DKIOCUNMAP — or _DKIOCCSUNMAP when the volume sits on
 * CoreStorage — against the backing device vnode.  With 'route_only'
 * TRUE on a CoreStorage volume, ROUTE_ONLY asks CoreStorage to pass the
 * unmap through without acting on it itself.
 * Returns 0 on success, ENOTSUP if the device cannot unmap, or the
 * first VNOP_BLOCKMAP/VNOP_IOCTL error encountered.
 */
u_int32_t
vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
{
	int             error = 0;
	int             trim_index = 0;
	u_int32_t       blocksize = 0;
	struct vnode    *devvp;
	dk_extent_t     *extents;
	dk_unmap_t      unmap;
	_dk_cs_unmap_t  cs_unmap;

	if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
		return ENOTSUP;
	}

	if (tl == NULL) {
		return 0;
	}

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	/* NOTE(review): result not NULL-checked — presumably Z_WAITOK
	 * guarantees success for this size; confirm against kalloc_data. */
	extents = kalloc_data(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);

	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
		/* CoreStorage variant of the unmap request. */
		memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
		cs_unmap.extents = extents;

		if (route_only == TRUE) {
			cs_unmap.options = ROUTE_ONLY;
		}
	} else {
		memset(&unmap, 0, sizeof(dk_unmap_t));
		unmap.extents = extents;
	}

	while (tl) {
		daddr64_t       io_blockno;     /* Block number corresponding to the start of the extent */
		size_t          io_bytecount;   /* Number of bytes in current extent for the specified range */
		size_t          trimmed;
		size_t          remaining_length;
		off_t           current_offset;

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;

		/*
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
		 * the whole range or if we encounter an error.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
			    &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);

			if (error) {
				goto trim_exit;
			}
			/* io_blockno == -1 means a hole: nothing on-device to trim. */
			if (io_blockno != -1) {
				extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
				extents[trim_index].length = io_bytecount;

				trim_index++;
			}
			/* Batch is full: flush the accumulated extents to the device. */
			if (trim_index == MAX_BATCH_TO_TRIM) {
				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
					cs_unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
				} else {
					unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
				}
				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}
	/* Flush any remaining partial batch. */
	if (trim_index) {
		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
			cs_unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
		} else {
			unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
		}
	}
trim_exit:
	kfree_data(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
398 
399 #if CONFIG_FREEZE
400 int
vm_swap_vol_get_budget(vnode_t vp,uint64_t * freeze_daily_budget)401 vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
402 {
403 	vnode_t         devvp = NULL;
404 	vfs_context_t   ctx = vfs_context_kernel();
405 	errno_t         err = 0;
406 
407 	devvp = vp->v_mount->mnt_devvp;
408 
409 	err = VNOP_IOCTL(devvp, DKIOCGETMAXSWAPWRITE, (caddr_t)freeze_daily_budget, 0, ctx);
410 
411 	return err;
412 }
413 #endif /* CONFIG_FREEZE */
414