1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <stdint.h>
30 #include <sys/fcntl.h>
31 #include <sys/vnode_internal.h>
32 #include <sys/vnode.h>
33 #include <sys/kauth.h>
34 #include <sys/mount_internal.h>
35 #include <sys/buf_internal.h>
36 #include <kern/debug.h>
37 #include <kern/kalloc.h>
38 #include <sys/cprotect.h>
39 #include <sys/disk.h>
40 #include <vm/vm_protos_internal.h>
41 #include <vm/vm_pageout_xnu.h>
42 #include <sys/content_protection.h>
43 #include <vm/vm_ubc.h>
44 #include <vm/vm_compressor_backing_store_internal.h>
45
46 void
vm_swapfile_open(const char * path,vnode_t * vp)47 vm_swapfile_open(const char *path, vnode_t *vp)
48 {
49 int error = 0;
50 vfs_context_t ctx = vfs_context_kernel();
51
52 error = vnode_open(
53 path,
54 (O_CREAT | O_TRUNC | O_NOFOLLOW_ANY | FREAD | FWRITE),
55 S_IRUSR | S_IWUSR,
56 0,
57 vp,
58 ctx);
59 if (error) {
60 printf("Failed to open swap file %d\n", error);
61 *vp = NULL;
62 return;
63 }
64
65 /*
66 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
67 * To avoid a race on the mount we only make this check after creating the
68 * vnode.
69 */
70 if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
71 vnode_put(*vp);
72 vm_swapfile_close((uint64_t)path, *vp);
73 *vp = NULL;
74 return;
75 }
76
77 vnode_put(*vp);
78 }
79
80 uint64_t
vm_swapfile_get_blksize(vnode_t vp)81 vm_swapfile_get_blksize(vnode_t vp)
82 {
83 return (uint64_t)vfs_devblocksize(vnode_mount(vp));
84 }
85
86 uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)87 vm_swapfile_get_transfer_size(vnode_t vp)
88 {
89 return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
90 }
91
92 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
93
94 void
vm_swapfile_close(uint64_t path_addr,vnode_t vp)95 vm_swapfile_close(uint64_t path_addr, vnode_t vp)
96 {
97 vfs_context_t context = vfs_context_kernel();
98 int error;
99
100 vnode_getwithref(vp);
101 vnode_close(vp, 0, context);
102
103 error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
104 UIO_SYSSPACE, 0);
105
106 #if DEVELOPMENT || DEBUG
107 if (error) {
108 printf("%s : unlink of %s failed with error %d", __FUNCTION__,
109 (char *)path_addr, error);
110 }
111 #endif
112 }
113
114 int
vm_swapfile_preallocate(vnode_t vp,uint64_t * size,boolean_t * pin)115 vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
116 {
117 int error = 0;
118 uint64_t file_size = 0;
119 vfs_context_t ctx = NULL;
120 #if CONFIG_FREEZE
121 struct vnode_attr va;
122 #endif /* CONFIG_FREEZE */
123
124 ctx = vfs_context_kernel();
125
126 error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
127
128 if (error) {
129 printf("vnode_setsize for swap files failed: %d\n", error);
130 goto done;
131 }
132
133 error = vnode_size(vp, (off_t*) &file_size, ctx);
134
135 if (error) {
136 printf("vnode_size (new file) for swap file failed: %d\n", error);
137 goto done;
138 }
139 assert(file_size == *size);
140
141 if (pin != NULL && *pin != FALSE) {
142 error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
143
144 if (error) {
145 printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size);
146 /* this is not fatal, carry on with files wherever they landed */
147 *pin = FALSE;
148 error = 0;
149 }
150 }
151
152 vnode_lock_spin(vp);
153 SET(vp->v_flag, VSWAP);
154 vnode_unlock(vp);
155
156 #if CONFIG_FREEZE
157 VATTR_INIT(&va);
158 VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
159 error = VNOP_SETATTR(vp, &va, ctx);
160
161 if (error) {
162 printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
163 goto done;
164 }
165 #endif /* CONFIG_FREEZE */
166
167 done:
168 return error;
169 }
170
171
172 int
vm_record_file_write(vnode_t vp,uint64_t offset,char * buf,int size)173 vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
174 {
175 int error = 0;
176 vfs_context_t ctx;
177
178 ctx = vfs_context_kernel();
179
180 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
181 UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
182
183 return error;
184 }
185
186
187
188 int
vm_swapfile_io(vnode_t vp,uint64_t offset,uint64_t start,int npages,int flags,void * upl_iodone)189 vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
190 {
191 int error = 0;
192 upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
193 #if 1
194 kern_return_t kr = KERN_SUCCESS;
195 upl_t upl = NULL;
196 unsigned int count = 0;
197 upl_control_flags_t upl_create_flags = 0;
198 int upl_control_flags = 0;
199 upl_size_t upl_size = 0;
200
201 upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
202
203 if (upl_iodone == NULL) {
204 upl_control_flags = UPL_IOSYNC;
205 }
206
207 #if ENCRYPTED_SWAP
208 upl_control_flags |= UPL_PAGING_ENCRYPTED;
209 #endif
210
211 if ((flags & SWAP_READ) == FALSE) {
212 upl_create_flags |= UPL_COPYOUT_FROM;
213 }
214
215 upl_size = io_size;
216 kr = vm_map_create_upl( kernel_map,
217 start,
218 &upl_size,
219 &upl,
220 NULL,
221 &count,
222 &upl_create_flags,
223 VM_KERN_MEMORY_OSFMK);
224
225 if (kr != KERN_SUCCESS || (upl_size != io_size)) {
226 panic("vm_map_create_upl failed with %d", kr);
227 }
228
229 if (flags & SWAP_READ) {
230 vnode_pagein(vp,
231 upl,
232 0,
233 offset,
234 io_size,
235 upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
236 &error);
237 if (error) {
238 #if DEBUG
239 printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
240 #else /* DEBUG */
241 printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
242 #endif /* DEBUG */
243 }
244 } else {
245 upl_set_iodone(upl, upl_iodone);
246
247 vnode_pageout(vp,
248 upl,
249 0,
250 offset,
251 io_size,
252 upl_control_flags,
253 &error);
254 if (error) {
255 #if DEBUG
256 printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
257 #else /* DEBUG */
258 printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
259 #endif /* DEBUG */
260 }
261 }
262
263 return error;
264
265 #else /* 1 */
266 vfs_context_t ctx;
267 ctx = vfs_context_kernel();
268
269 error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
270 UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
271
272 if (error) {
273 printf("vn_rdwr: Swap I/O failed with %d\n", error);
274 }
275 return error;
276 #endif /* 1 */
277 }
278
279
280 #define MAX_BATCH_TO_TRIM 256
281
282 #define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */
283 /* the DKIOUNMAP command through w/o acting on it */
284 /* this is used by the compressed swap system to reclaim empty space */
285
286
287 u_int32_t
vnode_trim_list(vnode_t vp,struct trim_list * tl,boolean_t route_only)288 vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
289 {
290 int error = 0;
291 int trim_index = 0;
292 u_int32_t blocksize = 0;
293 struct vnode *devvp;
294 dk_extent_t *extents;
295 dk_unmap_t unmap;
296 _dk_cs_unmap_t cs_unmap;
297
298 if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
299 return ENOTSUP;
300 }
301
302 if (tl == NULL) {
303 return 0;
304 }
305
306 /*
307 * Get the underlying device vnode and physical block size
308 */
309 devvp = vp->v_mount->mnt_devvp;
310 blocksize = vp->v_mount->mnt_devblocksize;
311
312 extents = kalloc_data(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);
313
314 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
315 memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
316 cs_unmap.extents = extents;
317
318 if (route_only == TRUE) {
319 cs_unmap.options = ROUTE_ONLY;
320 }
321 } else {
322 memset(&unmap, 0, sizeof(dk_unmap_t));
323 unmap.extents = extents;
324 }
325
326 while (tl) {
327 daddr64_t io_blockno; /* Block number corresponding to the start of the extent */
328 size_t io_bytecount; /* Number of bytes in current extent for the specified range */
329 size_t trimmed;
330 size_t remaining_length;
331 off_t current_offset;
332
333 current_offset = tl->tl_offset;
334 remaining_length = tl->tl_length;
335 trimmed = 0;
336
337 /*
338 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
339 * extent from the blockmap call. Keep looping/going until we are sure we've hit
340 * the whole range or if we encounter an error.
341 */
342 while (trimmed < tl->tl_length) {
343 /*
344 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
345 * specified offset. It returns blocks in contiguous chunks, so if the logical range is
346 * broken into multiple extents, it must be called multiple times, increasing the offset
347 * in each call to ensure that the entire range is covered.
348 */
349 error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
350 &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);
351
352 if (error) {
353 goto trim_exit;
354 }
355 if (io_blockno != -1) {
356 extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
357 extents[trim_index].length = io_bytecount;
358
359 trim_index++;
360 }
361 if (trim_index == MAX_BATCH_TO_TRIM) {
362 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
363 cs_unmap.extentsCount = trim_index;
364 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
365 } else {
366 unmap.extentsCount = trim_index;
367 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
368 }
369 if (error) {
370 goto trim_exit;
371 }
372 trim_index = 0;
373 }
374 trimmed += io_bytecount;
375 current_offset += io_bytecount;
376 remaining_length -= io_bytecount;
377 }
378 tl = tl->tl_next;
379 }
380 if (trim_index) {
381 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
382 cs_unmap.extentsCount = trim_index;
383 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
384 } else {
385 unmap.extentsCount = trim_index;
386 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
387 }
388 }
389 trim_exit:
390 kfree_data(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
391
392 return error;
393 }
394
#if CONFIG_FREEZE
/*
 * Query the device backing `vp`'s mount with DKIOCGETMAXSWAPWRITE and
 * store the answer in `*freeze_daily_budget`.
 *
 * Returns the error from vnode_getwithref() if that call fails, ENODEV
 * if the vnode has no mount or the mount has no device vnode, and
 * otherwise the result of the ioctl.
 */
int
vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
{
	vfs_context_t kctx = vfs_context_kernel();
	errno_t rc;

	rc = vnode_getwithref(vp);
	if (rc != 0) {
		return rc;
	}

	if (vp->v_mount == NULL || vp->v_mount->mnt_devvp == NULL) {
		rc = ENODEV;
	} else {
		vnode_t backing_dev = vp->v_mount->mnt_devvp;

		rc = VNOP_IOCTL(backing_dev, DKIOCGETMAXSWAPWRITE,
		    (caddr_t)freeze_daily_budget, 0, kctx);
	}

	/* Balance the successful vnode_getwithref() above. */
	vnode_put(vp);

	return rc;
}
#endif /* CONFIG_FREEZE */
417
418 int
vm_swap_vol_get_capacity(const char * volume_name,uint64_t * capacity)419 vm_swap_vol_get_capacity(const char *volume_name, uint64_t *capacity)
420 {
421 vfs_context_t ctx = vfs_context_kernel();
422 vnode_t vp = NULL, devvp = NULL;
423 uint64_t block_size = 0;
424 uint64_t block_count = 0;
425 int error = 0;
426 *capacity = 0;
427
428 if ((error = vnode_open(volume_name, FREAD, 0, 0, &vp, ctx))) {
429 printf("Unable to open swap volume\n");
430 return error;
431 }
432
433 devvp = vp->v_mount->mnt_devvp;
434 if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&block_size, 0, ctx))) {
435 printf("Unable to get swap volume block size\n");
436 goto out;
437 }
438 if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&block_count, 0, ctx))) {
439 printf("Unable to get swap volume block count\n");
440 goto out;
441 }
442
443 *capacity = block_count * block_size;
444 out:
445 error = vnode_close(vp, 0, ctx);
446 return error;
447 }
448