1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <stdint.h>
30 #include <sys/fcntl.h>
31 #include <sys/vnode_internal.h>
32 #include <sys/vnode.h>
33 #include <sys/kauth.h>
34 #include <sys/mount_internal.h>
35 #include <sys/buf_internal.h>
36 #include <kern/debug.h>
37 #include <kern/kalloc.h>
38 #include <sys/cprotect.h>
39 #include <sys/disk.h>
40 #include <vm/vm_protos_internal.h>
41 #include <vm/vm_pageout_xnu.h>
42 #include <sys/content_protection.h>
43 #include <vm/vm_ubc.h>
44 #include <vm/vm_compressor_backing_store_internal.h>
45
46 void
vm_swapfile_open(const char * path,vnode_t * vp)47 vm_swapfile_open(const char *path, vnode_t *vp)
48 {
49 int error = 0;
50 vfs_context_t ctx = vfs_context_kernel();
51
52 if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
53 printf("Failed to open swap file %d\n", error);
54 *vp = NULL;
55 return;
56 }
57
58 /*
59 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
60 * To avoid a race on the mount we only make this check after creating the
61 * vnode.
62 */
63 if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
64 vnode_put(*vp);
65 vm_swapfile_close((uint64_t)path, *vp);
66 *vp = NULL;
67 return;
68 }
69
70 vnode_put(*vp);
71 }
72
73 uint64_t
vm_swapfile_get_blksize(vnode_t vp)74 vm_swapfile_get_blksize(vnode_t vp)
75 {
76 return (uint64_t)vfs_devblocksize(vnode_mount(vp));
77 }
78
79 uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)80 vm_swapfile_get_transfer_size(vnode_t vp)
81 {
82 return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
83 }
84
85 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
86
87 void
vm_swapfile_close(uint64_t path_addr,vnode_t vp)88 vm_swapfile_close(uint64_t path_addr, vnode_t vp)
89 {
90 vfs_context_t context = vfs_context_kernel();
91 int error;
92
93 vnode_getwithref(vp);
94 vnode_close(vp, 0, context);
95
96 error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
97 UIO_SYSSPACE, 0);
98
99 #if DEVELOPMENT || DEBUG
100 if (error) {
101 printf("%s : unlink of %s failed with error %d", __FUNCTION__,
102 (char *)path_addr, error);
103 }
104 #endif
105 }
106
107 int
vm_swapfile_preallocate(vnode_t vp,uint64_t * size,boolean_t * pin)108 vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
109 {
110 int error = 0;
111 uint64_t file_size = 0;
112 vfs_context_t ctx = NULL;
113 #if CONFIG_FREEZE
114 struct vnode_attr va;
115 #endif /* CONFIG_FREEZE */
116
117 ctx = vfs_context_kernel();
118
119 error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
120
121 if (error) {
122 printf("vnode_setsize for swap files failed: %d\n", error);
123 goto done;
124 }
125
126 error = vnode_size(vp, (off_t*) &file_size, ctx);
127
128 if (error) {
129 printf("vnode_size (new file) for swap file failed: %d\n", error);
130 goto done;
131 }
132 assert(file_size == *size);
133
134 if (pin != NULL && *pin != FALSE) {
135 error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
136
137 if (error) {
138 printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size);
139 /* this is not fatal, carry on with files wherever they landed */
140 *pin = FALSE;
141 error = 0;
142 }
143 }
144
145 vnode_lock_spin(vp);
146 SET(vp->v_flag, VSWAP);
147 vnode_unlock(vp);
148
149 #if CONFIG_FREEZE
150 VATTR_INIT(&va);
151 VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
152 error = VNOP_SETATTR(vp, &va, ctx);
153
154 if (error) {
155 printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
156 goto done;
157 }
158 #endif /* CONFIG_FREEZE */
159
160 done:
161 return error;
162 }
163
164
165 int
vm_record_file_write(vnode_t vp,uint64_t offset,char * buf,int size)166 vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
167 {
168 int error = 0;
169 vfs_context_t ctx;
170
171 ctx = vfs_context_kernel();
172
173 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
174 UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
175
176 return error;
177 }
178
179
180
181 int
vm_swapfile_io(vnode_t vp,uint64_t offset,uint64_t start,int npages,int flags,void * upl_iodone)182 vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
183 {
184 int error = 0;
185 upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
186 #if 1
187 kern_return_t kr = KERN_SUCCESS;
188 upl_t upl = NULL;
189 unsigned int count = 0;
190 upl_control_flags_t upl_create_flags = 0;
191 int upl_control_flags = 0;
192 upl_size_t upl_size = 0;
193
194 upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
195
196 if (upl_iodone == NULL) {
197 upl_control_flags = UPL_IOSYNC;
198 }
199
200 #if ENCRYPTED_SWAP
201 upl_control_flags |= UPL_PAGING_ENCRYPTED;
202 #endif
203
204 if ((flags & SWAP_READ) == FALSE) {
205 upl_create_flags |= UPL_COPYOUT_FROM;
206 }
207
208 upl_size = io_size;
209 kr = vm_map_create_upl( kernel_map,
210 start,
211 &upl_size,
212 &upl,
213 NULL,
214 &count,
215 &upl_create_flags,
216 VM_KERN_MEMORY_OSFMK);
217
218 if (kr != KERN_SUCCESS || (upl_size != io_size)) {
219 panic("vm_map_create_upl failed with %d", kr);
220 }
221
222 if (flags & SWAP_READ) {
223 vnode_pagein(vp,
224 upl,
225 0,
226 offset,
227 io_size,
228 upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
229 &error);
230 if (error) {
231 #if DEBUG
232 printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
233 #else /* DEBUG */
234 printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
235 #endif /* DEBUG */
236 }
237 } else {
238 upl_set_iodone(upl, upl_iodone);
239
240 vnode_pageout(vp,
241 upl,
242 0,
243 offset,
244 io_size,
245 upl_control_flags,
246 &error);
247 if (error) {
248 #if DEBUG
249 printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
250 #else /* DEBUG */
251 printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
252 #endif /* DEBUG */
253 }
254 }
255
256 return error;
257
258 #else /* 1 */
259 vfs_context_t ctx;
260 ctx = vfs_context_kernel();
261
262 error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
263 UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
264
265 if (error) {
266 printf("vn_rdwr: Swap I/O failed with %d\n", error);
267 }
268 return error;
269 #endif /* 1 */
270 }
271
272
/* Number of extents collected before a batch is handed to the unmap ioctl. */
#define MAX_BATCH_TO_TRIM       256

/*
 * If corestorage is present, tell it to just pass the DKIOUNMAP command
 * through w/o acting on it. This is used by the compressed swap system to
 * reclaim empty space.
 */
#define ROUTE_ONLY              0x10
278
279
280 u_int32_t
vnode_trim_list(vnode_t vp,struct trim_list * tl,boolean_t route_only)281 vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
282 {
283 int error = 0;
284 int trim_index = 0;
285 u_int32_t blocksize = 0;
286 struct vnode *devvp;
287 dk_extent_t *extents;
288 dk_unmap_t unmap;
289 _dk_cs_unmap_t cs_unmap;
290
291 if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
292 return ENOTSUP;
293 }
294
295 if (tl == NULL) {
296 return 0;
297 }
298
299 /*
300 * Get the underlying device vnode and physical block size
301 */
302 devvp = vp->v_mount->mnt_devvp;
303 blocksize = vp->v_mount->mnt_devblocksize;
304
305 extents = kalloc_data(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);
306
307 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
308 memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
309 cs_unmap.extents = extents;
310
311 if (route_only == TRUE) {
312 cs_unmap.options = ROUTE_ONLY;
313 }
314 } else {
315 memset(&unmap, 0, sizeof(dk_unmap_t));
316 unmap.extents = extents;
317 }
318
319 while (tl) {
320 daddr64_t io_blockno; /* Block number corresponding to the start of the extent */
321 size_t io_bytecount; /* Number of bytes in current extent for the specified range */
322 size_t trimmed;
323 size_t remaining_length;
324 off_t current_offset;
325
326 current_offset = tl->tl_offset;
327 remaining_length = tl->tl_length;
328 trimmed = 0;
329
330 /*
331 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
332 * extent from the blockmap call. Keep looping/going until we are sure we've hit
333 * the whole range or if we encounter an error.
334 */
335 while (trimmed < tl->tl_length) {
336 /*
337 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
338 * specified offset. It returns blocks in contiguous chunks, so if the logical range is
339 * broken into multiple extents, it must be called multiple times, increasing the offset
340 * in each call to ensure that the entire range is covered.
341 */
342 error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
343 &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);
344
345 if (error) {
346 goto trim_exit;
347 }
348 if (io_blockno != -1) {
349 extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
350 extents[trim_index].length = io_bytecount;
351
352 trim_index++;
353 }
354 if (trim_index == MAX_BATCH_TO_TRIM) {
355 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
356 cs_unmap.extentsCount = trim_index;
357 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
358 } else {
359 unmap.extentsCount = trim_index;
360 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
361 }
362 if (error) {
363 goto trim_exit;
364 }
365 trim_index = 0;
366 }
367 trimmed += io_bytecount;
368 current_offset += io_bytecount;
369 remaining_length -= io_bytecount;
370 }
371 tl = tl->tl_next;
372 }
373 if (trim_index) {
374 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
375 cs_unmap.extentsCount = trim_index;
376 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
377 } else {
378 unmap.extentsCount = trim_index;
379 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
380 }
381 }
382 trim_exit:
383 kfree_data(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
384
385 return error;
386 }
387
#if CONFIG_FREEZE
/*
 * Issue DKIOCGETMAXSWAPWRITE against the device vnode of the mount that
 * holds `vp`; the ioctl writes its result into *freeze_daily_budget.
 *
 * Returns the vnode_getwithref() error if the vnode cannot be referenced,
 * ENODEV if the vnode has no mount or the mount has no device vnode,
 * otherwise the ioctl result.
 */
int
vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
{
	vfs_context_t kernel_ctx = vfs_context_kernel();
	errno_t rc;

	rc = vnode_getwithref(vp);
	if (rc != 0) {
		return rc;
	}

	if (vp->v_mount == NULL || vp->v_mount->mnt_devvp == NULL) {
		rc = ENODEV;
	} else {
		vnode_t device_vp = vp->v_mount->mnt_devvp;

		rc = VNOP_IOCTL(device_vp, DKIOCGETMAXSWAPWRITE, (caddr_t)freeze_daily_budget, 0, kernel_ctx);
	}

	/* Pairs with the successful vnode_getwithref() above. */
	vnode_put(vp);

	return rc;
}
#endif /* CONFIG_FREEZE */
410
411 int
vm_swap_vol_get_capacity(const char * volume_name,uint64_t * capacity)412 vm_swap_vol_get_capacity(const char *volume_name, uint64_t *capacity)
413 {
414 vfs_context_t ctx = vfs_context_kernel();
415 vnode_t vp = NULL, devvp = NULL;
416 uint64_t block_size = 0;
417 uint64_t block_count = 0;
418 int error = 0;
419 *capacity = 0;
420
421 if ((error = vnode_open(volume_name, FREAD, 0, 0, &vp, ctx))) {
422 printf("Unable to open swap volume\n");
423 return error;
424 }
425
426 devvp = vp->v_mount->mnt_devvp;
427 if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&block_size, 0, ctx))) {
428 printf("Unable to get swap volume block size\n");
429 goto out;
430 }
431 if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&block_count, 0, ctx))) {
432 printf("Unable to get swap volume block count\n");
433 goto out;
434 }
435
436 *capacity = block_count * block_size;
437 out:
438 error = vnode_close(vp, 0, ctx);
439 return error;
440 }
441