1 /*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*-
25 * Portions Copyright (c) 1992, 1993, 1995
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software donated to Berkeley by
29 * Jan-Simon Pendry.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
56 *
57 * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
58 * $FreeBSD$
59 */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/fcntl.h>
64 #include <sys/kernel.h>
65 #include <sys/lock.h>
66 #include <sys/malloc.h>
67 #include <sys/mount.h>
68 #include <sys/mount_internal.h>
69 #include <sys/namei.h>
70 #include <sys/proc.h>
71 #include <sys/vnode.h>
72 #include <sys/vnode_internal.h>
73 #include <security/mac_internal.h>
74
75 #include <sys/param.h>
76
77 #include <IOKit/IOBSD.h>
78
79 #include "bindfs.h"
80
81 #define BINDFS_ENTITLEMENT "com.apple.private.bindfs-allow"
82
83 static int
bindfs_vfs_getlowerattr(mount_t mp,struct vfs_attr * vfap,vfs_context_t ctx)84 bindfs_vfs_getlowerattr(mount_t mp, struct vfs_attr * vfap, vfs_context_t ctx)
85 {
86 memset(vfap, 0, sizeof(*vfap));
87 VFSATTR_INIT(vfap);
88 VFSATTR_WANTED(vfap, f_bsize);
89 VFSATTR_WANTED(vfap, f_iosize);
90 VFSATTR_WANTED(vfap, f_blocks);
91 VFSATTR_WANTED(vfap, f_bfree);
92 VFSATTR_WANTED(vfap, f_bavail);
93 VFSATTR_WANTED(vfap, f_bused);
94 VFSATTR_WANTED(vfap, f_files);
95 VFSATTR_WANTED(vfap, f_ffree);
96 VFSATTR_WANTED(vfap, f_capabilities);
97
98 return vfs_getattr(mp, vfap, ctx);
99 }
100
101 /*
102 * Mount bind layer
103 */
104 static int
bindfs_mount(struct mount * mp,__unused vnode_t devvp,user_addr_t user_data,vfs_context_t ctx)105 bindfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
106 {
107 int error = 0;
108 struct vnode *lowerrootvp = NULL, *vp = NULL;
109 struct vfsstatfs * sp = NULL;
110 struct bind_mount * xmp = NULL;
111 char data[MAXPATHLEN];
112 size_t count;
113 struct vfs_attr vfa;
114 /* set defaults (arbitrary since this file system is readonly) */
115 uint32_t bsize = BLKDEV_IOSIZE;
116 size_t iosize = BLKDEV_IOSIZE;
117 uint64_t blocks = 4711 * 4711;
118 uint64_t bfree = 0;
119 uint64_t bavail = 0;
120 uint64_t bused = 4711;
121 uint64_t files = 4711;
122 uint64_t ffree = 0;
123
124 kauth_cred_t cred = vfs_context_ucred(ctx);
125
126 BINDFSDEBUG("mp = %p %llx\n", (void *)mp, vfs_flags(mp));
127
128 if (vfs_flags(mp) & MNT_ROOTFS) {
129 return EOPNOTSUPP;
130 }
131
132 /*
133 * Update is a no-op
134 */
135 if (vfs_isupdate(mp)) {
136 return ENOTSUP;
137 }
138
139 /* check entitlement */
140 if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT)) {
141 return EPERM;
142 }
143
144 /*
145 * Get argument
146 */
147 error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
148 if (error) {
149 BINDFSERROR("error copying data from user %d\n", error);
150 goto error;
151 }
152
153 /* This could happen if the system is configured for 32 bit inodes instead of
154 * 64 bit */
155 if (count > sizeof(vfs_statfs(mp)->f_mntfromname)) {
156 error = EINVAL;
157 BINDFSERROR("path to mount too large for this system %zu vs %lu\n", count, sizeof(vfs_statfs(mp)->f_mntfromname));
158 goto error;
159 }
160
161 error = vnode_lookup(data, 0, &lowerrootvp, ctx);
162 if (error) {
163 BINDFSERROR("lookup of %s failed error: %d\n", data, error);
164 goto error;
165 }
166
167 /* lowervrootvp has an iocount after vnode_lookup, drop that for a usecount.
168 * Keep this to signal what we want to keep around the thing we are mirroring.
169 * Drop it in unmount.*/
170 error = vnode_ref(lowerrootvp);
171 vnode_put(lowerrootvp);
172 if (error) {
173 // If vnode_ref failed, then bind it out so it can't be used anymore in cleanup.
174 lowerrootvp = NULL;
175 goto error;
176 }
177
178 BINDFSDEBUG("mount %s\n", data);
179
180 xmp = kalloc_type(struct bind_mount, Z_WAITOK | Z_ZERO | Z_NOFAIL);
181
182 /*
183 * Save reference to underlying FS
184 */
185 xmp->bindm_lowerrootvp = lowerrootvp;
186 xmp->bindm_lowerrootvid = vnode_vid(lowerrootvp);
187
188 error = bind_nodeget(mp, lowerrootvp, NULL, &vp, NULL, 1);
189 if (error) {
190 goto error;
191 }
192 /* After bind_nodeget our root vnode is in the hash table and we have to usecounts on lowerrootvp
193 * One use count will get dropped when we reclaim the root during unmount.
194 * The other will get dropped in unmount */
195
196
197 /* vp has an iocount on it from vnode_create. drop that for a usecount. This
198 * is our root vnode so we drop the ref in unmount
199 *
200 * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref*/
201 vnode_ref(vp);
202 vnode_put(vp);
203
204 xmp->bindm_rootvp = vp;
205
206 /* read the flags the user set, but then ignore some of them, we will only
207 * allow them if they are set on the lower file system */
208 uint64_t flags = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
209 uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);
210
211 if (lowerflags) {
212 flags |= lowerflags;
213 }
214
215 /* force these flags */
216 flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
217 vfs_setflags(mp, flags);
218
219 vfs_setfsprivate(mp, xmp);
220 vfs_getnewfsid(mp);
221 vfs_setlocklocal(mp);
222
223 /* fill in the stat block */
224 sp = vfs_statfs(mp);
225 strlcpy(sp->f_mntfromname, data, sizeof(sp->f_mntfromname));
226
227 sp->f_flags = flags;
228
229 xmp->bindm_flags = BINDM_CASEINSENSITIVE; /* default to case insensitive */
230
231 error = bindfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
232 if (error == 0) {
233 if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
234 bsize = vfa.f_bsize;
235 }
236 if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
237 iosize = vfa.f_iosize;
238 }
239 if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
240 blocks = vfa.f_blocks;
241 }
242 if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
243 bfree = vfa.f_bfree;
244 }
245 if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
246 bavail = vfa.f_bavail;
247 }
248 if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
249 bused = vfa.f_bused;
250 }
251 if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
252 files = vfa.f_files;
253 }
254 if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
255 ffree = vfa.f_ffree;
256 }
257 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
258 if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
259 (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
260 xmp->bindm_flags &= ~BINDM_CASEINSENSITIVE;
261 }
262 }
263 } else {
264 goto error;
265 }
266
267 sp->f_bsize = bsize;
268 sp->f_iosize = iosize;
269 sp->f_blocks = blocks;
270 sp->f_bfree = bfree;
271 sp->f_bavail = bavail;
272 sp->f_bused = bused;
273 sp->f_files = files;
274 sp->f_ffree = ffree;
275
276 /* Associate the mac label information from the mirrored filesystem with the
277 * mirror */
278 MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));
279
280 BINDFSDEBUG("lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
281 return 0;
282
283 error:
284 if (xmp) {
285 kfree_type(struct bind_mount, xmp);
286 }
287 if (lowerrootvp) {
288 vnode_getwithref(lowerrootvp);
289 vnode_rele(lowerrootvp);
290 vnode_put(lowerrootvp);
291 }
292 if (vp) {
293 /* we made the root vnode but the mount is failed, so clean it up */
294 vnode_getwithref(vp);
295 vnode_rele(vp);
296 /* give vp back */
297 vnode_recycle(vp);
298 vnode_put(vp);
299 }
300 return error;
301 }
302
303 /*
304 * Free reference to bind layer
305 */
306 static int
bindfs_unmount(struct mount * mp,int mntflags,__unused vfs_context_t ctx)307 bindfs_unmount(struct mount * mp, int mntflags, __unused vfs_context_t ctx)
308 {
309 struct bind_mount * mntdata;
310 struct vnode * vp;
311 int error, flags;
312
313 BINDFSDEBUG("mp = %p\n", (void *)mp);
314
315 /* check entitlement or superuser*/
316 if (!IOCurrentTaskHasEntitlement(BINDFS_ENTITLEMENT) &&
317 vfs_context_suser(ctx) != 0) {
318 return EPERM;
319 }
320
321 if (mntflags & MNT_FORCE) {
322 flags = FORCECLOSE;
323 } else {
324 flags = 0;
325 }
326
327 mntdata = MOUNTTOBINDMOUNT(mp);
328 vp = mntdata->bindm_rootvp;
329
330 // release our reference on the root before flushing.
331 // it will get pulled out of the mount structure by reclaim
332 vnode_getalways(vp);
333
334 error = vflush(mp, vp, flags);
335 if (error) {
336 vnode_put(vp);
337 return error;
338 }
339
340 if (vnode_isinuse(vp, 1) && flags == 0) {
341 vnode_put(vp);
342 return EBUSY;
343 }
344
345 vnode_rele(vp); // Drop reference taken by bindfs_mount
346 vnode_put(vp); // Drop ref taken above
347
348 //Force close to get rid of the last vnode
349 (void)vflush(mp, NULL, FORCECLOSE);
350
351 /* no more vnodes, so tear down the mountpoint */
352
353 vfs_setfsprivate(mp, NULL);
354
355 vnode_getalways(mntdata->bindm_lowerrootvp);
356 vnode_rele(mntdata->bindm_lowerrootvp);
357 vnode_put(mntdata->bindm_lowerrootvp);
358
359 kfree_type(struct bind_mount, mntdata);
360
361 uint64_t vflags = vfs_flags(mp);
362 vfs_setflags(mp, vflags & ~MNT_LOCAL);
363
364 return 0;
365 }
366
367 static int
bindfs_root(struct mount * mp,struct vnode ** vpp,__unused vfs_context_t ctx)368 bindfs_root(struct mount * mp, struct vnode ** vpp, __unused vfs_context_t ctx)
369 {
370 struct vnode * vp;
371 int error;
372
373 BINDFSDEBUG("mp = %p, vp = %p\n", (void *)mp, (void *)MOUNTTOBINDMOUNT(mp)->bindm_rootvp);
374
375 /*
376 * Return locked reference to root.
377 */
378 vp = MOUNTTOBINDMOUNT(mp)->bindm_rootvp;
379
380 error = vnode_get(vp);
381 if (error) {
382 return error;
383 }
384
385 *vpp = vp;
386 return 0;
387 }
388
389 static int
bindfs_vfs_getattr(struct mount * mp,struct vfs_attr * vfap,vfs_context_t ctx)390 bindfs_vfs_getattr(struct mount * mp, struct vfs_attr * vfap, vfs_context_t ctx)
391 {
392 struct vnode * coveredvp = NULL;
393 struct vfs_attr vfa;
394 struct bind_mount * bind_mp = MOUNTTOBINDMOUNT(mp);
395 vol_capabilities_attr_t capabilities;
396 struct vfsstatfs * sp = vfs_statfs(mp);
397
398 struct timespec tzero = {.tv_sec = 0, .tv_nsec = 0};
399
400 BINDFSDEBUG("\n");
401
402 /* Set default capabilities in case the lower file system is gone */
403 memset(&capabilities, 0, sizeof(capabilities));
404 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
405 capabilities.valid[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
406
407 if (bindfs_vfs_getlowerattr(vnode_mount(bind_mp->bindm_lowerrootvp), &vfa, ctx) == 0) {
408 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
409 memcpy(&capabilities, &vfa.f_capabilities, sizeof(capabilities));
410 /* don't support vget */
411 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
412
413 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
414
415 capabilities.valid[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
416
417 capabilities.valid[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
418
419 /* dont' support interfaces that only make sense on a writable file system
420 * or one with specific vnops implemented */
421 capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = 0;
422
423 capabilities.valid[VOL_CAPABILITIES_INTERFACES] &=
424 ~(VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_READDIRATTR | VOL_CAP_INT_EXCHANGEDATA |
425 VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK);
426 }
427 }
428
429 if (VFSATTR_IS_ACTIVE(vfap, f_create_time)) {
430 VFSATTR_RETURN(vfap, f_create_time, tzero);
431 }
432
433 if (VFSATTR_IS_ACTIVE(vfap, f_modify_time)) {
434 VFSATTR_RETURN(vfap, f_modify_time, tzero);
435 }
436
437 if (VFSATTR_IS_ACTIVE(vfap, f_access_time)) {
438 VFSATTR_RETURN(vfap, f_access_time, tzero);
439 }
440
441 if (VFSATTR_IS_ACTIVE(vfap, f_bsize)) {
442 VFSATTR_RETURN(vfap, f_bsize, sp->f_bsize);
443 }
444
445 if (VFSATTR_IS_ACTIVE(vfap, f_iosize)) {
446 VFSATTR_RETURN(vfap, f_iosize, sp->f_iosize);
447 }
448
449 if (VFSATTR_IS_ACTIVE(vfap, f_owner)) {
450 VFSATTR_RETURN(vfap, f_owner, 0);
451 }
452
453 if (VFSATTR_IS_ACTIVE(vfap, f_blocks)) {
454 VFSATTR_RETURN(vfap, f_blocks, sp->f_blocks);
455 }
456
457 if (VFSATTR_IS_ACTIVE(vfap, f_bfree)) {
458 VFSATTR_RETURN(vfap, f_bfree, sp->f_bfree);
459 }
460
461 if (VFSATTR_IS_ACTIVE(vfap, f_bavail)) {
462 VFSATTR_RETURN(vfap, f_bavail, sp->f_bavail);
463 }
464
465 if (VFSATTR_IS_ACTIVE(vfap, f_bused)) {
466 VFSATTR_RETURN(vfap, f_bused, sp->f_bused);
467 }
468
469 if (VFSATTR_IS_ACTIVE(vfap, f_files)) {
470 VFSATTR_RETURN(vfap, f_files, sp->f_files);
471 }
472
473 if (VFSATTR_IS_ACTIVE(vfap, f_ffree)) {
474 VFSATTR_RETURN(vfap, f_ffree, sp->f_ffree);
475 }
476
477 if (VFSATTR_IS_ACTIVE(vfap, f_fssubtype)) {
478 VFSATTR_RETURN(vfap, f_fssubtype, 0);
479 }
480
481 if (VFSATTR_IS_ACTIVE(vfap, f_capabilities)) {
482 memcpy(&vfap->f_capabilities, &capabilities, sizeof(vol_capabilities_attr_t));
483
484 VFSATTR_SET_SUPPORTED(vfap, f_capabilities);
485 }
486
487 if (VFSATTR_IS_ACTIVE(vfap, f_attributes)) {
488 vol_attributes_attr_t * volattr = &vfap->f_attributes;
489
490 volattr->validattr.commonattr = 0;
491 volattr->validattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
492 volattr->validattr.dirattr = 0;
493 volattr->validattr.fileattr = 0;
494 volattr->validattr.forkattr = 0;
495
496 volattr->nativeattr.commonattr = 0;
497 volattr->nativeattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
498 volattr->nativeattr.dirattr = 0;
499 volattr->nativeattr.fileattr = 0;
500 volattr->nativeattr.forkattr = 0;
501
502 VFSATTR_SET_SUPPORTED(vfap, f_attributes);
503 }
504
505 if (VFSATTR_IS_ACTIVE(vfap, f_vol_name)) {
506 /* The name of the volume is the same as the directory we mounted on */
507 coveredvp = vfs_vnodecovered(mp);
508 if (coveredvp) {
509 const char * name = vnode_getname_printable(coveredvp);
510 strlcpy(vfap->f_vol_name, name, MAXPATHLEN);
511 vnode_putname_printable(name);
512
513 VFSATTR_SET_SUPPORTED(vfap, f_vol_name);
514 vnode_put(coveredvp);
515 }
516 }
517
518 return 0;
519 }
520
521 static int
bindfs_sync(__unused struct mount * mp,__unused int waitfor,__unused vfs_context_t ctx)522 bindfs_sync(__unused struct mount * mp, __unused int waitfor, __unused vfs_context_t ctx)
523 {
524 return 0;
525 }
526
527
528
529 static int
bindfs_vfs_start(__unused struct mount * mp,__unused int flags,__unused vfs_context_t ctx)530 bindfs_vfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t ctx)
531 {
532 BINDFSDEBUG("\n");
533 return 0;
534 }
535
/* vnode operation vector description, defined outside this file */
extern const struct vnodeopv_desc bindfs_vnodeop_opv_desc;

/* Table of vnode operation vector descriptions exported by bindfs (one entry) */
const struct vnodeopv_desc * bindfs_vnodeopv_descs[] = {
	&bindfs_vnodeop_opv_desc,
};
541
542 struct vfsops bindfs_vfsops = {
543 .vfs_mount = bindfs_mount,
544 .vfs_unmount = bindfs_unmount,
545 .vfs_start = bindfs_vfs_start,
546 .vfs_root = bindfs_root,
547 .vfs_getattr = bindfs_vfs_getattr,
548 .vfs_sync = bindfs_sync,
549 .vfs_init = bindfs_init,
550 .vfs_sysctl = NULL,
551 .vfs_setattr = NULL,
552 };
553