1 /*
2 * Copyright (c) 2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <string.h>
30 #include <sys/fcntl.h>
31 #include <sys/fsctl.h>
32 #include <sys/namei.h>
33 #include <sys/stat.h>
34 #include <sys/vnode.h>
35 #include <sys/vnode_internal.h>
36 #include <sys/uio_internal.h>
37 #include <sys/fsevents.h>
38 #include <kern/kalloc.h>
39 #include <vfs/vfs_exclave_fs.h>
40 #include <miscfs/devfs/devfs.h>
41 #include <pexpert/pexpert.h>
42
/*
 * unlink1() is not defined in this file. It is declared here for
 * vfs_exclave_fs_remove(), which calls it with the kernel context, a
 * starting directory vnode, a kernel-space (UIO_SYSSPACE) name and flags 0.
 */
__private_extern__ int unlink1(vfs_context_t, vnode_t, user_addr_t,
    enum uio_seg, int);
45
/*
 * One entry of the open vnodes hash table. Entries are keyed by the
 * (dev, file_id) pair and track how many times the vnode was opened
 * through this interface.
 */
struct open_vnode {
	LIST_ENTRY(open_vnode) chain;   /* linkage within a hash bucket */
	vnode_t vp;                     /* the opened vnode */
	dev_t dev;                      /* device of the base dir it was opened under */
	uint64_t file_id;               /* host file id (va_fileid) of the vnode */
	uint32_t open_count;            /* number of outstanding opens */
};
53
/* graft inode number that is translated to/from the graft directory (gi_graft_dir) */
#define ROOT_DIR_INO_NUM 2

/* base_dir_t flag: the base directory is inside a graft and graft_info is valid */
#define VFS_EXCLAVE_FS_BASE_DIR_GRAFT 1
57
/*
 * A registered base directory; one slot exists per fs tag in base_dirs[].
 * A slot whose vp is NULL is unused.
 */
typedef struct {
	uint32_t flags;                 /* VFS_EXCLAVE_FS_BASE_DIR_* */
	vnode_t vp;                     /* base directory vnode (referenced with vnode_ref() at registration) */
	dev_t dev;                      /* va_fsid of vp, captured at registration */
	fsioc_graft_info_t graft_info;  /* meaningful only when the GRAFT flag is set */
} base_dir_t;
64
/*
 * hash table that maps from (dev, file_id) to a vnode and its open count.
 * A non-NULL open_vnodes_hashtbl doubles as the "subsystem started" indicator
 * (see exclave_fs_started()).
 */
typedef LIST_HEAD(open_vnode_head, open_vnode) open_vnodes_list_head_t;
static open_vnodes_list_head_t *open_vnodes_hashtbl = NULL;
static u_long open_vnodes_hashmask = 0;
static int open_vnodes_hashsize = 0;
/* number of entries currently in the hash table (not the sum of open counts) */
static uint32_t num_open_vnodes = 0;

/* registered base directories, indexed by fs tag */
static base_dir_t base_dirs[EFT_FS_NUM_TAGS] = {0};
static uint32_t num_base_dirs = 0;

static LCK_GRP_DECLARE(vfs_exclave_lck_grp, "vfs_exclave");

/* protects base_dirs */
static lck_mtx_t base_dirs_mtx;

/* protects open vnodes hash table */
static lck_mtx_t open_vnodes_mtx;

/* bucket selection: sum of device and file id, masked to the table size */
#define HASHFUNC(dev, file_id) (((dev) + (file_id)) & open_vnodes_hashmask)
#define OPEN_VNODES_HASH(dev, file_id) (&open_vnodes_hashtbl[HASHFUNC(dev, file_id)])

/*
 * Set from the boot-arg below in vfs_exclave_fs_start(); consulted by
 * vfs_exclave_fs_readdir() on DEVELOPMENT/DEBUG kernels.
 */
static bool integrity_checks_enabled = false;
#define EXCLAVE_INTEGRITY_CHECKS_ENABLED_BOOTARG "enable_integrity_checks"

/* forward declaration: used by vfs_exclave_fs_root() before the definition */
static int exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id, const char *path,
    int flags, uint64_t *file_id);
92
93 /*
94 * Get the fsid and fileid attributes of the given vnode.
95 */
96 static int
get_vnode_info(vnode_t vp,dev_t * dev,fsid_t * fsid,uint64_t * file_id)97 get_vnode_info(vnode_t vp, dev_t *dev, fsid_t *fsid, uint64_t *file_id)
98 {
99 struct vnode_attr va;
100 int error;
101
102 memset(&va, 0, sizeof(va));
103 VATTR_INIT(&va);
104 if (dev) {
105 VATTR_WANTED(&va, va_fsid);
106 }
107 if (fsid) {
108 VATTR_WANTED(&va, va_fsid64);
109 }
110 if (file_id) {
111 VATTR_WANTED(&va, va_fileid);
112 }
113
114 error = vnode_getattr(vp, &va, vfs_context_kernel());
115 if (error) {
116 return error;
117 }
118
119 if (dev) {
120 if (!VATTR_IS_SUPPORTED(&va, va_fsid)) {
121 return ENOTSUP;
122 }
123 *dev = va.va_fsid;
124 }
125
126 if (fsid) {
127 if (!VATTR_IS_SUPPORTED(&va, va_fsid64)) {
128 return ENOTSUP;
129 }
130 *fsid = va.va_fsid64;
131 }
132
133 if (file_id) {
134 if (!VATTR_IS_SUPPORTED(&va, va_fileid)) {
135 return ENOTSUP;
136 }
137 *file_id = va.va_fileid;
138 }
139
140 return 0;
141 }
142
143 static inline bool
is_graft(base_dir_t * base_dir)144 is_graft(base_dir_t *base_dir)
145 {
146 return base_dir->flags & VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
147 }
148
149 static int
graft_to_host_inum(fsioc_graft_info_t * gi,uint64_t graft_inum,uint64_t * host_inum)150 graft_to_host_inum(fsioc_graft_info_t *gi, uint64_t graft_inum, uint64_t *host_inum)
151 {
152 if (graft_inum == ROOT_DIR_INO_NUM) {
153 *host_inum = gi->gi_graft_dir;
154 } else if (graft_inum < gi->gi_inum_len) {
155 *host_inum = gi->gi_inum_base + graft_inum;
156 } else {
157 return ERANGE;
158 }
159
160 return 0;
161 }
162
163 static int
host_to_graft_inum(fsioc_graft_info_t * gi,uint64_t host_inum,uint64_t * graft_inum)164 host_to_graft_inum(fsioc_graft_info_t *gi, uint64_t host_inum, uint64_t *graft_inum)
165 {
166 if (host_inum == gi->gi_graft_dir) {
167 *graft_inum = ROOT_DIR_INO_NUM;
168 } else if ((host_inum >= gi->gi_inum_base) && (host_inum < gi->gi_inum_base + gi->gi_inum_len)) {
169 *graft_inum = host_inum - gi->gi_inum_base;
170 } else {
171 return ERANGE;
172 }
173
174 return 0;
175 }
176
177 /*
178 * Check if a vnode is in an APFS graft and if so obtain information about the graft.
179 */
180 static int
get_graft_info(vnode_t vp,bool * is_graft,fsioc_graft_info_t * graft_info)181 get_graft_info(vnode_t vp, bool *is_graft, fsioc_graft_info_t *graft_info)
182 {
183 fsioc_get_graft_info_t ggi = {0};
184 uint16_t alloc_count;
185 fsioc_graft_info_t *graft_infos = NULL;
186 int error = 0;
187
188 *is_graft = false;
189
190 error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, (caddr_t)&ggi, 0, vfs_context_kernel());
191 if (error) {
192 return error;
193 }
194
195 if (!ggi.ggi_is_in_graft) {
196 return 0;
197 }
198
199 if (ggi.ggi_count == 0) {
200 return EINVAL;
201 }
202
203 alloc_count = ggi.ggi_count;
204
205 graft_infos = kalloc_type(fsioc_graft_info_t, alloc_count, Z_WAITOK | Z_ZERO);
206 if (!graft_infos) {
207 return ENOMEM;
208 }
209
210 memset(&ggi, 0, sizeof(ggi));
211 ggi.ggi_count = alloc_count;
212 ggi.ggi_buffer = (user64_addr_t)graft_infos;
213
214 error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, (caddr_t)&ggi, 0, vfs_context_kernel());
215 if (error) {
216 goto out;
217 }
218
219 if (!ggi.ggi_is_in_graft) {
220 error = EAGAIN;
221 goto out;
222 }
223
224 if (ggi.ggi_graft_index >= alloc_count) {
225 error = ERANGE;
226 goto out;
227 }
228
229 *graft_info = graft_infos[ggi.ggi_graft_index];
230 *is_graft = true;
231
232 out:
233 if (graft_infos) {
234 kfree_type(fsioc_graft_info_t, alloc_count, graft_infos);
235 }
236
237 return error;
238 }
239
240 /*
241 * Set a base directory for the given fs tag.
242 */
243 static int
set_base_dir(uint32_t fs_tag,vnode_t vp,fsioc_graft_info_t * graft_info)244 set_base_dir(uint32_t fs_tag, vnode_t vp, fsioc_graft_info_t *graft_info)
245 {
246 dev_t dev;
247 base_dir_t *base_dir;
248 int error = 0;
249
250 if (fs_tag >= EFT_FS_NUM_TAGS) {
251 return EINVAL;
252 }
253
254 lck_mtx_lock(&base_dirs_mtx);
255
256 if (base_dirs[fs_tag].vp) {
257 error = EBUSY;
258 goto out;
259 }
260
261 error = get_vnode_info(vp, &dev, NULL, NULL);
262 if (error) {
263 goto out;
264 }
265
266 /*
267 * make sure that EFT_EXCLAVE does not share a dev_t with another fs,
268 * since EFT_EXCLAVE vnodes are opened RW whereas other fs vnodes
269 * are opened RO
270 */
271 if (fs_tag == EFT_EXCLAVE) {
272 int i;
273 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
274 if (!base_dirs[i].vp) {
275 continue;
276 }
277 if (base_dirs[i].dev == dev) {
278 error = EBUSY;
279 goto out;
280 }
281 }
282 } else if (base_dirs[EFT_EXCLAVE].vp && (base_dirs[EFT_EXCLAVE].dev == dev)) {
283 error = EBUSY;
284 goto out;
285 }
286
287 base_dir = &base_dirs[fs_tag];
288
289 if (graft_info) {
290 base_dir->flags |= VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
291 base_dir->graft_info = *graft_info;
292 }
293
294 base_dir->vp = vp;
295 base_dir->dev = dev;
296
297 num_base_dirs++;
298
299 out:
300 lck_mtx_unlock(&base_dirs_mtx);
301 return error;
302 }
303
304 /*
305 * Get the base directory entry for the given fs tag. If vpp is passed, return
306 * with an iocount taken on the vnode.
307 */
308 static int
get_base_dir(uint32_t fs_tag,base_dir_t * base_dir,vnode_t * vpp)309 get_base_dir(uint32_t fs_tag, base_dir_t *base_dir, vnode_t *vpp)
310 {
311 vnode_t base_vp;
312 int error = 0;
313
314 if (!base_dir && !vpp) {
315 return EINVAL;
316 }
317
318 if (fs_tag >= EFT_FS_NUM_TAGS) {
319 return EINVAL;
320 }
321
322 lck_mtx_lock(&base_dirs_mtx);
323
324 base_vp = base_dirs[fs_tag].vp;
325
326 if (base_vp == NULLVP) {
327 error = ENOENT;
328 goto out;
329 }
330
331 if (vpp) {
332 error = vnode_getwithref(base_vp);
333 if (error) {
334 goto out;
335 }
336 *vpp = base_vp;
337 }
338
339 if (base_dir) {
340 *base_dir = base_dirs[fs_tag];
341 }
342
343 out:
344 lck_mtx_unlock(&base_dirs_mtx);
345 return error;
346 }
347
348 int
vfs_exclave_fs_start(void)349 vfs_exclave_fs_start(void)
350 {
351 uint32_t bootarg_val;
352
353 lck_mtx_init(&base_dirs_mtx, &vfs_exclave_lck_grp, LCK_ATTR_NULL);
354 lck_mtx_init(&open_vnodes_mtx, &vfs_exclave_lck_grp, LCK_ATTR_NULL);
355
356 assert(open_vnodes_hashtbl == NULL);
357
358 open_vnodes_hashsize = desiredvnodes / 16;
359 open_vnodes_hashtbl = hashinit(open_vnodes_hashsize, M_VNODE, &open_vnodes_hashmask);
360 if (open_vnodes_hashtbl == NULL) {
361 open_vnodes_hashsize = open_vnodes_hashmask = 0;
362 return ENOMEM;
363 }
364
365 if (PE_parse_boot_argn(EXCLAVE_INTEGRITY_CHECKS_ENABLED_BOOTARG, &bootarg_val, sizeof(bootarg_val))) {
366 if (bootarg_val) {
367 integrity_checks_enabled = true;
368 }
369 }
370
371 return 0;
372 }
373
374 static bool
exclave_fs_started(void)375 exclave_fs_started(void)
376 {
377 return open_vnodes_hashtbl != NULL;
378 }
379
380 void
vfs_exclave_fs_stop(void)381 vfs_exclave_fs_stop(void)
382 {
383 int i;
384
385 if (!exclave_fs_started()) {
386 return;
387 }
388
389 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
390 vfs_exclave_fs_unregister_tag(i);
391 }
392
393 assert(num_open_vnodes == 0);
394 assert(open_vnodes_hashtbl);
395
396 hashdestroy(open_vnodes_hashtbl, M_VNODE, open_vnodes_hashmask);
397 open_vnodes_hashtbl = NULL;
398 open_vnodes_hashmask = open_vnodes_hashsize = 0;
399
400 lck_mtx_destroy(&base_dirs_mtx, &vfs_exclave_lck_grp);
401 lck_mtx_destroy(&open_vnodes_mtx, &vfs_exclave_lck_grp);
402
403 integrity_checks_enabled = false;
404 }
405
406 static bool
is_fs_writeable(uint32_t fs_tag)407 is_fs_writeable(uint32_t fs_tag)
408 {
409 return fs_tag == EFT_EXCLAVE;
410 }
411
412 int
vfs_exclave_fs_register(uint32_t fs_tag,vnode_t vp)413 vfs_exclave_fs_register(uint32_t fs_tag, vnode_t vp)
414 {
415 char vfs_name[MFSNAMELEN];
416 bool is_graft;
417 fsioc_graft_info_t graft_info;
418 int error;
419
420 if (!exclave_fs_started()) {
421 return ENXIO;
422 }
423
424 if (fs_tag >= EFT_FS_NUM_TAGS) {
425 return EINVAL;
426 }
427
428 vnode_vfsname(vp, vfs_name);
429 if (strcmp(vfs_name, "apfs")) {
430 return ENOTSUP;
431 }
432
433 if (!vnode_isdir(vp)) {
434 return ENOTDIR;
435 }
436
437 error = get_graft_info(vp, &is_graft, &graft_info);
438 if (error) {
439 return error;
440 }
441
442 if (is_graft && is_fs_writeable(fs_tag)) {
443 return EROFS;
444 }
445
446 error = vnode_ref(vp);
447 if (error) {
448 return error;
449 }
450
451 error = set_base_dir(fs_tag, vp, is_graft ? &graft_info : NULL);
452 if (error) {
453 vnode_rele(vp);
454 return error;
455 }
456
457 return 0;
458 }
459
460 int
vfs_exclave_fs_register_path(uint32_t fs_tag,const char * base_path)461 vfs_exclave_fs_register_path(uint32_t fs_tag, const char *base_path)
462 {
463 struct nameidata nd;
464 int error;
465
466 if (!exclave_fs_started()) {
467 return ENXIO;
468 }
469
470 if (fs_tag >= EFT_FS_NUM_TAGS) {
471 return EINVAL;
472 }
473
474 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_SYSSPACE,
475 CAST_USER_ADDR_T(base_path), vfs_context_kernel());
476
477 error = namei(&nd);
478 if (error) {
479 return error;
480 }
481
482 error = vfs_exclave_fs_register(fs_tag, nd.ni_vp);
483
484 vnode_put(nd.ni_vp);
485 nameidone(&nd);
486
487 return error;
488 }
489
490 /*
491 * Release open vnodes for the given fs_tag.
492 * base_dirs_mtx and open_vnodes_mtx must be locked by caller.
493 */
494 static void
release_open_vnodes(uint32_t fs_tag)495 release_open_vnodes(uint32_t fs_tag)
496 {
497 dev_t dev;
498 int i;
499
500 if (num_open_vnodes == 0) {
501 return;
502 }
503
504 dev = base_dirs[fs_tag].dev;
505
506 if (num_base_dirs > 1) {
507 /* skip release if another base dir has the same device */
508 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
509 if ((i != fs_tag) && base_dirs[i].vp
510 && (base_dirs[i].dev == dev)) {
511 return;
512 }
513 }
514 }
515
516 for (i = 0; i < open_vnodes_hashmask + 1; i++) {
517 struct open_vnode *entry, *temp_entry;
518
519 LIST_FOREACH_SAFE(entry, &open_vnodes_hashtbl[i], chain, temp_entry) {
520 if (entry->dev != dev) {
521 continue;
522 }
523 while (entry->open_count) {
524 vnode_rele(entry->vp);
525 entry->open_count--;
526 }
527 LIST_REMOVE(entry, chain);
528 kfree_type(struct open_vnode, entry);
529 num_open_vnodes--;
530 }
531 }
532 }
533
534 static int
vfs_exclave_fs_unregister_internal(uint32_t fs_tag,vnode_t vp)535 vfs_exclave_fs_unregister_internal(uint32_t fs_tag, vnode_t vp)
536 {
537 int error = 0;
538
539 if (!exclave_fs_started()) {
540 return ENXIO;
541 }
542
543 if (fs_tag >= EFT_FS_NUM_TAGS) {
544 return EINVAL;
545 }
546
547 lck_mtx_lock(&base_dirs_mtx);
548
549 if (vp) {
550 for (fs_tag = 0; fs_tag < EFT_FS_NUM_TAGS; fs_tag++) {
551 if (base_dirs[fs_tag].vp == vp) {
552 break;
553 }
554 }
555 } else {
556 vp = base_dirs[fs_tag].vp;
557 }
558
559 if (!vp || (fs_tag == EFT_FS_NUM_TAGS)) {
560 lck_mtx_unlock(&base_dirs_mtx);
561 return ENOENT;
562 }
563
564 lck_mtx_lock(&open_vnodes_mtx);
565
566 release_open_vnodes(fs_tag);
567
568 vnode_rele(vp);
569 base_dirs[fs_tag].vp = NULL;
570 base_dirs[fs_tag].dev = 0;
571 memset(&base_dirs[fs_tag], 0, sizeof(base_dirs[fs_tag]));
572 num_base_dirs--;
573
574 lck_mtx_unlock(&base_dirs_mtx);
575 lck_mtx_unlock(&open_vnodes_mtx);
576 return error;
577 }
578
579 int
vfs_exclave_fs_unregister(vnode_t vp)580 vfs_exclave_fs_unregister(vnode_t vp)
581 {
582 return vfs_exclave_fs_unregister_internal(0, vp);
583 }
584
585 int
vfs_exclave_fs_unregister_tag(uint32_t fs_tag)586 vfs_exclave_fs_unregister_tag(uint32_t fs_tag)
587 {
588 return vfs_exclave_fs_unregister_internal(fs_tag, NULLVP);
589 }
590
591 int
vfs_exclave_fs_get_base_dirs(void * buf,uint32_t * count)592 vfs_exclave_fs_get_base_dirs(void *buf, uint32_t *count)
593 {
594 int error = 0;
595 uint32_t i, num_copied = 0;
596 exclave_fs_base_dir_t *dirs = (exclave_fs_base_dir_t *)buf;
597
598 if (!count || (dirs && !*count)) {
599 return EINVAL;
600 }
601
602 lck_mtx_lock(&base_dirs_mtx);
603
604 if (!dirs) {
605 *count = num_base_dirs;
606 goto out;
607 } else if (*count < num_base_dirs) {
608 error = ENOSPC;
609 goto out;
610 }
611
612 for (i = 0; (i < EFT_FS_NUM_TAGS) && (num_copied < num_base_dirs); i++) {
613 base_dir_t *base_dir = &base_dirs[i];
614 exclave_fs_base_dir_t *out_dir = &dirs[num_copied];
615
616 if (base_dir->vp == NULLVP) {
617 continue;
618 }
619
620 memset(out_dir, 0, sizeof(exclave_fs_base_dir_t));
621
622 error = get_vnode_info(base_dir->vp, NULL, &out_dir->fsid, &out_dir->base_dir);
623 if (error) {
624 goto out;
625 }
626
627 out_dir->fs_tag = i;
628 out_dir->graft_file = is_graft(base_dir) ? base_dir->graft_info.gi_graft_file : 0;
629 num_copied++;
630 }
631
632 *count = num_copied;
633
634 out:
635 lck_mtx_unlock(&base_dirs_mtx);
636 return error;
637 }
638
639 static int
create_exclave_dir(vnode_t base_vp,const char * exclave_id)640 create_exclave_dir(vnode_t base_vp, const char *exclave_id)
641 {
642 vnode_t vp = NULLVP, dvp = NULLVP;
643 vfs_context_t ctx;
644 struct vnode_attr va, *vap = &va;
645 struct nameidata nd;
646 int update_flags = 0;
647 int error;
648
649 ctx = vfs_context_kernel();
650
651 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_SYSSPACE,
652 CAST_USER_ADDR_T(exclave_id), ctx);
653 nd.ni_cnd.cn_flags |= WILLBEDIR;
654
655 continue_lookup:
656 nd.ni_dvp = base_vp;
657 nd.ni_cnd.cn_flags |= USEDVP;
658
659 error = namei(&nd);
660 if (error) {
661 return error;
662 }
663
664 dvp = nd.ni_dvp;
665 vp = nd.ni_vp;
666
667 if (vp != NULLVP) {
668 error = EEXIST;
669 goto out;
670 }
671
672 nd.ni_cnd.cn_flags &= ~USEDVP;
673
674 VATTR_INIT(vap);
675 VATTR_SET(vap, va_mode, S_IRWXU | S_IRWXG);
676 VATTR_SET(vap, va_type, VDIR);
677
678 error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL);
679 if (error) {
680 goto out;
681 }
682
683 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
684 if (error == EKEEPLOOKING) {
685 nd.ni_vp = vp;
686 goto continue_lookup;
687 }
688
689 if (error) {
690 goto out;
691 }
692
693 if (vp->v_name == NULL) {
694 update_flags |= VNODE_UPDATE_NAME;
695 }
696 if (vp->v_parent == NULLVP) {
697 update_flags |= VNODE_UPDATE_PARENT;
698 }
699
700 if (update_flags) {
701 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr,
702 nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
703 }
704
705 out:
706 nameidone(&nd);
707 if (vp) {
708 vnode_put(vp);
709 }
710 if (dvp) {
711 vnode_put(dvp);
712 }
713
714 return error;
715 }
716
717 int
vfs_exclave_fs_root(const char * exclave_id,uint64_t * root_id)718 vfs_exclave_fs_root(const char *exclave_id, uint64_t *root_id)
719 {
720 int error;
721
722 if (!exclave_fs_started()) {
723 return ENXIO;
724 }
725
726 if (strchr(exclave_id, '/') || !strcmp(exclave_id, ".") || !strcmp(exclave_id, "..")) {
727 /* don't allow an exclave_id that looks like a path */
728 return EINVAL;
729 }
730
731 error = exclave_fs_open_internal(EFT_EXCLAVE, EXCLAVE_FS_BASEDIR_ROOT_ID,
732 exclave_id, O_DIRECTORY, root_id);
733
734 if (error == ENOENT) {
735 vnode_t base_vp;
736
737 error = get_base_dir(EFT_EXCLAVE, NULL, &base_vp);
738 if (error) {
739 return error;
740 }
741
742 error = create_exclave_dir(base_vp, exclave_id);
743 if (!error) {
744 error = exclave_fs_open_internal(EFT_EXCLAVE, EXCLAVE_FS_BASEDIR_ROOT_ID,
745 exclave_id, O_DIRECTORY, root_id);
746 }
747
748 vnode_put(base_vp);
749 }
750
751 return error;
752 }
753
754 /*
755 * Find a vnode in the open vnodes hash table with the given file_id
756 * under a base dir, take an iocount on it and return it.
757 * If base dir is a graft, file_id should be the graft inode number.
758 */
759 static int
get_open_vnode(base_dir_t * base_dir,uint64_t file_id,vnode_t * vpp)760 get_open_vnode(base_dir_t *base_dir, uint64_t file_id, vnode_t *vpp)
761 {
762 uint64_t vp_file_id;
763 struct open_vnode *entry;
764 int error;
765
766 if (is_graft(base_dir)) {
767 error = graft_to_host_inum(&base_dir->graft_info, file_id, &vp_file_id);
768 if (error) {
769 return error;
770 }
771 } else {
772 vp_file_id = file_id;
773 }
774
775 error = ENOENT;
776
777 lck_mtx_lock(&open_vnodes_mtx);
778
779 LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
780 if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
781 error = vnode_getwithref(entry->vp);
782 if (!error) {
783 *vpp = entry->vp;
784 }
785 break;
786 }
787 }
788
789 lck_mtx_unlock(&open_vnodes_mtx);
790 return error;
791 }
792
793 /*
794 * Increment a vnode open count in the open vnodes hash table.
795 * If base dir is a graft, file_id should be the host inode number.
796 */
797 static int
increment_vnode_open_count(vnode_t vp,base_dir_t * base_dir,uint64_t file_id)798 increment_vnode_open_count(vnode_t vp, base_dir_t *base_dir, uint64_t file_id)
799 {
800 struct open_vnode *entry;
801 open_vnodes_list_head_t *list;
802 int error = 0;
803
804 lck_mtx_lock(&open_vnodes_mtx);
805
806 list = OPEN_VNODES_HASH(base_dir->dev, file_id);
807
808 LIST_FOREACH(entry, list, chain) {
809 if ((entry->dev == base_dir->dev) && (entry->file_id == file_id)) {
810 break;
811 }
812 }
813
814 if (!entry) {
815 entry = kalloc_type(struct open_vnode, Z_WAITOK | Z_ZERO);
816 if (!entry) {
817 error = ENOMEM;
818 goto out;
819 }
820 entry->vp = vp;
821 entry->dev = base_dir->dev;
822 entry->file_id = file_id;
823 LIST_INSERT_HEAD(list, entry, chain);
824 num_open_vnodes++;
825 }
826
827 entry->open_count++;
828
829 out:
830 lck_mtx_unlock(&open_vnodes_mtx);
831 return error;
832 }
833
834 /*
835 * Decrement a vnode open count in the open vnodes hash table and
836 * return it with an iocount taken on it.
837 * If base dir is a graft, file_id should be the graft inode number.
838 */
839 static int
decrement_vnode_open_count(base_dir_t * base_dir,uint64_t file_id,vnode_t * vpp)840 decrement_vnode_open_count(base_dir_t *base_dir, uint64_t file_id, vnode_t *vpp)
841 {
842 struct open_vnode *entry;
843 vnode_t vp;
844 uint64_t vp_file_id;
845 int error = 0;
846
847 if (is_graft(base_dir)) {
848 error = graft_to_host_inum(&base_dir->graft_info, file_id, &vp_file_id);
849 if (error) {
850 return error;
851 }
852 } else {
853 vp_file_id = file_id;
854 }
855
856 lck_mtx_lock(&open_vnodes_mtx);
857
858 LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
859 if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
860 break;
861 }
862 }
863
864 if (!entry) {
865 error = ENOENT;
866 goto out;
867 }
868
869 vp = entry->vp;
870 entry->open_count--;
871
872 if (entry->open_count == 0) {
873 LIST_REMOVE(entry, chain);
874 kfree_type(struct open_vnode, entry);
875 num_open_vnodes--;
876 }
877
878 error = vnode_getwithref(vp);
879 if (!error) {
880 *vpp = vp;
881 }
882
883 out:
884 lck_mtx_unlock(&open_vnodes_mtx);
885 return error;
886 }
887
888 static int
exclave_fs_open_internal(uint32_t fs_tag,uint64_t root_id,const char * path,int flags,uint64_t * file_id)889 exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id, const char *path,
890 int flags, uint64_t *file_id)
891 {
892 vnode_t dvp = NULLVP, vp = NULLVP;
893 base_dir_t base_dir;
894 vfs_context_t ctx;
895 struct nameidata *ndp = NULL;
896 struct vnode_attr *vap = NULL;
897 uint64_t vp_file_id;
898 int error;
899
900 if (flags & ~(O_CREAT | O_DIRECTORY)) {
901 return EINVAL;
902 }
903
904 if ((flags & O_CREAT) && !is_fs_writeable(fs_tag)) {
905 return EROFS;
906 }
907
908 if (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID) {
909 error = get_base_dir(fs_tag, &base_dir, &dvp);
910 } else {
911 error = get_base_dir(fs_tag, &base_dir, NULL);
912 if (!error) {
913 error = get_open_vnode(&base_dir, root_id, &dvp);
914 }
915 }
916
917 if (error) {
918 return error;
919 }
920
921 ndp = kalloc_type(struct nameidata, Z_WAITOK);
922 if (!ndp) {
923 error = ENOMEM;
924 goto out;
925 }
926
927 ctx = vfs_context_kernel();
928
929 NDINIT(ndp, LOOKUP, OP_OPEN, NOFOLLOW | NOCROSSMOUNT, UIO_SYSSPACE,
930 CAST_USER_ADDR_T(path), ctx);
931
932 ndp->ni_rootdir = dvp;
933 ndp->ni_flag = NAMEI_ROOTDIR;
934 ndp->ni_dvp = dvp;
935 ndp->ni_cnd.cn_flags |= USEDVP;
936
937 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
938 if (!vap) {
939 error = ENOMEM;
940 goto out;
941 }
942
943 VATTR_INIT(vap);
944 VATTR_SET(vap, va_mode, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
945
946 flags |= FREAD;
947
948 if (is_fs_writeable(fs_tag) && (root_id != EXCLAVE_FS_BASEDIR_ROOT_ID)) {
949 flags |= FWRITE;
950 }
951
952 error = vn_open_auth(ndp, &flags, vap, NULLVP);
953 if (error) {
954 goto out;
955 }
956
957 vp = ndp->ni_vp;
958
959 error = get_vnode_info(vp, NULL, NULL, &vp_file_id);
960 if (error) {
961 goto out;
962 }
963
964 if (is_graft(&base_dir)) {
965 error = host_to_graft_inum(&base_dir.graft_info, vp_file_id, file_id);
966 if (error) {
967 goto out;
968 }
969 } else {
970 *file_id = vp_file_id;
971 }
972
973 error = increment_vnode_open_count(vp, &base_dir, vp_file_id);
974
975 out:
976 if (dvp) {
977 vnode_put(dvp);
978 }
979 if (vp) {
980 vnode_put(vp);
981 }
982 if (ndp) {
983 kfree_type(struct nameidata, ndp);
984 }
985 if (vap) {
986 kfree_type(struct vnode_attr, vap);
987 }
988
989 return error;
990 }
991
992 int
vfs_exclave_fs_open(uint32_t fs_tag,uint64_t root_id,const char * name,uint64_t * file_id)993 vfs_exclave_fs_open(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
994 {
995 if (!exclave_fs_started()) {
996 return ENXIO;
997 }
998
999 if ((fs_tag == EFT_EXCLAVE) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1000 return EINVAL;
1001 }
1002
1003 return exclave_fs_open_internal(fs_tag, root_id, name, 0, file_id);
1004 }
1005
1006 int
vfs_exclave_fs_create(uint32_t fs_tag,uint64_t root_id,const char * name,uint64_t * file_id)1007 vfs_exclave_fs_create(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
1008 {
1009 if (!exclave_fs_started()) {
1010 return ENXIO;
1011 }
1012
1013 if ((fs_tag == EFT_EXCLAVE) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1014 return EINVAL;
1015 }
1016
1017 return exclave_fs_open_internal(fs_tag, root_id, name, O_CREAT, file_id);
1018 }
1019
1020 int
vfs_exclave_fs_close(uint32_t fs_tag,uint64_t file_id)1021 vfs_exclave_fs_close(uint32_t fs_tag, uint64_t file_id)
1022 {
1023 vnode_t vp = NULLVP;
1024 base_dir_t base_dir;
1025 int flags = FREAD;
1026 int error;
1027
1028 if (!exclave_fs_started()) {
1029 return ENXIO;
1030 }
1031
1032 error = get_base_dir(fs_tag, &base_dir, NULL);
1033 if (error) {
1034 return error;
1035 }
1036
1037 error = decrement_vnode_open_count(&base_dir, file_id, &vp);
1038 if (error) {
1039 goto out;
1040 }
1041
1042 if (is_fs_writeable(fs_tag) && !vnode_isdir(vp)) {
1043 flags |= FWRITE;
1044 }
1045
1046 error = vn_close(vp, flags, vfs_context_kernel());
1047
1048 out:
1049 if (vp) {
1050 vnode_put(vp);
1051 }
1052
1053 return error;
1054 }
1055
1056 static int
exclave_fs_io(uint32_t fs_tag,uint64_t file_id,uint64_t offset,uint64_t length,uint8_t * data,bool read)1057 exclave_fs_io(uint32_t fs_tag, uint64_t file_id, uint64_t offset, uint64_t length, uint8_t *data, bool read)
1058 {
1059 vnode_t vp = NULLVP;
1060 base_dir_t base_dir;
1061 UIO_STACKBUF(uio_buf, 1);
1062 uio_t auio = NULL;
1063 int error = 0;
1064
1065 if (!read && !is_fs_writeable(fs_tag)) {
1066 return EROFS;
1067 }
1068
1069 error = get_base_dir(fs_tag, &base_dir, NULL);
1070 if (error) {
1071 return error;
1072 }
1073
1074 error = get_open_vnode(&base_dir, file_id, &vp);
1075 if (error) {
1076 goto out;
1077 }
1078
1079 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, read ? UIO_READ : UIO_WRITE,
1080 &uio_buf[0], sizeof(uio_buf));
1081 if (!auio) {
1082 error = ENOMEM;
1083 goto out;
1084 }
1085
1086 error = uio_addiov(auio, (uintptr_t)data, length);
1087 if (error) {
1088 goto out;
1089 }
1090
1091 if (read) {
1092 error = VNOP_READ(vp, auio, 0, vfs_context_kernel());
1093 } else {
1094 error = VNOP_WRITE(vp, auio, 0, vfs_context_kernel());
1095 }
1096
1097 if (!error && uio_resid(auio)) {
1098 error = EIO;
1099 }
1100
1101 out:
1102 if (vp) {
1103 vnode_put(vp);
1104 }
1105
1106 return error;
1107 }
1108
/*
 * Read `length` bytes at file_offset from the open file file_id into data.
 * Returns ENXIO if the subsystem is not started; see exclave_fs_io().
 */
int
vfs_exclave_fs_read(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
{
	if (exclave_fs_started()) {
		return exclave_fs_io(fs_tag, file_id, file_offset, length, data, true);
	}

	return ENXIO;
}
1118
/*
 * Write `length` bytes from data to the open file file_id at file_offset.
 * Returns ENXIO if the subsystem is not started; see exclave_fs_io().
 */
int
vfs_exclave_fs_write(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
{
	if (exclave_fs_started()) {
		return exclave_fs_io(fs_tag, file_id, file_offset, length, (void *)data, false);
	}

	return ENXIO;
}
1128
1129 int
vfs_exclave_fs_remove(uint32_t fs_tag,uint64_t root_id,const char * name)1130 vfs_exclave_fs_remove(uint32_t fs_tag, uint64_t root_id, const char *name)
1131 {
1132 vnode_t rvp = NULLVP;
1133 base_dir_t base_dir;
1134 int error;
1135
1136 if (!exclave_fs_started()) {
1137 return ENXIO;
1138 }
1139
1140 if (!is_fs_writeable(fs_tag)) {
1141 return EROFS;
1142 }
1143
1144 error = get_base_dir(fs_tag, &base_dir, NULL);
1145 if (error) {
1146 return error;
1147 }
1148
1149 error = get_open_vnode(&base_dir, root_id, &rvp);
1150 if (error) {
1151 return error;
1152 }
1153
1154 error = unlink1(vfs_context_kernel(), rvp, CAST_USER_ADDR_T(name), UIO_SYSSPACE, 0);
1155
1156 if (rvp) {
1157 vnode_put(rvp);
1158 }
1159
1160 return error;
1161 }
1162
1163 int
vfs_exclave_fs_sync(uint32_t fs_tag,uint64_t file_id,uint64_t sync_op)1164 vfs_exclave_fs_sync(uint32_t fs_tag, uint64_t file_id, uint64_t sync_op)
1165 {
1166 vnode_t vp = NULLVP;
1167 base_dir_t base_dir;
1168 u_long command;
1169 int error;
1170
1171 if (!exclave_fs_started()) {
1172 return ENXIO;
1173 }
1174
1175 if (!is_fs_writeable(fs_tag)) {
1176 return EROFS;
1177 }
1178
1179 if (sync_op == EXCLAVE_FS_SYNC_OP_BARRIER) {
1180 command = F_BARRIERFSYNC;
1181 } else if (sync_op == EXCLAVE_FS_SYNC_OP_FULL) {
1182 command = F_FULLFSYNC;
1183 } else {
1184 return EINVAL;
1185 }
1186
1187 error = get_base_dir(fs_tag, &base_dir, NULL);
1188 if (error) {
1189 return error;
1190 }
1191
1192 error = get_open_vnode(&base_dir, file_id, &vp);
1193 if (error) {
1194 goto out;
1195 }
1196
1197 error = VNOP_IOCTL(vp, command, (caddr_t)NULL, 0, vfs_context_kernel());
1198
1199 out:
1200 if (vp) {
1201 vnode_put(vp);
1202 }
1203
1204 return error;
1205 }
1206
1207 static int
map_graft_dirents(fsioc_graft_info_t * graft_info,void * dirent_buf,int32_t count)1208 map_graft_dirents(fsioc_graft_info_t *graft_info, void *dirent_buf, int32_t count)
1209 {
1210 int i, error = 0;
1211
1212 for (i = 0; i < count; i++) {
1213 exclave_fs_dirent_t *dirent = (exclave_fs_dirent_t *)dirent_buf;
1214 uint64_t mapped_file_id;
1215
1216 error = host_to_graft_inum(graft_info, dirent->file_id, &mapped_file_id);
1217 if (error) {
1218 return error;
1219 }
1220 dirent->file_id = mapped_file_id;
1221 dirent_buf = (char *)dirent_buf + dirent->length;
1222 }
1223
1224 return 0;
1225 }
1226
1227 int
vfs_exclave_fs_readdir(uint32_t fs_tag,uint64_t file_id,void * dirent_buf,uint32_t buf_size,int32_t * count)1228 vfs_exclave_fs_readdir(uint32_t fs_tag, uint64_t file_id, void *dirent_buf,
1229 uint32_t buf_size, int32_t *count)
1230 {
1231 vnode_t dvp = NULLVP;
1232 base_dir_t base_dir;
1233 UIO_STACKBUF(uio_buf, 1);
1234 uio_t auio = NULL;
1235 vfs_context_t ctx;
1236 uthread_t ut;
1237 struct attrlist al;
1238 struct vnode_attr *vap = NULL;
1239 char *va_name = NULL;
1240 int32_t eofflag;
1241 int error;
1242
1243 if (!exclave_fs_started()) {
1244 return ENXIO;
1245 }
1246
1247 if (fs_tag != EFT_EXCLAVE) {
1248 #if (DEVELOPMENT || DEBUG)
1249 if (integrity_checks_enabled) {
1250 return ENOTSUP;
1251 }
1252 #else
1253 return ENOTSUP;
1254 #endif
1255 }
1256
1257 error = get_base_dir(fs_tag, &base_dir, NULL);
1258 if (error) {
1259 return error;
1260 }
1261
1262 error = get_open_vnode(&base_dir, file_id, &dvp);
1263 if (error) {
1264 goto out;
1265 }
1266
1267 if (!vnode_isdir(dvp)) {
1268 error = ENOTDIR;
1269 goto out;
1270 }
1271
1272 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
1273 &uio_buf[0], sizeof(uio_buf));
1274 if (!auio) {
1275 error = ENOMEM;
1276 goto out;
1277 }
1278
1279 error = uio_addiov(auio, (uintptr_t)dirent_buf, buf_size);
1280 if (error) {
1281 goto out;
1282 }
1283
1284 al.bitmapcount = ATTR_BIT_MAP_COUNT;
1285 al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_NAME | ATTR_CMN_OBJTYPE | ATTR_CMN_FILEID;
1286 al.fileattr = ATTR_FILE_DATALENGTH;
1287
1288 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1289 if (!vap) {
1290 error = ENOMEM;
1291 goto out;
1292 }
1293
1294 VATTR_INIT(vap);
1295 va_name = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_ZERO);
1296 if (!va_name) {
1297 error = ENOMEM;
1298 goto out;
1299 }
1300 vap->va_name = va_name;
1301
1302 VATTR_SET_ACTIVE(vap, va_name);
1303 VATTR_SET_ACTIVE(vap, va_objtype);
1304 VATTR_SET_ACTIVE(vap, va_fileid);
1305 VATTR_SET_ACTIVE(vap, va_total_size);
1306 VATTR_SET_ACTIVE(vap, va_data_size);
1307
1308 ctx = vfs_context_kernel();
1309 ut = current_uthread();
1310
1311 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1312 error = VNOP_GETATTRLISTBULK(dvp, &al, vap, auio, NULL,
1313 0, &eofflag, count, ctx);
1314 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1315
1316 if (!error && !eofflag) {
1317 return ENOBUFS;
1318 }
1319
1320 if (is_graft(&base_dir)) {
1321 error = map_graft_dirents(&base_dir.graft_info, dirent_buf, *count);
1322 if (error) {
1323 goto out;
1324 }
1325 }
1326
1327 out:
1328 if (va_name) {
1329 zfree(ZV_NAMEI, va_name);
1330 }
1331 if (vap) {
1332 kfree_type(struct vnode_attr, vap);
1333 }
1334 if (dvp) {
1335 vnode_put(dvp);
1336 }
1337
1338 return error;
1339 }
1340
1341 int
vfs_exclave_fs_getsize(uint32_t fs_tag,uint64_t file_id,uint64_t * size)1342 vfs_exclave_fs_getsize(uint32_t fs_tag, uint64_t file_id, uint64_t *size)
1343 {
1344 vnode_t vp = NULLVP;
1345 base_dir_t base_dir;
1346 vfs_context_t ctx;
1347 struct vnode_attr *vap = NULL;
1348 int error;
1349
1350 if (!exclave_fs_started()) {
1351 return ENXIO;
1352 }
1353
1354 error = get_base_dir(fs_tag, &base_dir, NULL);
1355 if (error) {
1356 return error;
1357 }
1358
1359 error = get_open_vnode(&base_dir, file_id, &vp);
1360 if (error) {
1361 goto out;
1362 }
1363
1364 if (vnode_isdir(vp)) {
1365 error = EISDIR;
1366 goto out;
1367 }
1368
1369 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1370 if (!vap) {
1371 error = ENOMEM;
1372 goto out;
1373 }
1374
1375 VATTR_INIT(vap);
1376 VATTR_WANTED(vap, va_data_size);
1377
1378 ctx = vfs_context_kernel();
1379
1380 error = VNOP_GETATTR(vp, vap, ctx);
1381 if (error) {
1382 goto out;
1383 }
1384
1385 if (!VATTR_IS_SUPPORTED(vap, va_data_size)) {
1386 error = ENOTSUP;
1387 goto out;
1388 }
1389
1390 *size = vap->va_data_size;
1391
1392 out:
1393 if (vap) {
1394 kfree_type(struct vnode_attr, vap);
1395 }
1396 if (vp) {
1397 vnode_put(vp);
1398 }
1399
1400 return error;
1401 }
1402
1403