1 /*
2 * Copyright (c) 2022-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <string.h>
30 #include <sys/fcntl.h>
31 #include <sys/fsctl.h>
32 #include <sys/namei.h>
33 #include <sys/stat.h>
34 #include <sys/vnode.h>
35 #include <sys/vnode_internal.h>
36 #include <sys/uio_internal.h>
37 #include <sys/fsevents.h>
38 #include <kern/kalloc.h>
39 #include <vfs/vfs_exclave_fs.h>
40 #include <miscfs/devfs/devfs.h>
41 #include <pexpert/pexpert.h>
42
43 __private_extern__ int unlink1(vfs_context_t, vnode_t, user_addr_t,
44 enum uio_seg, int);
45
// Flags for open vnodes, currently used only in DEVELOPMENT or DEBUG builds
#define OV_EXCLAVE_BASE 1
#define OV_FORCE_ENOSPC 2 // When this flag is set, writes fail with ENOSPC

/*
 * One entry in the open-vnodes hash table: tracks how many outstanding opens
 * a given (dev, file_id) pair has through this layer.
 */
struct open_vnode {
	LIST_ENTRY(open_vnode) chain;
	vnode_t vp;           /* vnode for this file */
	dev_t dev;            /* device of the containing filesystem */
	uint64_t file_id;     /* host (not graft-relative) inode number */
	uint32_t open_count;  /* number of outstanding opens of this vnode */
#if (DEVELOPMENT || DEBUG)
	uint32_t flags;       /* OV_* flags */
#endif
};

/* Inode number of a graft's root directory as seen from inside the graft. */
#define ROOT_DIR_INO_NUM 2

/* registered_fs_tag_t flags */
#define VFS_EXCLAVE_FS_BASE_DIR_GRAFT 1
#define VFS_EXCLAVE_FS_BASE_DIR_SEALED 2

/*
 * A registered base directory: one per fs tag.  When the base directory is
 * inside an APFS graft, graft_info holds the graft's inode-number mapping.
 */
typedef struct registered_fs_tag {
	LIST_ENTRY(registered_fs_tag) link;
	uint32_t fstag;   /* EFT_* tag this base dir is registered under */
	uint32_t flags;   /* VFS_EXCLAVE_FS_BASE_DIR_* */
	vnode_t vp;       /* base directory vnode; holds a vnode_ref */
	dev_t dev;        /* device of the containing filesystem */
	fsioc_graft_info_t graft_info; /* valid only when GRAFT flag is set */
} registered_fs_tag_t;

/* hash table that maps from file_id to a vnode and its open count */
typedef LIST_HEAD(open_vnode_head, open_vnode) open_vnodes_list_head_t;
static open_vnodes_list_head_t *open_vnodes_hashtbl = NULL;
static u_long open_vnodes_hashmask = 0;
static int open_vnodes_hashsize = 0;
static uint32_t num_open_vnodes = 0;

/* registered base directories */
typedef LIST_HEAD(registered_tags_head, registered_fs_tag) registered_tags_head_t;
static registered_tags_head_t *registered_tags_hash = NULL;
static uint32_t num_tags_registered = 0;
static u_long rft_hashmask;

/* requested bucket count for the registered-tags hash table */
#define REGFSTAG_HASH_WIDTH 32

static LCK_GRP_DECLARE(vfs_exclave_lck_grp, "vfs_exclave");

/* protects registered_tags list and num_tags_registered counter */
static lck_mtx_t regtag_mtx;

/* protects open vnodes hash table */
static lck_mtx_t open_vnodes_mtx;

/* bucket selection for the open-vnodes table; hashmask is 2^k - 1 from hashinit() */
#define HASHFUNC(dev, file_id) (((dev) + (file_id)) & open_vnodes_hashmask)
#define OPEN_VNODES_HASH(dev, file_id) (&open_vnodes_hashtbl[HASHFUNC(dev, file_id)])

#if (DEVELOPMENT || DEBUG)
static bool integrity_checks_disabled = false;
#define EXCLAVE_INTEGRITY_CHECKS_DISABLED_BOOTARG "disable_integrity_checks"
static bool vfs_exclave_is_enospc_exclave(const char *exclave_id);
#endif

static int exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id,
    const char *path, int flags, uint32_t ov_flags, uint64_t *file_id);
static int vfs_exclave_fs_unregister_internal(vnode_t vp, bool take_basedir_lock);
110
111 static uint32_t
hash_fstag(uint32_t tag)112 hash_fstag(uint32_t tag)
113 {
114 return tag % (rft_hashmask + 1);
115 }
116
117 static registered_tags_head_t *
get_registered_tags_chain(uint32_t tag)118 get_registered_tags_chain(uint32_t tag)
119 {
120 return registered_tags_hash + hash_fstag(tag);
121 }
122
/*
 * Get the fsid and fileid attributes of the given vnode.
 *
 * Each of dev / fsid / file_id may be NULL; only the non-NULL outputs are
 * requested from the filesystem and filled in.  Returns ENOTSUP if the
 * filesystem does not report a requested attribute, or the vnode_getattr
 * error otherwise.
 */
static int
get_vnode_info(vnode_t vp, dev_t *dev, fsid_t *fsid, uint64_t *file_id)
{
	struct vnode_attr va;
	int error;

	memset(&va, 0, sizeof(va));
	VATTR_INIT(&va);
	if (dev) {
		VATTR_WANTED(&va, va_fsid);
	}
	if (fsid) {
		VATTR_WANTED(&va, va_fsid64);
	}
	if (file_id) {
		VATTR_WANTED(&va, va_fileid);
	}

	error = vnode_getattr(vp, &va, vfs_context_kernel());
	if (error) {
		return error;
	}

	/* vnode_getattr can succeed without supplying every requested
	 * attribute; verify each one before consuming it. */
	if (dev) {
		if (!VATTR_IS_SUPPORTED(&va, va_fsid)) {
			return ENOTSUP;
		}
		*dev = va.va_fsid;
	}

	if (fsid) {
		if (!VATTR_IS_SUPPORTED(&va, va_fsid64)) {
			return ENOTSUP;
		}
		*fsid = va.va_fsid64;
	}

	if (file_id) {
		if (!VATTR_IS_SUPPORTED(&va, va_fileid)) {
			return ENOTSUP;
		}
		*file_id = va.va_fileid;
	}

	return 0;
}
172
173 static inline bool
is_graft(registered_fs_tag_t * rft)174 is_graft(registered_fs_tag_t *rft)
175 {
176 return rft->flags & VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
177 }
178
179 static inline bool
is_sealed(registered_fs_tag_t * rft)180 is_sealed(registered_fs_tag_t *rft)
181 {
182 return rft->flags & VFS_EXCLAVE_FS_BASE_DIR_SEALED;
183 }
184
185 static int
graft_to_host_inum(fsioc_graft_info_t * gi,uint64_t graft_inum,uint64_t * host_inum)186 graft_to_host_inum(fsioc_graft_info_t *gi, uint64_t graft_inum, uint64_t *host_inum)
187 {
188 if (graft_inum == ROOT_DIR_INO_NUM) {
189 *host_inum = gi->gi_graft_dir;
190 } else if (graft_inum < gi->gi_inum_len) {
191 *host_inum = gi->gi_inum_base + graft_inum;
192 } else {
193 return ERANGE;
194 }
195
196 return 0;
197 }
198
199 static int
host_to_graft_inum(fsioc_graft_info_t * gi,uint64_t host_inum,uint64_t * graft_inum)200 host_to_graft_inum(fsioc_graft_info_t *gi, uint64_t host_inum, uint64_t *graft_inum)
201 {
202 if (host_inum == gi->gi_graft_dir) {
203 *graft_inum = ROOT_DIR_INO_NUM;
204 } else if ((host_inum >= gi->gi_inum_base) && (host_inum < gi->gi_inum_base + gi->gi_inum_len)) {
205 *graft_inum = host_inum - gi->gi_inum_base;
206 } else {
207 return ERANGE;
208 }
209
210 return 0;
211 }
212
/*
 * Check if a vnode is in an APFS graft and if so obtain information about the graft.
 *
 * Uses a two-phase FSIOC_GET_GRAFT_INFO protocol: first query the record
 * count, then allocate a buffer and fetch the records.  Because the graft
 * state can change between the two calls, the result of the second call is
 * re-validated (EAGAIN if the vnode is no longer grafted, ERANGE if the
 * returned index is out of bounds).
 *
 * On success *is_graft says whether vp is grafted, and *graft_info is filled
 * in only when *is_graft is true.
 */
static int
get_graft_info(vnode_t vp, bool *is_graft, fsioc_graft_info_t *graft_info)
{
	fsioc_get_graft_info_t ggi = {0};
	uint16_t alloc_count;
	fsioc_graft_info_t *graft_infos = NULL;
	int error = 0;

	*is_graft = false;

	/* Phase 1: no buffer supplied — just learn whether vp is in a graft
	 * and how many graft info records exist. */
	error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, (caddr_t)&ggi, 0, vfs_context_kernel());
	if (error) {
		return error;
	}

	if (!ggi.ggi_is_in_graft) {
		return 0;
	}

	/* In a graft but zero records would be inconsistent. */
	if (ggi.ggi_count == 0) {
		return EINVAL;
	}

	alloc_count = ggi.ggi_count;

	graft_infos = kalloc_type(fsioc_graft_info_t, alloc_count, Z_WAITOK | Z_ZERO);
	if (!graft_infos) {
		return ENOMEM;
	}

	/* Phase 2: fetch all records into our buffer. */
	memset(&ggi, 0, sizeof(ggi));
	ggi.ggi_count = alloc_count;
	ggi.ggi_buffer = (user64_addr_t)graft_infos;

	error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, (caddr_t)&ggi, 0, vfs_context_kernel());
	if (error) {
		goto out;
	}

	/* Graft state may have changed between the two ioctls. */
	if (!ggi.ggi_is_in_graft) {
		error = EAGAIN;
		goto out;
	}

	if (ggi.ggi_graft_index >= alloc_count) {
		error = ERANGE;
		goto out;
	}

	*graft_info = graft_infos[ggi.ggi_graft_index];
	*is_graft = true;

out:
	if (graft_infos) {
		kfree_type(fsioc_graft_info_t, alloc_count, graft_infos);
	}

	return error;
}
275
276 static bool
is_fs_writeable(uint32_t fs_tag)277 is_fs_writeable(uint32_t fs_tag)
278 {
279 return (fs_tag == EFT_EXCLAVE) || (fs_tag == EFT_EXCLAVE_MAIN);
280 }
281
/*
 * Set a base directory for the given fs tag.
 *
 * The caller has already taken a vnode_ref on vp (vfs_exclave_fs_register);
 * on success the new registered_fs_tag_t entry owns that reference.
 * Returns EALREADY if vp is already registered under fs_tag, EBUSY if a
 * different (live) vnode is registered under fs_tag.
 */
static int
set_base_dir(uint32_t fs_tag, vnode_t vp, fsioc_graft_info_t *graft_info, bool is_sealed)
{
	dev_t dev;
	int error = 0;
	registered_fs_tag_t *rft;

	lck_mtx_lock(&regtag_mtx);

	registered_tags_head_t *rfthead = get_registered_tags_chain(fs_tag);

	LIST_FOREACH(rft, rfthead, link) {
		if (rft->fstag == fs_tag) {
			// Check if the registered vp is DEAD, it can be the case in edu mode where the original location was unmounted
			// if the vnode is dead unregister it, and continue with setting new base_dir
			if (vnode_vtype(rft->vp) == VBAD) {
				/* false: we already hold regtag_mtx */
				vfs_exclave_fs_unregister_internal(rft->vp, false);
				break;
			}

			error = (rft->vp == vp) ? EALREADY : EBUSY;
			goto out;
		}
	}

	error = get_vnode_info(vp, &dev, NULL, NULL);
	if (error) {
		goto out;
	}

#if !defined(XNU_TARGET_OS_OSX)
	/*
	 * make sure that a writable fs does not share a dev_t with
	 * another non writable fs (and vice versa) since writable
	 * vnodes are opened RW whereas non writable fs vnodes are
	 * opened RO
	 */
	int i;
	bool is_writable_fs_tag = is_fs_writeable(fs_tag);
	for (i = 0; i <= rft_hashmask; i++) {
		registered_tags_head_t *head = registered_tags_hash + i;
		LIST_FOREACH(rft, head, link) {
			if ((is_fs_writeable(rft->fstag) != is_writable_fs_tag) && rft->dev == dev) {
				printf("tag %u has same device %u.%u as tag %u\n", fs_tag, major(dev), minor(dev), rft->fstag);
				error = EBUSY;
				goto out;
			}
		}
	}
#endif

	rft = kalloc_type(registered_fs_tag_t, Z_WAITOK | Z_ZERO);
	if (rft == NULL) {
		error = ENOMEM;
		goto out;
	}

	if (graft_info) {
		rft->flags |= VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
		if (is_sealed) {
			rft->flags |= VFS_EXCLAVE_FS_BASE_DIR_SEALED;
		}
		rft->graft_info = *graft_info;
	}

	rft->fstag = fs_tag;
	rft->vp = vp;
	rft->dev = dev;
	LIST_INSERT_HEAD(rfthead, rft, link);

	num_tags_registered++;

out:
	lck_mtx_unlock(&regtag_mtx);
	return error;
}
361
/*
 * Get the base directory entry for the given fs tag. If vpp is passed, return
 * with an iocount taken on the vnode.
 *
 * base_dir (if non-NULL) receives a snapshot copy of the registration entry;
 * it is valid after regtag_mtx is dropped precisely because it is a copy.
 * Returns ENOENT if the tag is not registered, EINVAL if neither output
 * pointer is supplied.
 */
static int
get_base_dir(uint32_t fs_tag, registered_fs_tag_t *base_dir, vnode_t *vpp)
{
	int error = ENOENT;
	registered_fs_tag_t *rft;

	if (!base_dir && !vpp) {
		return EINVAL;
	}

	lck_mtx_lock(&regtag_mtx);

	registered_tags_head_t *rfthead = get_registered_tags_chain(fs_tag);

	LIST_FOREACH(rft, rfthead, link) {
		if (rft->fstag == fs_tag) {
			if (vpp) {
				vnode_t base_vp = rft->vp;
				/* take an iocount; fails (e.g. on a dead vnode)
				 * with the vnode_getwithref error */
				error = vnode_getwithref(base_vp);
				if (error) {
					break;
				}
				*vpp = base_vp;
			}

			if (base_dir) {
				*base_dir = *rft;
			}
			error = 0;
			break;
		}
	}

	lck_mtx_unlock(&regtag_mtx);
	return error;
}
402
/*
 * Initialize the exclave-fs layer: locks, the open-vnodes hash table, the
 * registered-tags hash table, and (on DEVELOPMENT/DEBUG) the
 * integrity-checks boot-arg.  Returns ENOMEM if either table cannot be
 * allocated.  Must not race with itself or vfs_exclave_fs_stop().
 */
int
vfs_exclave_fs_start(void)
{
	lck_mtx_init(&regtag_mtx, &vfs_exclave_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init(&open_vnodes_mtx, &vfs_exclave_lck_grp, LCK_ATTR_NULL);

	assert(open_vnodes_hashtbl == NULL);

	/* size the open-vnodes table proportionally to the system vnode budget */
	open_vnodes_hashsize = desiredvnodes / 16;
	open_vnodes_hashtbl = hashinit(open_vnodes_hashsize, M_VNODE, &open_vnodes_hashmask);
	if (open_vnodes_hashtbl == NULL) {
		open_vnodes_hashsize = open_vnodes_hashmask = 0;
		return ENOMEM;
	}

	registered_tags_hash = hashinit(REGFSTAG_HASH_WIDTH, M_VNODE /*unused*/, &rft_hashmask);
	if (registered_tags_hash == NULL) {
		/* unwind the first table so exclave_fs_started() stays false */
		hashdestroy(open_vnodes_hashtbl, M_VNODE, open_vnodes_hashmask);
		open_vnodes_hashtbl = NULL;
		open_vnodes_hashmask = open_vnodes_hashsize = 0;
		return ENOMEM;
	}

#if (DEVELOPMENT || DEBUG)
	uint32_t bootarg_val;
	if (PE_parse_boot_argn(EXCLAVE_INTEGRITY_CHECKS_DISABLED_BOOTARG, &bootarg_val, sizeof(bootarg_val))) {
		if (bootarg_val) {
			integrity_checks_disabled = true;
		}
	}
#endif

	return 0;
}
437
/*
 * The layer counts as started once vfs_exclave_fs_start() has allocated the
 * open-vnodes hash table (it is reset to NULL by vfs_exclave_fs_stop()).
 */
static bool
exclave_fs_started(void)
{
	return open_vnodes_hashtbl != NULL;
}
443
static void release_open_vnodes(registered_fs_tag_t *);

/*
 * Tear down one registration: force-release any open vnodes on its device,
 * drop the vnode_ref taken at registration, unlink the entry and free it.
 * NOTE(review): release_open_vnodes documents that regtag_mtx must be held
 * by the caller; all callers here either hold it or run single-threaded
 * (vfs_exclave_fs_stop) — confirm when adding new callers.
 */
static void
drop_registered_tag(registered_fs_tag_t *rft)
{
	release_open_vnodes(rft);

	vnode_rele(rft->vp);
	LIST_REMOVE(rft, link);
	kfree_type(registered_fs_tag_t, rft);
	num_tags_registered--;
}
456
/*
 * Tear down the exclave-fs layer: drop every registration, free both hash
 * tables, and destroy the locks.  Assumes a single-threaded context (no
 * concurrent register/open/close calls); a no-op if the layer was never
 * started.
 */
void
vfs_exclave_fs_stop(void)
{
	registered_fs_tag_t *rft, *nxt;
	int i;

	if (!exclave_fs_started()) {
		return;
	}

	/* No need to lock regtag_mtx - this function assumes
	 * single-threaded context */
	for (i = 0; i <= rft_hashmask; i++) {
		registered_tags_head_t *rfthead = registered_tags_hash + i;

		LIST_FOREACH_SAFE(rft, rfthead, link, nxt) {
			drop_registered_tag(rft);
		}
	}

	hashdestroy(registered_tags_hash, M_VNODE, rft_hashmask);

	/* dropping every registration must have released every open vnode */
	assert(num_open_vnodes == 0);
	assert(open_vnodes_hashtbl);

	hashdestroy(open_vnodes_hashtbl, M_VNODE, open_vnodes_hashmask);
	open_vnodes_hashtbl = NULL;
	open_vnodes_hashmask = open_vnodes_hashsize = 0;

	lck_mtx_destroy(&regtag_mtx, &vfs_exclave_lck_grp);
	lck_mtx_destroy(&open_vnodes_mtx, &vfs_exclave_lck_grp);

#if (DEVELOPMENT || DEBUG)
	integrity_checks_disabled = false;
#endif
}
493
/*
 * Register vp as the base directory for fs_tag.
 *
 * Requirements: the layer is started, vp is an APFS directory, and writable
 * tags (EFT_EXCLAVE/EFT_EXCLAVE_MAIN) are not inside a graft (grafts are
 * read-only).  On success the registration holds a vnode_ref on vp.
 * Re-registering the same vnode under the same tag returns 0 (EALREADY is
 * swallowed); a different vnode under the same tag returns EBUSY.
 */
int
vfs_exclave_fs_register(uint32_t fs_tag, vnode_t vp)
{
	char vfs_name[MFSNAMELEN];
	bool is_graft;
	fsioc_graft_info_t graft_info;
	int error;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* EFT_EXCLAVE_MAIN is a macOS-only tag */
	if (fs_tag == EFT_EXCLAVE_MAIN) {
		return ENOTSUP;
	}
#endif

	vnode_vfsname(vp, vfs_name);
	if (strcmp(vfs_name, "apfs")) {
		return ENOTSUP;
	}

	if (!vnode_isdir(vp)) {
		return ENOTDIR;
	}

	error = get_graft_info(vp, &is_graft, &graft_info);
	if (error) {
		return error;
	}

	if (is_graft && is_fs_writeable(fs_tag)) {
		return EROFS;
	}

	/* long-term reference held by the registration entry */
	error = vnode_ref(vp);
	if (error) {
		return error;
	}

	// Check if tag is sealed, RW tags are always not sealed
	bool is_sealed = false;
	if (!is_fs_writeable(fs_tag)) {
		/* FSIOC_EVAL_ROOTAUTH success implies the volume is sealed */
		error = VNOP_IOCTL(vp, FSIOC_EVAL_ROOTAUTH, NULL, 0, vfs_context_kernel());
		if (!error) {
			is_sealed = true;
		}
	}

	error = set_base_dir(fs_tag, vp, is_graft ? &graft_info : NULL, is_sealed);
	if (error) {
		vnode_rele(vp);
		// if this directory is already registered in this tag do not consider it as an error
		if (error == EALREADY) {
			error = 0;
		}
		return error;
	}

	return 0;
}
556
/*
 * Convenience wrapper: look up base_path (kernel address space, symlinks
 * followed) and register the resulting vnode under fs_tag.
 */
int
vfs_exclave_fs_register_path(uint32_t fs_tag, const char *base_path)
{
	struct nameidata nd;
	int error;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(base_path), vfs_context_kernel());

	error = namei(&nd);
	if (error) {
		return error;
	}

	error = vfs_exclave_fs_register(fs_tag, nd.ni_vp);

	/* registration takes its own vnode_ref; drop namei's iocount */
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	return error;
}
582
/*
 * Release open vnodes for the given fs_tag.
 * regtag_mtx must be locked by caller.
 *
 * Drops every usecount still held for vnodes on base_dir's device and frees
 * their table entries.  Skipped entirely if another registered base dir
 * shares the same device, since those opens may belong to the other tag.
 */
static void
release_open_vnodes(registered_fs_tag_t *base_dir)
{
	dev_t dev;
	int i;

	lck_mtx_lock(&open_vnodes_mtx);

	if (num_open_vnodes == 0) {
		goto done;
	}

	dev = base_dir->dev;

	if (num_tags_registered > 1) {
		/* skip release if another base dir has the same device */
		for (i = 0; i <= rft_hashmask; i++) {
			registered_tags_head_t *rfthead = registered_tags_hash + i;
			registered_fs_tag_t *rft;

			LIST_FOREACH(rft, rfthead, link) {
				if ((rft != base_dir) && (rft->dev == dev)) {
					goto done;
				}
			}
		}
	}

	/* force-close: drop one usecount per outstanding open */
	for (i = 0; i < open_vnodes_hashmask + 1; i++) {
		struct open_vnode *entry, *temp_entry;

		LIST_FOREACH_SAFE(entry, &open_vnodes_hashtbl[i], chain, temp_entry) {
			if (entry->dev != dev) {
				continue;
			}
			while (entry->open_count) {
				vnode_rele(entry->vp);
				entry->open_count--;
			}
			LIST_REMOVE(entry, chain);
			kfree_type(struct open_vnode, entry);
			num_open_vnodes--;
		}
	}

done:
	lck_mtx_unlock(&open_vnodes_mtx);
}
635
/*
 * Find the registration whose base-dir vnode is vp and tear it down.
 * take_basedir_lock is false when the caller (set_base_dir) already holds
 * regtag_mtx.  Returns ENOENT if vp is not a registered base directory.
 */
static int
vfs_exclave_fs_unregister_internal(vnode_t vp, bool take_basedir_lock)
{
	int error = ENOENT;
	int i;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	if (take_basedir_lock) {
		lck_mtx_lock(&regtag_mtx);
	}

	/* search every bucket: we are keyed by vnode, not by tag */
	for (i = 0; i <= rft_hashmask; i++) {
		registered_tags_head_t *rfthead = registered_tags_hash + i;
		registered_fs_tag_t *rft, *nxt;

		LIST_FOREACH_SAFE(rft, rfthead, link, nxt) {
			if (rft->vp == vp) {
				drop_registered_tag(rft);
				error = 0;
				goto done;
			}
		}
	}

done:

	if (take_basedir_lock) {
		lck_mtx_unlock(&regtag_mtx);
	}

	return error;
}
671
/* Public unregister entry point; takes regtag_mtx itself. */
int
vfs_exclave_fs_unregister(vnode_t vp)
{
	return vfs_exclave_fs_unregister_internal(vp, true);
}
677
/*
 * Enumerate registered base directories.
 *
 * Two-phase API: with buf == NULL, *count is set to the number of
 * registrations.  With a buffer, *count is its capacity on input (ENOSPC if
 * too small) and the number of entries written on output.
 */
int
vfs_exclave_fs_get_base_dirs(void *buf, uint32_t *count)
{
	int error = 0;
	uint32_t num_copied = 0;
	exclave_fs_base_dir_t *dirs = (exclave_fs_base_dir_t *)buf;
	int i;

	if (!count || (dirs && !*count)) {
		return EINVAL;
	}

	lck_mtx_lock(&regtag_mtx);

	if (!dirs) {
		/* size query only */
		*count = num_tags_registered;
		goto out;
	} else if (*count < num_tags_registered) {
		error = ENOSPC;
		goto out;
	}

	for (i = 0; i <= rft_hashmask; i++) {
		registered_tags_head_t *rfthead = registered_tags_hash + i;
		registered_fs_tag_t *base_dir;

		LIST_FOREACH(base_dir, rfthead, link) {
			exclave_fs_base_dir_t *out_dir = &dirs[num_copied];

			memset(out_dir, 0, sizeof(exclave_fs_base_dir_t));

			error = get_vnode_info(base_dir->vp, NULL, &out_dir->fsid, &out_dir->base_dir);
			if (error) {
				goto out;
			}

			out_dir->fs_tag = base_dir->fstag;
			/* 0 means "not a graft" */
			out_dir->graft_file = is_graft(base_dir) ? base_dir->graft_info.gi_graft_file : 0;
			num_copied++;
		}
	}

	*count = num_copied;

out:
	lck_mtx_unlock(&regtag_mtx);
	return error;
}
726
/*
 * Create the directory named exclave_id directly under base_vp with mode
 * rwxrwx---.  Returns EEXIST if the name already exists.  The namei /
 * vn_create sequence may ask for another lookup pass (EKEEPLOOKING), hence
 * the continue_lookup loop.
 */
static int
create_exclave_dir(vnode_t base_vp, const char *exclave_id)
{
	vnode_t vp = NULLVP, dvp = NULLVP;
	vfs_context_t ctx;
	struct vnode_attr va, *vap = &va;
	struct nameidata nd;
	int update_flags = 0;
	int error;

	ctx = vfs_context_kernel();

	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(exclave_id), ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;

continue_lookup:
	/* resolve exclave_id relative to base_vp, not the process cwd */
	nd.ni_dvp = base_vp;
	nd.ni_cnd.cn_flags |= USEDVP;

	error = namei(&nd);
	if (error) {
		return error;
	}

	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* a vnode coming back from a CREATE lookup means the name exists */
	if (vp != NULLVP) {
		error = EEXIST;
		goto out;
	}

	nd.ni_cnd.cn_flags &= ~USEDVP;

	VATTR_INIT(vap);
	VATTR_SET(vap, va_mode, S_IRWXU | S_IRWXG);
	VATTR_SET(vap, va_type, VDIR);

	error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL);
	if (error) {
		goto out;
	}

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
	if (error == EKEEPLOOKING) {
		/* filesystem wants the lookup redone; loop with the same nd */
		nd.ni_vp = vp;
		goto continue_lookup;
	}

	if (error) {
		goto out;
	}

	/* make sure the new vnode's name/parent identity is populated for
	 * the name cache */
	if (vp->v_name == NULL) {
		update_flags |= VNODE_UPDATE_NAME;
	}
	if (vp->v_parent == NULLVP) {
		update_flags |= VNODE_UPDATE_PARENT;
	}

	if (update_flags) {
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr,
		    nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
	}

out:
	nameidone(&nd);
	if (vp) {
		vnode_put(vp);
	}
	if (dvp) {
		vnode_put(dvp);
	}

	return error;
}
804
/* Legacy entry point: per-exclave root under the default EFT_EXCLAVE tag. */
int
vfs_exclave_fs_root(const char *exclave_id, uint64_t *root_id)
{
	return vfs_exclave_fs_root_ex(EFT_EXCLAVE, exclave_id, root_id);
}
810
/*
 * Open (creating on first use) the per-exclave root directory named
 * exclave_id under the base dir of a writable fs tag, returning its file id
 * in *root_id.  exclave_id must be a single path component.
 */
int
vfs_exclave_fs_root_ex(uint32_t fs_tag, const char *exclave_id, uint64_t *root_id)
{
	int error;
	uint32_t ov_flags = 0;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	if (!is_fs_writeable(fs_tag)) {
		/* root is valid only on RW tags */
		return EINVAL;
	}

	if (strchr(exclave_id, '/') || !strcmp(exclave_id, ".") || !strcmp(exclave_id, "..")) {
		/* don't allow an exclave_id that looks like a path */
		return EINVAL;
	}

#if (DEVELOPMENT || DEBUG)
	/* test hook: mark this exclave's root so its writes fail with ENOSPC */
	if (vfs_exclave_is_enospc_exclave(exclave_id)) {
		ov_flags = OV_EXCLAVE_BASE | OV_FORCE_ENOSPC;
	}
#endif

	error = exclave_fs_open_internal(fs_tag, EXCLAVE_FS_BASEDIR_ROOT_ID,
	    exclave_id, O_DIRECTORY, ov_flags, root_id);

	if (error == ENOENT) {
		/* first use: create the directory, then retry the open */
		vnode_t base_vp;

		error = get_base_dir(fs_tag, NULL, &base_vp);
		if (error) {
			return error;
		}

		error = create_exclave_dir(base_vp, exclave_id);
		if (!error) {
			error = exclave_fs_open_internal(fs_tag, EXCLAVE_FS_BASEDIR_ROOT_ID,
			    exclave_id, O_DIRECTORY, ov_flags, root_id);
		}

		vnode_put(base_vp);
	}

	return error;
}
859
/*
 * Find a vnode in the open vnodes hash table with the given file_id
 * under a base dir, take an iocount on it and return it.
 * If base dir is a graft, file_id should be the graft inode number.
 *
 * ov_flags (optional) receives the entry's OV_* flags on DEVELOPMENT/DEBUG
 * builds and 0 otherwise.  Returns ENOENT if the file is not open.
 */
static int
get_open_vnode(registered_fs_tag_t *base_dir, uint64_t file_id, vnode_t *vpp, uint32_t *ov_flags)
{
	uint64_t vp_file_id;
	struct open_vnode *entry;
	int error;

	/* the table is keyed by host inode numbers */
	if (is_graft(base_dir)) {
		error = graft_to_host_inum(&base_dir->graft_info, file_id, &vp_file_id);
		if (error) {
			return error;
		}
	} else {
		vp_file_id = file_id;
	}

	error = ENOENT;

	lck_mtx_lock(&open_vnodes_mtx);

	LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
		if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
			error = vnode_getwithref(entry->vp);
			if (!error) {
				*vpp = entry->vp;
				if (ov_flags) {
#if (DEVELOPMENT || DEBUG)
					*ov_flags = entry->flags;
#else
					*ov_flags = 0;
#endif
				}
			}
			break;
		}
	}

	lck_mtx_unlock(&open_vnodes_mtx);
	return error;
}
905
/*
 * Increment a vnode open count in the open vnodes hash table.
 * If base dir is a graft, file_id should be the host inode number.
 * Also update entry's flags
 *
 * Creates the table entry on first open.  NOTE(review): each count appears
 * to correspond to one usecount taken by vn_open_auth in the caller and
 * dropped by vn_close / release_open_vnodes — confirm before changing the
 * open/close pairing.
 */
static int
increment_vnode_open_count(vnode_t vp, registered_fs_tag_t *base_dir, uint64_t file_id, uint32_t flags)
{
	struct open_vnode *entry;
	open_vnodes_list_head_t *list;
	int error = 0;

	lck_mtx_lock(&open_vnodes_mtx);

	list = OPEN_VNODES_HASH(base_dir->dev, file_id);

	LIST_FOREACH(entry, list, chain) {
		if ((entry->dev == base_dir->dev) && (entry->file_id == file_id)) {
			break;
		}
	}

	/* entry is NULL when the loop completed without a match */
	if (!entry) {
		entry = kalloc_type(struct open_vnode, Z_WAITOK | Z_ZERO);
		if (!entry) {
			error = ENOMEM;
			goto out;
		}
		entry->vp = vp;
		entry->dev = base_dir->dev;
		entry->file_id = file_id;
		LIST_INSERT_HEAD(list, entry, chain);
		num_open_vnodes++;
	}

	entry->open_count++;
#if (DEVELOPMENT || DEBUG)
	entry->flags |= flags;
#else
#pragma unused(flags)
#endif

out:
	lck_mtx_unlock(&open_vnodes_mtx);
	return error;
}
952
/*
 * Decrement a vnode open count in the open vnodes hash table and
 * return it with an iocount taken on it.
 * If base dir is a graft, file_id should be the graft inode number.
 *
 * The table entry is freed when its count reaches zero.  The caller
 * (vfs_exclave_fs_close) is responsible for the matching vn_close /
 * usecount drop.
 */
static int
decrement_vnode_open_count(registered_fs_tag_t *base_dir, uint64_t file_id, vnode_t *vpp)
{
	struct open_vnode *entry;
	vnode_t vp;
	uint64_t vp_file_id;
	int error = 0;

	/* the table is keyed by host inode numbers */
	if (is_graft(base_dir)) {
		error = graft_to_host_inum(&base_dir->graft_info, file_id, &vp_file_id);
		if (error) {
			return error;
		}
	} else {
		vp_file_id = file_id;
	}

	lck_mtx_lock(&open_vnodes_mtx);

	LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
		if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
			break;
		}
	}

	if (!entry) {
		error = ENOENT;
		goto out;
	}

	vp = entry->vp;
	entry->open_count--;

	if (entry->open_count == 0) {
		LIST_REMOVE(entry, chain);
		kfree_type(struct open_vnode, entry);
		num_open_vnodes--;
	}

	/* hand the caller an iocount for the vn_close that follows */
	error = vnode_getwithref(vp);
	if (!error) {
		*vpp = vp;
	}

out:
	lck_mtx_unlock(&open_vnodes_mtx);
	return error;
}
1006
/*
 * Core open/create path.
 *
 * Resolves `path` relative to either the tag's base directory
 * (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID) or an already-open directory
 * identified by root_id, opens it via vn_open_auth, records the open in the
 * open-vnodes table, and returns the (graft-relative, if applicable) inode
 * number in *file_id.
 *
 * Only O_CREAT and O_DIRECTORY are accepted in `flags`; O_CREAT is rejected
 * on read-only tags.  Writable tags resolve with NOFOLLOW, read-only tags
 * with FOLLOW.  ov_flags are OV_* flags to attach to the new open-vnodes
 * entry (DEVELOPMENT/DEBUG only).
 */
static int
exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id, const char *path,
    int flags, uint32_t ov_flags, uint64_t *file_id)
{
	vnode_t dvp = NULLVP, vp = NULLVP;
	registered_fs_tag_t base_dir;
	vfs_context_t ctx;
	struct nameidata *ndp = NULL;
	struct vnode_attr *vap = NULL;
	uint64_t vp_file_id;
	int error;
	uint32_t ndflags = NOCROSSMOUNT;
	uint32_t root_ov_flags = 0;

	if (flags & ~(O_CREAT | O_DIRECTORY)) {
		return EINVAL;
	}

	if (is_fs_writeable(fs_tag)) {
		ndflags |= NOFOLLOW;
	} else {
		ndflags |= FOLLOW;
	}

	if ((flags & O_CREAT) && !is_fs_writeable(fs_tag)) {
		return EROFS;
	}

	/* pick the lookup root: the base dir itself, or an open directory */
	if (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID) {
		error = get_base_dir(fs_tag, &base_dir, &dvp);
	} else {
		error = get_base_dir(fs_tag, &base_dir, NULL);
		if (!error) {
			error = get_open_vnode(&base_dir, root_id, &dvp, &root_ov_flags);
		}
	}

#if (DEVELOPMENT || DEBUG)
	// inherit the ENOSPC flag from the root
	ov_flags |= (root_ov_flags & OV_FORCE_ENOSPC);
#endif

	if (error) {
		return error;
	}

	// if we need to create the file, then delete it first (so that we won't reuse the same inode number)
	if ((flags & O_CREAT) && !(flags & O_DIRECTORY)) {
		error = unlink1(vfs_context_kernel(), dvp, CAST_USER_ADDR_T(path), UIO_SYSSPACE, 0);
		if (error) {
			if (error == ENOENT) {
				/* nothing to delete — not an error */
				error = 0;
			} else {
				goto out;
			}
		}

		// Add an O_EXCL flag so that create will fail if the file is already there after delete (a possible attack)
		flags |= O_EXCL;
	}

	/* nameidata and vnode_attr are large; keep them off the stack */
	ndp = kalloc_type(struct nameidata, Z_WAITOK);
	if (!ndp) {
		error = ENOMEM;
		goto out;
	}

	ctx = vfs_context_kernel();

	NDINIT(ndp, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(path), ctx);

	/* confine the lookup to dvp: it is both the starting dir and the
	 * root, so ".." and absolute paths cannot escape it */
	ndp->ni_rootdir = dvp;
	ndp->ni_flag = NAMEI_ROOTDIR;
	ndp->ni_dvp = dvp;
	ndp->ni_cnd.cn_flags |= USEDVP;

	vap = kalloc_type(struct vnode_attr, Z_WAITOK);
	if (!vap) {
		error = ENOMEM;
		goto out;
	}

	VATTR_INIT(vap);
	VATTR_SET(vap, va_mode, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);

	flags |= FREAD;

	/* files under a writable tag's per-exclave root open RW; the base
	 * dir itself (and read-only tags) open RO */
	if (is_fs_writeable(fs_tag) && (root_id != EXCLAVE_FS_BASEDIR_ROOT_ID)) {
		flags |= FWRITE;
	}

	error = vn_open_auth(ndp, &flags, vap, NULLVP);
	if (error) {
		goto out;
	}

	vp = ndp->ni_vp;

	error = get_vnode_info(vp, NULL, NULL, &vp_file_id);
	if (error) {
		goto out;
	}

	/* report graft-relative ids to the caller; the table keeps host ids */
	if (is_graft(&base_dir)) {
		error = host_to_graft_inum(&base_dir.graft_info, vp_file_id, file_id);
		if (error) {
			goto out;
		}
	} else {
		*file_id = vp_file_id;
	}

	error = increment_vnode_open_count(vp, &base_dir, vp_file_id, ov_flags);

out:
	if (dvp) {
		vnode_put(dvp);
	}
	if (vp) {
		vnode_put(vp);
	}
	if (ndp) {
		kfree_type(struct nameidata, ndp);
	}
	if (vap) {
		kfree_type(struct vnode_attr, vap);
	}

	return error;
}
1138
1139 int
vfs_exclave_fs_open(uint32_t fs_tag,uint64_t root_id,const char * name,uint64_t * file_id)1140 vfs_exclave_fs_open(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
1141 {
1142 if (!exclave_fs_started()) {
1143 return ENXIO;
1144 }
1145
1146 if (is_fs_writeable(fs_tag) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1147 return EINVAL;
1148 }
1149
1150 return exclave_fs_open_internal(fs_tag, root_id, name, 0, 0, file_id);
1151 }
1152
1153 int
vfs_exclave_fs_create(uint32_t fs_tag,uint64_t root_id,const char * name,uint64_t * file_id)1154 vfs_exclave_fs_create(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
1155 {
1156 if (!exclave_fs_started()) {
1157 return ENXIO;
1158 }
1159
1160 if (is_fs_writeable(fs_tag) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1161 return EINVAL;
1162 }
1163
1164 return exclave_fs_open_internal(fs_tag, root_id, name, O_CREAT, 0, file_id);
1165 }
1166
/*
 * Close one open of file_id under fs_tag's base dir: drop its open count and
 * vn_close with the same FREAD/FWRITE flags the open path used (directories
 * and read-only tags were opened read-only).
 */
int
vfs_exclave_fs_close(uint32_t fs_tag, uint64_t file_id)
{
	vnode_t vp = NULLVP;
	registered_fs_tag_t base_dir;
	int flags = FREAD;
	int error;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	error = get_base_dir(fs_tag, &base_dir, NULL);
	if (error) {
		return error;
	}

	/* returns vp with an iocount for the vn_close below */
	error = decrement_vnode_open_count(&base_dir, file_id, &vp);
	if (error) {
		goto out;
	}

	/* mirror the FWRITE decision made in exclave_fs_open_internal */
	if (is_fs_writeable(fs_tag) && !vnode_isdir(vp)) {
		flags |= FWRITE;
	}

	error = vn_close(vp, flags, vfs_context_kernel());

out:
	if (vp) {
		vnode_put(vp);
	}

	return error;
}
1202
/*
 * Shared read/write path: transfer `length` bytes between `data` (kernel
 * space) and the open file `file_id` at `offset`.  Writes are rejected on
 * read-only tags, and on files whose open-vnodes entry carries
 * OV_FORCE_ENOSPC (DEVELOPMENT/DEBUG test hook).  A short transfer is
 * reported as EIO.
 */
static int
exclave_fs_io(uint32_t fs_tag, uint64_t file_id, uint64_t offset, uint64_t length, uint8_t *data, bool read)
{
	vnode_t vp = NULLVP;
	registered_fs_tag_t base_dir;
	UIO_STACKBUF(uio_buf, 1);
	uio_t auio = NULL;
	int error = 0;
	uint32_t ov_flags = 0;

	if (!read && !is_fs_writeable(fs_tag)) {
		return EROFS;
	}

	error = get_base_dir(fs_tag, &base_dir, NULL);
	if (error) {
		return error;
	}

	/* takes an iocount on vp; released in out: */
	error = get_open_vnode(&base_dir, file_id, &vp, &ov_flags);
	if (error) {
		goto out;
	}

	if (!read && (ov_flags & OV_FORCE_ENOSPC)) {
		error = ENOSPC;
		goto out;
	}

	auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, read ? UIO_READ : UIO_WRITE,
	    &uio_buf[0], sizeof(uio_buf));
	if (!auio) {
		error = ENOMEM;
		goto out;
	}

	error = uio_addiov(auio, (uintptr_t)data, length);
	if (error) {
		goto out;
	}

	if (read) {
		error = VNOP_READ(vp, auio, 0, vfs_context_kernel());
	} else {
		error = VNOP_WRITE(vp, auio, 0, vfs_context_kernel());
	}

	/* a residual count means the transfer was short */
	if (!error && uio_resid(auio)) {
		error = EIO;
	}

out:
	if (vp) {
		vnode_put(vp);
	}

	return error;
}
1261
1262 int
vfs_exclave_fs_read(uint32_t fs_tag,uint64_t file_id,uint64_t file_offset,uint64_t length,void * data)1263 vfs_exclave_fs_read(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
1264 {
1265 if (!exclave_fs_started()) {
1266 return ENXIO;
1267 }
1268
1269 return exclave_fs_io(fs_tag, file_id, file_offset, length, data, true);
1270 }
1271
1272 int
vfs_exclave_fs_write(uint32_t fs_tag,uint64_t file_id,uint64_t file_offset,uint64_t length,void * data)1273 vfs_exclave_fs_write(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
1274 {
1275 if (!exclave_fs_started()) {
1276 return ENXIO;
1277 }
1278
1279 return exclave_fs_io(fs_tag, file_id, file_offset, length, (void *)data, false);
1280 }
1281
1282 int
vfs_exclave_fs_remove(uint32_t fs_tag,uint64_t root_id,const char * name)1283 vfs_exclave_fs_remove(uint32_t fs_tag, uint64_t root_id, const char *name)
1284 {
1285 vnode_t rvp = NULLVP;
1286 registered_fs_tag_t base_dir;
1287 int error;
1288
1289 if (!exclave_fs_started()) {
1290 return ENXIO;
1291 }
1292
1293 if (!is_fs_writeable(fs_tag)) {
1294 return EROFS;
1295 }
1296
1297 error = get_base_dir(fs_tag, &base_dir, NULL);
1298 if (error) {
1299 return error;
1300 }
1301
1302 error = get_open_vnode(&base_dir, root_id, &rvp, NULL);
1303 if (error) {
1304 return error;
1305 }
1306
1307 error = unlink1(vfs_context_kernel(), rvp, CAST_USER_ADDR_T(name), UIO_SYSSPACE, 0);
1308
1309 if (rvp) {
1310 vnode_put(rvp);
1311 }
1312
1313 return error;
1314 }
1315
1316 int
vfs_exclave_fs_sync(uint32_t fs_tag,uint64_t file_id,uint64_t sync_op)1317 vfs_exclave_fs_sync(uint32_t fs_tag, uint64_t file_id, uint64_t sync_op)
1318 {
1319 vnode_t vp = NULLVP;
1320 registered_fs_tag_t base_dir;
1321 u_long command;
1322 int error;
1323
1324 if (!exclave_fs_started()) {
1325 return ENXIO;
1326 }
1327
1328 if (!is_fs_writeable(fs_tag)) {
1329 return EROFS;
1330 }
1331
1332 if (sync_op == EXCLAVE_FS_SYNC_OP_BARRIER) {
1333 command = F_BARRIERFSYNC;
1334 } else if (sync_op == EXCLAVE_FS_SYNC_OP_FULL) {
1335 command = F_FULLFSYNC;
1336 } else if (sync_op != EXCLAVE_FS_SYNC_OP_UBC) {
1337 return EINVAL;
1338 }
1339
1340 error = get_base_dir(fs_tag, &base_dir, NULL);
1341 if (error) {
1342 return error;
1343 }
1344
1345 error = get_open_vnode(&base_dir, file_id, &vp, NULL);
1346 if (error) {
1347 goto out;
1348 }
1349
1350 if (sync_op == EXCLAVE_FS_SYNC_OP_UBC) {
1351 error = VNOP_FSYNC(vp, MNT_WAIT, vfs_context_kernel());
1352 } else {
1353 error = VNOP_IOCTL(vp, command, (caddr_t)NULL, 0, vfs_context_kernel());
1354 }
1355
1356 out:
1357 if (vp) {
1358 vnode_put(vp);
1359 }
1360
1361 return error;
1362 }
1363
1364 static int
map_graft_dirents(fsioc_graft_info_t * graft_info,void * dirent_buf,int32_t count)1365 map_graft_dirents(fsioc_graft_info_t *graft_info, void *dirent_buf, int32_t count)
1366 {
1367 int i, error = 0;
1368
1369 for (i = 0; i < count; i++) {
1370 exclave_fs_dirent_t *dirent = (exclave_fs_dirent_t *)dirent_buf;
1371 uint64_t mapped_file_id;
1372
1373 error = host_to_graft_inum(graft_info, dirent->file_id, &mapped_file_id);
1374 if (error) {
1375 return error;
1376 }
1377 dirent->file_id = mapped_file_id;
1378 dirent_buf = (char *)dirent_buf + dirent->length;
1379 }
1380
1381 return 0;
1382 }
1383
1384 int
vfs_exclave_fs_readdir(uint32_t fs_tag,uint64_t file_id,void * dirent_buf,uint32_t buf_size,int32_t * count)1385 vfs_exclave_fs_readdir(uint32_t fs_tag, uint64_t file_id, void *dirent_buf,
1386 uint32_t buf_size, int32_t *count)
1387 {
1388 vnode_t dvp = NULLVP;
1389 registered_fs_tag_t base_dir;
1390 UIO_STACKBUF(uio_buf, 1);
1391 uio_t auio = NULL;
1392 vfs_context_t ctx;
1393 uthread_t ut;
1394 struct attrlist al;
1395 struct vnode_attr *vap = NULL;
1396 char *va_name = NULL;
1397 int32_t eofflag;
1398 int error;
1399
1400 if (!exclave_fs_started()) {
1401 return ENXIO;
1402 }
1403
1404 error = get_base_dir(fs_tag, &base_dir, NULL);
1405 if (error) {
1406 return error;
1407 }
1408
1409 /*
1410 * For ExclaveOS readdir through VFS is not permitted in RELEASE xnu
1411 * variants. Directory enumeration should be based on the data in the
1412 * integrity catalogue. Error out here if a request is routed here
1413 * in this circumstance.
1414 */
1415 if (fs_tag == EFT_SYSTEM) {
1416 #if (DEVELOPMENT || DEBUG)
1417 /*
1418 * For non-RELEASE xnu variants, we allow readdir to
1419 * be routed through VFS if the relevant integrity checks
1420 * are disabled, or if the underlying volume is not sealed.
1421 */
1422 if (!integrity_checks_disabled && is_sealed(&base_dir)) {
1423 return ENOTSUP;
1424 }
1425 #else
1426 // This is the RELEASE xnu case above
1427 return ENOTSUP;
1428 #endif
1429 }
1430
1431 error = get_open_vnode(&base_dir, file_id, &dvp, NULL);
1432 if (error) {
1433 goto out;
1434 }
1435
1436 if (!vnode_isdir(dvp)) {
1437 error = ENOTDIR;
1438 goto out;
1439 }
1440
1441 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
1442 &uio_buf[0], sizeof(uio_buf));
1443 if (!auio) {
1444 error = ENOMEM;
1445 goto out;
1446 }
1447
1448 error = uio_addiov(auio, (uintptr_t)dirent_buf, buf_size);
1449 if (error) {
1450 goto out;
1451 }
1452
1453 al.bitmapcount = ATTR_BIT_MAP_COUNT;
1454 al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_NAME | ATTR_CMN_OBJTYPE | ATTR_CMN_FILEID;
1455 al.fileattr = ATTR_FILE_DATALENGTH;
1456
1457 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1458 if (!vap) {
1459 error = ENOMEM;
1460 goto out;
1461 }
1462
1463 VATTR_INIT(vap);
1464 va_name = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_ZERO);
1465 if (!va_name) {
1466 error = ENOMEM;
1467 goto out;
1468 }
1469 vap->va_name = va_name;
1470
1471 VATTR_SET_ACTIVE(vap, va_name);
1472 VATTR_SET_ACTIVE(vap, va_objtype);
1473 VATTR_SET_ACTIVE(vap, va_fileid);
1474 VATTR_SET_ACTIVE(vap, va_total_size);
1475 VATTR_SET_ACTIVE(vap, va_data_size);
1476
1477 ctx = vfs_context_kernel();
1478 ut = current_uthread();
1479
1480 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1481 error = VNOP_GETATTRLISTBULK(dvp, &al, vap, auio, NULL,
1482 0, &eofflag, count, ctx);
1483 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1484
1485 if (!error && !eofflag) {
1486 return ENOBUFS;
1487 }
1488
1489 if (is_graft(&base_dir)) {
1490 error = map_graft_dirents(&base_dir.graft_info, dirent_buf, *count);
1491 if (error) {
1492 goto out;
1493 }
1494 }
1495
1496 out:
1497 if (va_name) {
1498 zfree(ZV_NAMEI, va_name);
1499 }
1500 if (vap) {
1501 kfree_type(struct vnode_attr, vap);
1502 }
1503 if (dvp) {
1504 vnode_put(dvp);
1505 }
1506
1507 return error;
1508 }
1509
1510 int
vfs_exclave_fs_getsize(uint32_t fs_tag,uint64_t file_id,uint64_t * size)1511 vfs_exclave_fs_getsize(uint32_t fs_tag, uint64_t file_id, uint64_t *size)
1512 {
1513 vnode_t vp = NULLVP;
1514 registered_fs_tag_t base_dir;
1515 vfs_context_t ctx;
1516 struct vnode_attr *vap = NULL;
1517 int error;
1518
1519 if (!exclave_fs_started()) {
1520 return ENXIO;
1521 }
1522
1523 error = get_base_dir(fs_tag, &base_dir, NULL);
1524 if (error) {
1525 return error;
1526 }
1527
1528 error = get_open_vnode(&base_dir, file_id, &vp, NULL);
1529 if (error) {
1530 goto out;
1531 }
1532
1533 if (vnode_isdir(vp)) {
1534 error = EISDIR;
1535 goto out;
1536 }
1537
1538 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1539 if (!vap) {
1540 error = ENOMEM;
1541 goto out;
1542 }
1543
1544 VATTR_INIT(vap);
1545 VATTR_WANTED(vap, va_data_size);
1546
1547 ctx = vfs_context_kernel();
1548
1549 error = VNOP_GETATTR(vp, vap, ctx);
1550 if (error) {
1551 goto out;
1552 }
1553
1554 if (!VATTR_IS_SUPPORTED(vap, va_data_size)) {
1555 error = ENOTSUP;
1556 goto out;
1557 }
1558
1559 *size = vap->va_data_size;
1560
1561 out:
1562 if (vap) {
1563 kfree_type(struct vnode_attr, vap);
1564 }
1565 if (vp) {
1566 vnode_put(vp);
1567 }
1568
1569 return error;
1570 }
1571
1572 int
vfs_exclave_fs_sealstate(uint32_t fs_tag,bool * sealed)1573 vfs_exclave_fs_sealstate(uint32_t fs_tag, bool *sealed)
1574 {
1575 registered_fs_tag_t base_dir;
1576 int error;
1577
1578 if (!exclave_fs_started()) {
1579 return ENXIO;
1580 }
1581
1582 error = get_base_dir(fs_tag, &base_dir, NULL);
1583 if (error) {
1584 return error;
1585 }
1586
1587 *sealed = is_sealed(&base_dir);
1588
1589 return 0;
1590 }
1591
1592 #if DEVELOPMENT || DEBUG
1593
// Backing store for the kern.enospc_exclaves sysctl: a comma-separated
// list of exclave_ids whose writes should be forced to fail with ENOSPC
// (test hook; DEVELOPMENT/DEBUG only).
#define ENOSPC_EXCLAVES_LEN 256
static char enospc_exclaves[ENOSPC_EXCLAVES_LEN];
1596
1597 static bool
vfs_exclave_is_enospc_exclave(const char * exclave_id)1598 vfs_exclave_is_enospc_exclave(const char *exclave_id)
1599 {
1600 char *element;
1601 char *scratch_base;
1602 char *scratch;
1603 size_t buf_len = strlen(enospc_exclaves) + 1;
1604 bool is_enospc_exclave = false;
1605
1606 /* allocate a scratch buffer the size of the string */
1607 scratch_base = kalloc_data(buf_len, Z_WAITOK);
1608 if (scratch_base == NULL) {
1609 goto out;
1610 }
1611
1612 /* copy the elementlist to the scratch buffer */
1613 strlcpy(scratch_base, enospc_exclaves, buf_len);
1614
1615 /*
1616 * set up a temporary pointer that can be used to iterate the
1617 * scratch buffer without losing the allocation address
1618 */
1619 scratch = scratch_base;
1620
1621 /* iterate the scratch buffer; NOTE: buffer contents modified! */
1622 while ((element = strsep(&scratch, ",")) != NULL) {
1623 if (strcmp(element, exclave_id) == 0) {
1624 printf("%s is enospc exclave\n", exclave_id);
1625 is_enospc_exclave = true;
1626 goto out;
1627 }
1628 }
1629
1630 out:
1631 if (scratch_base != NULL) {
1632 kfree_data(scratch_base, buf_len);
1633 }
1634
1635 return is_enospc_exclave;
1636 }
1637
// Exposes kern.enospc_exclaves (read/write): a comma-separated list of
// exclave_ids; writes issued on behalf of a listed exclave fail with ENOSPC.
SYSCTL_STRING(_kern, OID_AUTO, enospc_exclaves, CTLFLAG_RW | CTLFLAG_LOCKED, enospc_exclaves, sizeof(enospc_exclaves), "List of comma-separated exclave_ids for writing immediately returns ENOSPC");
1639
1640 #endif /* DEVELOPMENT || DEBUG */
1641