1 /*
2 * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/systm.h>
34 #include <sys/mount_internal.h>
35 #include <sys/fsctl.h>
36 #include <sys/filedesc.h>
37 #include <sys/vnode_internal.h>
38 #include <sys/imageboot.h>
39 #include <kern/assert.h>
40
41 #include <sys/namei.h>
42 #include <sys/fcntl.h>
43 #include <sys/vnode.h>
44 #include <sys/xattr.h>
45 #include <sys/sysproto.h>
46 #include <sys/csr.h>
47 #include <miscfs/devfs/devfsdefs.h>
48 #include <libkern/crypto/sha2.h>
49 #include <libkern/crypto/rsa.h>
50 #include <libkern/OSKextLibPrivate.h>
51 #include <sys/ubc_internal.h>
52
53 #if CONFIG_IMAGEBOOT_IMG4
54 #include <libkern/img4/interface.h>
55 #include <img4/firmware.h>
56 #endif
57
58 #include <kern/kalloc.h>
59 #include <os/overflow.h>
60 #include <vm/vm_kern_xnu.h>
61
62 #include <pexpert/pexpert.h>
63 #include <kern/chunklist.h>
64
65 extern int (*mountroot)(void);
66 extern char rootdevice[DEVMAXNAMESIZE];
67
68 #define DEBUG_IMAGEBOOT 0
69
70 #if DEBUG_IMAGEBOOT
71 #define DBG_TRACE(...) printf("imageboot: " __VA_ARGS__)
72 #else
73 #define DBG_TRACE(...) do {} while(0)
74 #endif
75
76 #define AUTHDBG(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)
77 #define AUTHPRNT(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)
78
79 extern int di_root_image_ext(const char *path, char *devname, size_t devsz, dev_t *dev_p, bool removable);
80 extern int di_root_image(const char *path, char *devname, size_t devsz, dev_t *dev_p);
81 extern int di_root_ramfile_buf(void *buf, size_t bufsz, char *devname, size_t devsz, dev_t *dev_p);
82
83 static boolean_t imageboot_setup_new(imageboot_type_t type);
84
85 void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size);
86
87 extern lck_rw_t rootvnode_rw_lock;
88
89 #define kIBFilePrefix "file://"
90
91 __private_extern__ int
imageboot_format_is_valid(const char * root_path)92 imageboot_format_is_valid(const char *root_path)
93 {
94 return strncmp(root_path, kIBFilePrefix,
95 strlen(kIBFilePrefix)) == 0;
96 }
97
98 static void
vnode_get_and_drop_always(vnode_t vp)99 vnode_get_and_drop_always(vnode_t vp)
100 {
101 vnode_getalways(vp);
102 vnode_rele(vp);
103 vnode_put(vp);
104 }
105
106 __private_extern__ bool
imageboot_desired(void)107 imageboot_desired(void)
108 {
109 bool do_imageboot = false;
110
111 char *root_path = NULL;
112 root_path = zalloc(ZV_NAMEI);
113 /*
114 * Check for first layer DMG rooting.
115 *
116 * Note that here we are principally concerned with whether or not we
117 * SHOULD try to imageboot, not whether or not we are going to be able to.
118 *
119 * If NONE of the boot-args are present, then assume that image-rooting
120 * is not requested.
121 *
122 * [!! Note parens guard the entire logically OR'd set of statements, below. It validates
123 * that NONE of the below-mentioned boot-args is present...!!]
124 */
125 if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) ||
126 #if CONFIG_IMAGEBOOT_IMG4
127 PE_parse_boot_argn("arp0", root_path, MAXPATHLEN) ||
128 #endif
129 PE_parse_boot_argn("rp", root_path, MAXPATHLEN) ||
130 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) ||
131 PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN))) {
132 /* explicitly set to false */
133 do_imageboot = false;
134 } else {
135 /* now sanity check the file-path format */
136 if (imageboot_format_is_valid(root_path)) {
137 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
138 /* root_path looks good and we have one of the aforementioned bootargs */
139 do_imageboot = true;
140 } else {
141 /* explicitly set to false */
142 do_imageboot = false;
143 }
144 }
145
146 zfree(ZV_NAMEI, root_path);
147 return do_imageboot;
148 }
149
150 __private_extern__ imageboot_type_t
imageboot_needed(void)151 imageboot_needed(void)
152 {
153 imageboot_type_t result = IMAGEBOOT_NONE;
154 char *root_path = NULL;
155
156 DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__);
157
158 if (!imageboot_desired()) {
159 goto out;
160 }
161
162 root_path = zalloc(ZV_NAMEI);
163 result = IMAGEBOOT_DMG;
164
165 /* Check for second layer */
166 if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) ||
167 PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) {
168 goto out;
169 }
170
171 /* Sanity-check second layer */
172 if (imageboot_format_is_valid(root_path)) {
173 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
174 } else {
175 panic("%s: Invalid URL scheme for %s",
176 __FUNCTION__, root_path);
177 }
178
179 out:
180 if (root_path != NULL) {
181 zfree(ZV_NAMEI, root_path);
182 }
183 return result;
184 }
185
186 extern bool IOBaseSystemARVRootHashAvailable(void);
187
188
189 /*
190 * Mounts new filesystem based on image path, and pivots it to the root.
191 * The image to be mounted is located at image_path.
192 * It will be mounted at mount_path.
193 * The vfs_switch_root operation will be performed.
194 * After the pivot, the outgoing root filesystem (the filesystem at root when
195 * this function begins) will be at outgoing_root_path. If `skip_signature_check` is true,
196 * then ignore the chunklisted or authAPFS checks on this image
197 */
198 __private_extern__ int
imageboot_pivot_image(const char * image_path,imageboot_type_t type,const char * mount_path,const char * outgoing_root_path,const bool rooted_dmg,const bool skip_signature_check)199 imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
200 const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check)
201 {
202 int error = 0;
203 boolean_t authenticated_dmg_chunklist = false;
204 vnode_t mount_vp = NULLVP;
205 errno_t rootauth;
206
207
208 if (type != IMAGEBOOT_DMG) {
209 panic("not supported");
210 }
211
212 /*
213 * Check that the image file actually exists.
214 * We also need to find the mount it's on, to mark it as backing the
215 * root.
216 */
217 vnode_t imagevp = NULLVP;
218 error = vnode_lookup(image_path, 0, &imagevp, vfs_context_kernel());
219 if (error) {
220 printf("%s: image file not found or couldn't be read: %d\n", __FUNCTION__, error);
221 /*
222 * bail out here to short-circuit out of panic logic below.
223 * Failure to find the pivot-image should not be a fatal condition (ENOENT)
224 * since it may result in natural consequences (ergo, cannot unlock filevault prompt).
225 */
226 return error;
227 }
228
229 /*
230 * load the disk image and obtain its device.
231 * di_root_image's name and the names of its arguments suggest it has
232 * to be mounted at the root, but that's not actually needed.
233 * We just need to obtain the device info.
234 */
235
236 dev_t dev;
237 char devname[DEVMAXNAMESIZE];
238 const char *error_func = NULL;
239 unsigned ramdisk_arg = 0;
240 (void) PE_parse_boot_argn("-bsdmgroot-ramdisk", &ramdisk_arg, sizeof(ramdisk_arg));
241
242 if (ramdisk_arg) {
243 size_t bufsz = 0;
244 void *buf = NULL;
245 error_func = "imageboot_read_file";
246 error = imageboot_read_file_pageable(image_path, &buf, &bufsz);
247 if (error == 0) {
248 error_func = "di_root_ramfile_buf";
249 error = di_root_ramfile_buf(buf, bufsz, devname, sizeof(devname), &dev);
250 }
251 if (error && (buf != NULL)) {
252 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
253 }
254 } else {
255 error_func = "di_root_image";
256 error = di_root_image_ext(image_path, devname, DEVMAXNAMESIZE, &dev, true);
257 }
258 if (error) {
259 panic("%s: %s failed: %d", __FUNCTION__, error_func, error);
260 }
261
262 printf("%s: attached disk image %s as %s\n", __FUNCTION__, image_path, devname);
263
264
265 #if CONFIG_IMAGEBOOT_CHUNKLIST
266 if ((rooted_dmg == false) && !IOBaseSystemARVRootHashAvailable()) {
267 error = authenticate_root_with_chunklist(image_path, NULL);
268 if (error == 0) {
269 printf("authenticated root-dmg via chunklist...\n");
270 authenticated_dmg_chunklist = true;
271 } else {
272 /* root hash was not available, and image is NOT chunklisted? */
273 printf("failed to chunklist-authenticate root-dmg @ %s\n", image_path);
274 }
275 }
276 #endif
277
278 char fulldevname[DEVMAXNAMESIZE + 5]; // "/dev/"
279 strlcpy(fulldevname, "/dev/", sizeof(fulldevname));
280 strlcat(fulldevname, devname, sizeof(fulldevname));
281
282 /*
283 * mount expects another layer of indirection (because it expects to
284 * be getting a user_addr_t of a char *.
285 * Make a pointer-to-pointer on our stack. It won't use this
286 * address after it returns so this should be safe.
287 */
288 char *fulldevnamep = &(fulldevname[0]);
289 char **fulldevnamepp = &fulldevnamep;
290
291 #define PIVOTMNT "/System/Volumes/BaseSystem"
292
293
294 /* Attempt to mount as HFS; if it fails, then try as APFS */
295 printf("%s: attempting to mount as hfs...\n", __FUNCTION__);
296 error = kernel_mount("hfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
297 if (error) {
298 printf("mount failed: %d\n", error);
299 printf("%s: attempting to mount as apfs...\n", __FUNCTION__);
300 error = kernel_mount("apfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
301 }
302
303 /* If we didn't mount as either HFS or APFS, then bail out */
304 if (error) {
305 /*
306 * Note that for this particular failure case (failure to mount), the disk image
307 * being attached may have failed to quiesce within the alloted time out (20-30 sec).
308 * For example, it may be still probing, or APFS container enumeration may have not
309 * completed. If so, then we may have fallen into this particular error case. However,
310 * failure to complete matching should be an exceptional case as 30 sec. is quite a
311 * long time to wait for matching to complete (which would have occurred in
312 * di_root_image_ext).
313 */
314 #if defined(__arm64__) && XNU_TARGET_OS_OSX
315 panic("%s: failed to mount pivot image(%d)!", __FUNCTION__, error);
316 #endif
317 printf("%s: failed to mount pivot image(%d) !", __FUNCTION__, error);
318 goto done;
319 }
320
321 /* otherwise, if the mount succeeded, then assert that the DMG is authenticated (either chunklist or authapfs) */
322 error = vnode_lookup(PIVOTMNT, 0, &mount_vp, vfs_context_kernel());
323 if (error) {
324 #if defined(__arm64__) && XNU_TARGET_OS_OSX
325 panic("%s: failed to lookup pivot root (%d) !", __FUNCTION__, error);
326 #endif
327 printf("%s: failed to lookup pivot root (%d)!", __FUNCTION__, error);
328 goto done;
329 }
330
331 /* the 0x1 implies base system */
332 rootauth = VNOP_IOCTL(mount_vp, FSIOC_KERNEL_ROOTAUTH, (caddr_t)0x1, 0, vfs_context_kernel());
333 if (rootauth) {
334 printf("BS-DMG failed to authenticate intra-FS \n");
335 /*
336 * If we are using a custom rooted DMG, or if we have already authenticated
337 * the DMG via chunklist, then it is permissible to use.
338 * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant.
339 */
340 if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) {
341 rootauth = 0;
342 }
343 error = rootauth;
344 }
345 vnode_put(mount_vp);
346 mount_vp = NULLVP;
347
348 if (error) {
349 /*
350 * Failure here exclusively means that the mount failed to authenticate.
351 * This means that the disk image either was not sealed (authapfs), or it was
352 * not hosted on a chunklisted DMG. Both scenarios may be fatal depending
353 * on the platform.
354 */
355 #if defined(__arm64__) && XNU_TARGET_OS_OSX
356 panic("%s: could not authenticate the pivot image: %d. giving up.", __FUNCTION__, error);
357 #endif
358 printf("%s: could not authenticate the pivot image: %d. giving up.\n", __FUNCTION__, error);
359 goto done;
360 }
361
362 if (rootvnode) {
363 mount_t root_mp = vnode_mount(rootvnode);
364 if (root_mp && (root_mp->mnt_kern_flag & MNTK_SSD)) {
365 rootvp_is_ssd = true;
366 }
367 }
368 /*
369 * pivot the incoming and outgoing filesystems
370 */
371 error = vfs_switch_root(mount_path, outgoing_root_path, 0);
372 if (error) {
373 panic("%s: vfs_switch_root failed: %d", __FUNCTION__, error);
374 }
375
376 /*
377 * Mark the filesystem containing the image as backing root, so it
378 * won't be unmountable.
379 *
380 * vfs_switch_root() clears this flag, so we have to set it after
381 * the pivot call.
382 * If the system later pivots out of the image, vfs_switch_root
383 * will clear it again, so the backing filesystem can be unmounted.
384 */
385 if (!ramdisk_arg) {
386 mount_t imagemp = imagevp->v_mount;
387 lck_rw_lock_exclusive(&imagemp->mnt_rwlock);
388 imagemp->mnt_kern_flag |= MNTK_BACKS_ROOT;
389 lck_rw_done(&imagemp->mnt_rwlock);
390 }
391
392 error = 0;
393
394 /*
395 * Note that we do NOT change kern.bootuuid here -
396 * imageboot_mount_image() does, but imageboot_pivot_image() doesn't.
397 * imageboot_mount_image() is used when the root volume uuid was
398 * "always supposed to be" the one inside the dmg. imageboot_pivot_
399 * image() is used when the true root volume just needs to be
400 * obscured for a moment by the dmg.
401 */
402
403 done:
404 if (imagevp != NULLVP) {
405 vnode_put(imagevp);
406 }
407 return error;
408 }
409
410 /* kern_sysctl.c */
411 extern uuid_string_t fake_bootuuid;
412
413 static void
set_fake_bootuuid(mount_t mp)414 set_fake_bootuuid(mount_t mp)
415 {
416 struct vfs_attr va;
417 VFSATTR_INIT(&va);
418 VFSATTR_WANTED(&va, f_uuid);
419
420 if (vfs_getattr(mp, &va, vfs_context_current()) != 0) {
421 return;
422 }
423
424 if (!VFSATTR_IS_SUPPORTED(&va, f_uuid)) {
425 return;
426 }
427
428 uuid_unparse(va.f_uuid, fake_bootuuid);
429 }
430
431 /*
432 * Swaps in new root filesystem based on image path.
433 * Current root filesystem is removed from mount list and
434 * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and
435 * "rootvnode" is reset. Root vnode of currentroot filesystem
436 * is returned with usecount (no iocount).
437 * kern.bootuuid is arranged to return the UUID of the mounted image. (If
438 * we did nothing here, it would be the UUID of the image source volume.)
439 */
440 __private_extern__ int
imageboot_mount_image(const char * root_path,int height,imageboot_type_t type)441 imageboot_mount_image(const char *root_path, int height, imageboot_type_t type)
442 {
443 dev_t dev;
444 int error;
445 /*
446 * Need to stash this here since we may do a kernel_mount() on /, which will
447 * automatically update the rootvnode global. Note that vfs_mountroot() does
448 * not update that global, which is a bit weird.
449 */
450 vnode_t old_rootvnode = rootvnode;
451 vnode_t newdp;
452 mount_t new_rootfs;
453 boolean_t update_rootvnode = FALSE;
454
455 if (type == IMAGEBOOT_DMG) {
456 error = di_root_image(root_path, rootdevice, DEVMAXNAMESIZE, &dev);
457 if (error) {
458 panic("%s: di_root_image failed: %d", __FUNCTION__, error);
459 }
460
461 rootdev = dev;
462 mountroot = NULL;
463 printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
464 error = vfs_mountroot();
465 if (error != 0) {
466 panic("vfs_mountroot() failed.");
467 }
468
469 update_rootvnode = TRUE;
470 } else {
471 panic("invalid imageboot type: %d", type);
472 }
473
474 /*
475 * Get the vnode for '/'.
476 * Set fdp->fd_fd.fd_cdir to reference it.
477 */
478 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
479 panic("%s: cannot find root vnode", __FUNCTION__);
480 }
481 DBG_TRACE("%s: old root fsname: %s\n", __FUNCTION__, old_rootvnode->v_mount->mnt_vtable->vfc_name);
482
483 if (old_rootvnode != NULL) {
484 /* remember the old rootvnode, but remove it from mountlist */
485 mount_t old_rootfs = old_rootvnode->v_mount;
486
487 mount_list_remove(old_rootfs);
488 mount_lock(old_rootfs);
489 old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
490 old_rootfs->mnt_flag &= ~MNT_ROOTFS;
491 mount_unlock(old_rootfs);
492 }
493
494 vnode_ref(newdp);
495 vnode_put(newdp);
496
497 lck_rw_lock_exclusive(&rootvnode_rw_lock);
498 /* switch to the new rootvnode */
499 if (update_rootvnode) {
500 rootvnode = newdp;
501 set_fake_bootuuid(rootvnode->v_mount);
502 }
503
504 new_rootfs = rootvnode->v_mount;
505 mount_lock(new_rootfs);
506 new_rootfs->mnt_flag |= MNT_ROOTFS;
507 mount_unlock(new_rootfs);
508
509 kernproc->p_fd.fd_cdir = newdp;
510 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
511
512 DBG_TRACE("%s: root switched\n", __FUNCTION__);
513
514 if (old_rootvnode != NULL) {
515 #ifdef CONFIG_IMGSRC_ACCESS
516 if (height >= 0) {
517 imgsrc_rootvnodes[height] = old_rootvnode;
518 } else {
519 vnode_get_and_drop_always(old_rootvnode);
520 }
521 #else
522 #pragma unused(height)
523 vnode_get_and_drop_always(old_rootvnode);
524 #endif /* CONFIG_IMGSRC_ACCESS */
525 }
526 return 0;
527 }
528
529 /*
530 * Return a memory object for given file path.
531 * Also returns a vnode reference for the given file path.
532 */
533 void *
ubc_getobject_from_filename(const char * filename,struct vnode ** vpp,off_t * file_size)534 ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size)
535 {
536 int err = 0;
537 struct nameidata ndp = {};
538 struct vnode *vp = NULL;
539 off_t fsize = 0;
540 vfs_context_t ctx = vfs_context_kernel();
541 void *control = NULL;
542
543 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(filename), ctx);
544 if ((err = namei(&ndp)) != 0) {
545 goto errorout;
546 }
547 nameidone(&ndp);
548 vp = ndp.ni_vp;
549
550 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
551 goto errorout;
552 }
553
554 if (fsize < 0) {
555 goto errorout;
556 }
557
558 control = ubc_getobject(vp, UBC_FLAGS_NONE);
559 if (control == NULL) {
560 goto errorout;
561 }
562
563 *file_size = fsize;
564 *vpp = vp;
565 vp = NULL;
566
567 errorout:
568 if (vp) {
569 vnode_put(vp);
570 }
571 return control;
572 }
573
574 static int
imageboot_read_file_internal(const char * path,const off_t offset,const bool pageable,void ** bufp,size_t * bufszp,off_t * fsizep)575 imageboot_read_file_internal(const char *path, const off_t offset, const bool pageable, void **bufp, size_t *bufszp, off_t *fsizep)
576 {
577 int err = 0;
578 struct nameidata ndp = {};
579 struct vnode *vp = NULL;
580 struct vnode *rsrc_vp = NULL;
581 char *readbuf = NULL;
582 off_t readsize = 0;
583 off_t readoff = 0;
584 off_t fsize = 0;
585 size_t maxsize = 0;
586 char *buf = NULL;
587 bool doclose = false;
588
589 vfs_context_t ctx = vfs_context_kernel();
590 proc_t p = vfs_context_proc(ctx);
591 kauth_cred_t kerncred = vfs_context_ucred(ctx);
592
593 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF | FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
594 if ((err = namei(&ndp)) != 0) {
595 AUTHPRNT("namei failed (%s) - %d", path, err);
596 goto out;
597 }
598 nameidone(&ndp);
599 vp = ndp.ni_vp;
600
601 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
602 AUTHPRNT("failed to get vnode size of %s - %d", path, err);
603 goto out;
604 }
605 if (fsize < 0) {
606 panic("negative file size");
607 }
608 if (offset < 0) {
609 AUTHPRNT("negative file offset");
610 err = EINVAL;
611 goto out;
612 }
613
614 if (fsizep) {
615 *fsizep = fsize;
616 }
617
618 if ((err = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
619 AUTHPRNT("failed to open %s - %d", path, err);
620 goto out;
621 }
622 doclose = true;
623
624 /* cap fsize to the amount that remains after offset */
625 if (os_sub_overflow(fsize, offset, &fsize)) {
626 fsize = 0;
627 } else if (fsize < 0) {
628 fsize = 0;
629 }
630
631 /* if bufsz is non-zero, cap the read at bufsz bytes */
632 maxsize = *bufszp;
633 if (maxsize && (maxsize < (size_t)fsize)) {
634 fsize = maxsize;
635 }
636
637 /* if fsize is larger than the specified limit (presently 2.5GB) or a NVRAM-configured limit, fail */
638 maxsize = IMAGEBOOT_MAX_FILESIZE;
639 PE_parse_boot_argn("rootdmg-maxsize", &maxsize, sizeof(maxsize));
640 if (maxsize && (maxsize < (size_t)fsize)) {
641 AUTHPRNT("file is too large (%lld > %lld)", (long long) fsize, (long long) maxsize);
642 err = ENOMEM;
643 goto out;
644 }
645
646 if (pageable) {
647 vm_offset_t addr = 0;
648 if (kmem_alloc(kernel_map, &addr, (vm_size_t)fsize,
649 KMA_PAGEABLE | KMA_DATA, VM_KERN_MEMORY_FILE) == KERN_SUCCESS) {
650 buf = (char *)addr;
651 } else {
652 buf = NULL;
653 }
654 } else {
655 //limit kalloc data calls to only 2GB.
656 if (fsize > IMAGEBOOT_MAX_KALLOCSIZE) {
657 AUTHPRNT("file is too large for non-pageable (%lld)", (long long) fsize);
658 err = ENOMEM;
659 goto out;
660 }
661 buf = (char *)kalloc_data((vm_size_t)fsize, Z_WAITOK);
662 }
663 if (buf == NULL) {
664 err = ENOMEM;
665 goto out;
666 }
667
668 #if NAMEDSTREAMS
669 /* find resource fork so we can evict cached decmpfs data */
670 if (VNOP_GETNAMEDSTREAM(vp, &rsrc_vp, XATTR_RESOURCEFORK_NAME, NS_OPEN, /*flags*/ 0, ctx) == 0) {
671 vnode_ref(rsrc_vp);
672 vnode_put(rsrc_vp);
673 AUTHDBG("Found resource fork for %s", path);
674 }
675 #endif
676
677 /* read data in chunks to handle (fsize > INT_MAX) */
678 readbuf = buf;
679 readsize = fsize;
680 readoff = offset;
681 while (readsize > 0) {
682 const off_t chunksize_max = 16 * 1024 * 1024; /* 16 MiB */
683 const off_t chunksize = MIN(readsize, chunksize_max);
684
685 /* read next chunk, pass IO_NOCACHE to clarify our intent (even if ignored) */
686 if ((err = vn_rdwr(UIO_READ, vp, (caddr_t)readbuf, (int)chunksize, readoff, UIO_SYSSPACE, IO_NODELOCKED | IO_NOCACHE | IO_RAOFF, kerncred, /*resid*/ NULL, p)) != 0) {
687 AUTHPRNT("Cannot read %lld bytes at offset %lld from %s - %d", (long long)chunksize, (long long)readoff, path, err);
688 goto out;
689 }
690
691 /* evict cached pages so they don't accumulate during early boot */
692 ubc_msync(vp, readoff, readoff + chunksize, NULL, UBC_INVALIDATE | UBC_PUSHALL);
693
694 /* evict potentially-cached decmpfs data if we have a resource fork */
695 if (rsrc_vp != NULL) {
696 if (vnode_getwithref(rsrc_vp) == 0) {
697 ubc_msync(rsrc_vp, 0, ubc_getsize(rsrc_vp), NULL, UBC_INVALIDATE | UBC_PUSHALL);
698 vnode_put(rsrc_vp);
699 }
700 }
701
702 readbuf = &readbuf[chunksize];
703 readsize -= chunksize;
704 readoff += chunksize;
705 }
706
707 out:
708 if (doclose) {
709 VNOP_CLOSE(vp, FREAD, ctx);
710 }
711 if (rsrc_vp) {
712 vnode_rele(rsrc_vp);
713 rsrc_vp = NULL;
714 }
715 if (vp) {
716 vnode_put(vp);
717 vp = NULL;
718 }
719
720 if (err) {
721 if (buf == NULL) {
722 /* nothing to free */
723 } else if (pageable) {
724 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)fsize);
725 } else {
726 kfree_data(buf, (vm_size_t)fsize);
727 }
728 } else {
729 *bufp = buf;
730 *bufszp = (size_t)fsize;
731 }
732
733 return err;
734 }
735
736 int
imageboot_read_file_pageable(const char * path,void ** bufp,size_t * bufszp)737 imageboot_read_file_pageable(const char *path, void **bufp, size_t *bufszp)
738 {
739 return imageboot_read_file_internal(path, 0, true, bufp, bufszp, NULL);
740 }
741
742 int
imageboot_read_file_from_offset(const char * path,const off_t offset,void ** bufp,size_t * bufszp)743 imageboot_read_file_from_offset(const char *path, const off_t offset, void **bufp, size_t *bufszp)
744 {
745 return imageboot_read_file_internal(path, offset, false, bufp, bufszp, NULL);
746 }
747
748 int
imageboot_read_file(const char * path,void ** bufp,size_t * bufszp,off_t * fsizep)749 imageboot_read_file(const char *path, void **bufp, size_t *bufszp, off_t *fsizep)
750 {
751 return imageboot_read_file_internal(path, 0, false, bufp, bufszp, fsizep);
752 }
753
#if CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST
/*
 * Looks up `path` and returns its vnode, provided it is a regular file.
 *
 * path  - file to look up.
 * fsize - optional out: receives the file size from vnode_size().
 * errp  - optional out: receives the errno when the function fails. It is
 *         NOT written on success.
 *
 * Returns the vnode, still holding the reference obtained by the lookup
 * (release it with vnode_put()), or NULL on failure, in which case that
 * reference has already been dropped.
 */
vnode_t
imgboot_get_image_file(const char *path, off_t *fsize, int *errp)
{
	struct nameidata ndp = {};
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_kernel();
	int err;

	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
	if ((err = namei(&ndp)) != 0) {
		AUTHPRNT("Cannot find %s - error %d", path, err);
	} else {
		nameidone(&ndp);
		vp = ndp.ni_vp;

		if (vp->v_type != VREG) {
			err = EINVAL;
			AUTHPRNT("%s is not a regular file", path);
		} else if (fsize) {
			if ((err = vnode_size(vp, fsize, ctx)) != 0) {
				AUTHPRNT("Cannot get file size of %s - error %d", path, err);
			}
		}
	}

	if (err) {
		if (vp) {
			vnode_put(vp);
		}
		/* tolerate callers that are not interested in the error code */
		if (errp != NULL) {
			*errp = err;
		}
		vp = NULL;
	}
	return vp;
}
#endif /* CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST */
790
791 #if CONFIG_IMAGEBOOT_IMG4
792
793 #define APTICKET_NAME "apticket.der"
794
795 static char *
imgboot_get_apticket_path(const char * rootpath,size_t * sz)796 imgboot_get_apticket_path(const char *rootpath, size_t *sz)
797 {
798 size_t plen = strlen(rootpath) + sizeof(APTICKET_NAME) + 1;
799 char *path = (char *)kalloc_data(plen, Z_WAITOK);
800
801 if (path) {
802 char *slash;
803
804 strlcpy(path, rootpath, plen);
805 slash = strrchr(path, '/');
806 if (slash == NULL) {
807 slash = path;
808 } else {
809 slash++;
810 }
811 strlcpy(slash, APTICKET_NAME, sizeof(APTICKET_NAME) + 1);
812 }
813
814 *sz = plen;
815 return path;
816 }
817
818 static int
authenticate_root_with_img4(const char * rootpath)819 authenticate_root_with_img4(const char *rootpath)
820 {
821 errno_t rv;
822 vnode_t vp = NULLVP;
823 size_t ticket_pathsz = 0;
824 char *ticket_path;
825 img4_buff_t tck = IMG4_BUFF_INIT;
826 img4_firmware_execution_context_t exec = {
827 .i4fex_version = IMG4_FIRMWARE_EXECUTION_CONTEXT_STRUCT_VERSION,
828 .i4fex_execute = NULL,
829 .i4fex_context = NULL,
830 };
831 img4_firmware_t fw = NULL;
832 img4_firmware_flags_t fw_flags = IMG4_FIRMWARE_FLAG_BARE |
833 IMG4_FIRMWARE_FLAG_SUBSEQUENT_STAGE;
834
835 DBG_TRACE("Check %s\n", rootpath);
836
837 ticket_path = imgboot_get_apticket_path(rootpath, &ticket_pathsz);
838 if (ticket_path == NULL) {
839 AUTHPRNT("Cannot construct ticket path - out of memory");
840 return ENOMEM;
841 }
842
843 rv = imageboot_read_file(ticket_path, (void **)&tck.i4b_bytes, &tck.i4b_len, NULL);
844 if (rv) {
845 AUTHPRNT("Cannot get a ticket from %s - %d\n", ticket_path, rv);
846 goto out_with_ticket_path;
847 }
848
849 DBG_TRACE("Got %lu bytes of manifest from %s\n", tck.i4b_len, ticket_path);
850
851 vp = imgboot_get_image_file(rootpath, NULL, &rv);
852 if (vp == NULL) {
853 /* Error message had been printed already */
854 rv = EIO;
855 goto out_with_ticket_bytes;
856 }
857
858 fw = img4_firmware_new_from_vnode_4xnu(IMG4_RUNTIME_DEFAULT, &exec, 'rosi',
859 vp, fw_flags);
860 if (!fw) {
861 AUTHPRNT("Could not allocate new firmware");
862 rv = ENOMEM;
863 goto out_with_ticket_bytes;
864 }
865
866 img4_firmware_attach_manifest(fw, &tck);
867 rv = img4_firmware_evaluate(fw, img4_chip_select_personalized_ap(), NULL);
868
869 out_with_ticket_bytes:
870 kfree_data(tck.i4b_bytes, tck.i4b_len);
871 out_with_ticket_path:
872 kfree_data(ticket_path, ticket_pathsz);
873
874 img4_firmware_destroy(&fw);
875
876 if (vp) {
877 vnode_put(vp);
878 }
879 return rv;
880 }
881 #endif /* CONFIG_IMAGEBOOT_IMG4 */
882
883
884 /*
885 * Attach the image at 'path' as a ramdisk and mount it as our new rootfs.
886 * All existing mounts are first umounted.
887 */
888 static int
imageboot_mount_ramdisk(const char * path)889 imageboot_mount_ramdisk(const char *path)
890 {
891 int err = 0;
892 size_t bufsz = 0;
893 void *buf = NULL;
894 dev_t dev;
895 vnode_t newdp;
896 vnode_t tvp;
897 mount_t new_rootfs;
898
899 /* Read our target image from disk */
900 err = imageboot_read_file_pageable(path, &buf, &bufsz);
901 if (err) {
902 printf("%s: failed: imageboot_read_file_pageable() = %d\n", __func__, err);
903 goto out;
904 }
905 DBG_TRACE("%s: read '%s' sz = %lu\n", __func__, path, bufsz);
906
907 #if CONFIG_IMGSRC_ACCESS
908 /* Re-add all root mounts to the mount list in the correct order... */
909 mount_list_remove(rootvnode->v_mount);
910 for (int i = 0; i < MAX_IMAGEBOOT_NESTING; i++) {
911 struct vnode *vn = imgsrc_rootvnodes[i];
912 if (vn) {
913 vnode_getalways(vn);
914 imgsrc_rootvnodes[i] = NULLVP;
915
916 mount_t mnt = vn->v_mount;
917 mount_lock(mnt);
918 mnt->mnt_flag |= MNT_ROOTFS;
919 mount_list_add(mnt);
920 mount_unlock(mnt);
921
922 vnode_rele(vn);
923 vnode_put(vn);
924 }
925 }
926 mount_list_add(rootvnode->v_mount);
927 #endif
928
929 /* ... and unmount everything */
930 vfs_unmountall(FALSE);
931
932 lck_rw_lock_exclusive(&rootvnode_rw_lock);
933 kernproc->p_fd.fd_cdir = NULL;
934 tvp = rootvnode;
935 rootvnode = NULL;
936 rootvp = NULLVP;
937 rootdev = NODEV;
938 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
939 vnode_get_and_drop_always(tvp);
940
941 /* Attach the ramfs image ... */
942 err = di_root_ramfile_buf(buf, bufsz, rootdevice, DEVMAXNAMESIZE, &dev);
943 if (err) {
944 printf("%s: failed: di_root_ramfile_buf() = %d\n", __func__, err);
945 goto out;
946 }
947
948 /* ... and mount it */
949 rootdev = dev;
950 mountroot = NULL;
951 err = vfs_mountroot();
952 if (err) {
953 printf("%s: failed: vfs_mountroot() = %d\n", __func__, err);
954 goto out;
955 }
956
957 /* Switch to new root vnode */
958 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
959 panic("%s: cannot find root vnode", __func__);
960 }
961 vnode_ref(newdp);
962
963 lck_rw_lock_exclusive(&rootvnode_rw_lock);
964 rootvnode = newdp;
965 rootvnode->v_flag |= VROOT;
966 new_rootfs = rootvnode->v_mount;
967 mount_lock(new_rootfs);
968 new_rootfs->mnt_flag |= MNT_ROOTFS;
969 mount_unlock(new_rootfs);
970
971 set_fake_bootuuid(new_rootfs);
972
973 kernproc->p_fd.fd_cdir = newdp;
974 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
975
976 vnode_put(newdp);
977
978 DBG_TRACE("%s: root switched\n", __func__);
979
980 out:
981 if (err && (buf != NULL)) {
982 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
983 }
984 return err;
985 }
986
987 /*
988 * If the path is in <file://> URL format then we allocate memory and decode it,
989 * otherwise return the same pointer.
990 *
991 * Caller is expected to check if the pointers are different.
992 */
993 static char *
url_to_path(char * url_path,size_t * sz)994 url_to_path(char *url_path, size_t *sz)
995 {
996 char *path = url_path;
997 size_t len = strlen(kIBFilePrefix);
998
999 if (strncmp(kIBFilePrefix, url_path, len) == 0) {
1000 /* its a URL - remove the file:// prefix and percent-decode */
1001 url_path += len;
1002
1003 len = strlen(url_path);
1004 if (len) {
1005 /* Make a copy of the path to URL-decode */
1006 path = (char *)kalloc_data(len + 1, Z_WAITOK);
1007 if (path == NULL) {
1008 panic("imageboot path allocation failed - cannot allocate %d bytes", (int)len);
1009 }
1010
1011 strlcpy(path, url_path, len + 1);
1012 *sz = len + 1;
1013 url_decode(path);
1014 } else {
1015 panic("Bogus imageboot path URL - missing path");
1016 }
1017
1018 DBG_TRACE("%s: root image URL <%s> becomes %s\n", __func__, url_path, path);
1019 }
1020
1021 return path;
1022 }
1023
1024 static boolean_t
imageboot_setup_new(imageboot_type_t type)1025 imageboot_setup_new(imageboot_type_t type)
1026 {
1027 int error;
1028 char *root_path = NULL;
1029 int height = 0;
1030 boolean_t done = FALSE;
1031 boolean_t auth_root = TRUE;
1032 boolean_t ramdisk_root = FALSE;
1033
1034 root_path = zalloc(ZV_NAMEI);
1035 assert(root_path != NULL);
1036
1037 unsigned imgboot_arg;
1038 if (PE_parse_boot_argn("-rootdmg-ramdisk", &imgboot_arg, sizeof(imgboot_arg))) {
1039 ramdisk_root = TRUE;
1040 }
1041
1042 if (PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) {
1043 printf("%s: container image url is %s\n", __FUNCTION__, root_path);
1044 error = imageboot_mount_image(root_path, height, type);
1045 if (error != 0) {
1046 panic("Failed to mount container image.");
1047 }
1048
1049 height++;
1050 }
1051
1052 if (PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN) == FALSE &&
1053 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) {
1054 if (height > 0) {
1055 panic("%s specified without %s or %s?", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_AUTHROOT_ARG, IMAGEBOOT_ROOT_ARG);
1056 }
1057 goto out;
1058 }
1059
1060 printf("%s: root image URL is '%s'\n", __func__, root_path);
1061
1062 /* Make a copy of the path to URL-decode */
1063 size_t pathsz;
1064 char *path = url_to_path(root_path, &pathsz);
1065 assert(path);
1066
1067 #if CONFIG_IMAGEBOOT_CHUNKLIST
1068 if (auth_root) {
1069 /*
1070 * This updates auth_root to reflect whether chunklist was
1071 * actually enforced. In effect, this clears auth_root if
1072 * CSR_ALLOW_ANY_RECOVERY_OS allowed an invalid image.
1073 */
1074 AUTHDBG("authenticating root image at %s", path);
1075 error = authenticate_root_with_chunklist(path, &auth_root);
1076 if (error) {
1077 panic("root image authentication failed (err = %d)", error);
1078 }
1079 AUTHDBG("successfully authenticated %s", path);
1080 }
1081 #endif
1082
1083 if (ramdisk_root) {
1084 error = imageboot_mount_ramdisk(path);
1085 } else {
1086 error = imageboot_mount_image(root_path, height, type);
1087 }
1088
1089 if (path != root_path) {
1090 kfree_data(path, pathsz);
1091 }
1092
1093 if (error) {
1094 panic("Failed to mount root image (err=%d, auth=%d, ramdisk=%d)",
1095 error, auth_root, ramdisk_root);
1096 }
1097
1098 #if CONFIG_IMAGEBOOT_CHUNKLIST
1099 if (auth_root) {
1100 /* check that the image version matches the running kernel */
1101 AUTHDBG("checking root image version");
1102 error = authenticate_root_version_check();
1103 if (error) {
1104 panic("root image version check failed");
1105 } else {
1106 AUTHDBG("root image version matches kernel");
1107 }
1108 }
1109 #endif
1110
1111 done = TRUE;
1112
1113 out:
1114 zfree(ZV_NAMEI, root_path);
1115 return done;
1116 }
1117
1118 __private_extern__ void
imageboot_setup(imageboot_type_t type)1119 imageboot_setup(imageboot_type_t type)
1120 {
1121 int error = 0;
1122 char *root_path = NULL;
1123
1124 DBG_TRACE("%s: entry\n", __FUNCTION__);
1125
1126 if (rootvnode == NULL) {
1127 panic("imageboot_setup: rootvnode is NULL.");
1128 }
1129
1130 /*
1131 * New boot-arg scheme:
1132 * root-dmg : the dmg that will be the root filesystem, authenticated by default.
1133 * auth-root-dmg : same as root-dmg.
1134 * container-dmg : an optional dmg that contains the root-dmg.
1135 * locker : the locker that will be the root filesystem -- mutually
1136 * exclusive with any other boot-arg.
1137 */
1138 if (imageboot_setup_new(type)) {
1139 return;
1140 }
1141
1142 root_path = zalloc(ZV_NAMEI);
1143 assert(root_path != NULL);
1144
1145 /*
1146 * Look for outermost disk image to root from. If we're doing a nested boot,
1147 * there's some sense in which the outer image never needs to be the root filesystem,
1148 * but it does need very similar treatment: it must not be unmounted, needs a fake
1149 * device vnode created for it, and should not show up in getfsstat() until exposed
1150 * with MNT_IMGSRC. We just make it the temporary root.
1151 */
1152 #if CONFIG_IMAGEBOOT_IMG4
1153 if (PE_parse_boot_argn("arp0", root_path, MAXPATHLEN)) {
1154 size_t pathsz;
1155 char *path = url_to_path(root_path, &pathsz);
1156
1157 assert(path);
1158
1159 if (authenticate_root_with_img4(path)) {
1160 panic("Root image %s does not match the manifest", root_path);
1161 }
1162 if (path != root_path) {
1163 kfree_data(path, pathsz);
1164 }
1165 } else
1166 #endif /* CONFIG_IMAGEBOOT_IMG4 */
1167 if ((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) &&
1168 (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) {
1169 panic("%s: no valid path to image.", __FUNCTION__);
1170 }
1171
1172 DBG_TRACE("%s: root image url is %s\n", __FUNCTION__, root_path);
1173
1174 error = imageboot_mount_image(root_path, 0, type);
1175 if (error) {
1176 panic("Failed on first stage of imageboot.");
1177 }
1178
1179 /*
1180 * See if we are rooting from a nested image
1181 */
1182 if (PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) {
1183 goto done;
1184 }
1185
1186 printf("%s: second level root image url is %s\n", __FUNCTION__, root_path);
1187
1188 /*
1189 * If we fail to set up second image, it's not a given that we
1190 * can safely root off the first.
1191 */
1192 error = imageboot_mount_image(root_path, 1, type);
1193 if (error) {
1194 panic("Failed on second stage of imageboot.");
1195 }
1196
1197 done:
1198 zfree(ZV_NAMEI, root_path);
1199
1200 DBG_TRACE("%s: exit\n", __FUNCTION__);
1201
1202 return;
1203 }
1204