1 /*
2 * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/systm.h>
34 #include <sys/mount_internal.h>
35 #include <sys/fsctl.h>
36 #include <sys/filedesc.h>
37 #include <sys/vnode_internal.h>
38 #include <sys/imageboot.h>
39 #include <kern/assert.h>
40 #include <vm/vm_far.h>
41
42 #include <sys/namei.h>
43 #include <sys/fcntl.h>
44 #include <sys/vnode.h>
45 #include <sys/xattr.h>
46 #include <sys/sysproto.h>
47 #include <sys/csr.h>
48 #include <miscfs/devfs/devfsdefs.h>
49 #include <libkern/crypto/sha2.h>
50 #include <libkern/crypto/rsa.h>
51 #include <libkern/OSKextLibPrivate.h>
52 #include <sys/ubc_internal.h>
53
54 #if CONFIG_IMAGEBOOT_IMG4
55 #include <libkern/img4/interface.h>
56 #include <img4/firmware.h>
57 #endif
58
59 #include <kern/kalloc.h>
60 #include <os/overflow.h>
61 #include <vm/vm_kern_xnu.h>
62
63 #include <pexpert/pexpert.h>
64 #include <kern/chunklist.h>
65
66 extern int (*mountroot)(void);
67 extern char rootdevice[DEVMAXNAMESIZE];
68
69 #define DEBUG_IMAGEBOOT 0
70
71 #if DEBUG_IMAGEBOOT
72 #define DBG_TRACE(...) printf("imageboot: " __VA_ARGS__)
73 #else
74 #define DBG_TRACE(...) do {} while(0)
75 #endif
76
77 #define AUTHDBG(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)
78 #define AUTHPRNT(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)
79
80 extern int di_root_image_ext(const char *path, char *devname, size_t devsz, dev_t *dev_p, bool removable);
81 extern int di_root_image(const char *path, char *devname, size_t devsz, dev_t *dev_p);
82 extern int di_root_ramfile_buf(void *buf, size_t bufsz, char *devname, size_t devsz, dev_t *dev_p);
83
84 static boolean_t imageboot_setup_new(imageboot_type_t type);
85
86 void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size);
87
88 extern lck_rw_t rootvnode_rw_lock;
89
90 #define kIBFilePrefix "file://"
91
92 __private_extern__ int
imageboot_format_is_valid(const char * root_path)93 imageboot_format_is_valid(const char *root_path)
94 {
95 return strncmp(root_path, kIBFilePrefix,
96 strlen(kIBFilePrefix)) == 0;
97 }
98
99 static void
vnode_get_and_drop_always(vnode_t vp)100 vnode_get_and_drop_always(vnode_t vp)
101 {
102 vnode_getalways(vp);
103 vnode_rele(vp);
104 vnode_put(vp);
105 }
106
107 __private_extern__ bool
imageboot_desired(void)108 imageboot_desired(void)
109 {
110 bool do_imageboot = false;
111
112 char *root_path = NULL;
113 root_path = zalloc(ZV_NAMEI);
114 /*
115 * Check for first layer DMG rooting.
116 *
117 * Note that here we are principally concerned with whether or not we
118 * SHOULD try to imageboot, not whether or not we are going to be able to.
119 *
120 * If NONE of the boot-args are present, then assume that image-rooting
121 * is not requested.
122 *
123 * [!! Note parens guard the entire logically OR'd set of statements, below. It validates
124 * that NONE of the below-mentioned boot-args is present...!!]
125 */
126 if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) ||
127 #if CONFIG_IMAGEBOOT_IMG4
128 PE_parse_boot_argn("arp0", root_path, MAXPATHLEN) ||
129 #endif
130 PE_parse_boot_argn("rp", root_path, MAXPATHLEN) ||
131 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) ||
132 PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN))) {
133 /* explicitly set to false */
134 do_imageboot = false;
135 } else {
136 /* now sanity check the file-path format */
137 if (imageboot_format_is_valid(root_path)) {
138 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
139 /* root_path looks good and we have one of the aforementioned bootargs */
140 do_imageboot = true;
141 } else {
142 /* explicitly set to false */
143 do_imageboot = false;
144 }
145 }
146
147 zfree(ZV_NAMEI, root_path);
148 return do_imageboot;
149 }
150
151 __private_extern__ imageboot_type_t
imageboot_needed(void)152 imageboot_needed(void)
153 {
154 imageboot_type_t result = IMAGEBOOT_NONE;
155 char *root_path = NULL;
156
157 DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__);
158
159 if (!imageboot_desired()) {
160 goto out;
161 }
162
163 root_path = zalloc(ZV_NAMEI);
164 result = IMAGEBOOT_DMG;
165
166 /* Check for second layer */
167 if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) ||
168 PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) {
169 goto out;
170 }
171
172 /* Sanity-check second layer */
173 if (imageboot_format_is_valid(root_path)) {
174 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
175 } else {
176 panic("%s: Invalid URL scheme for %s",
177 __FUNCTION__, root_path);
178 }
179
180 out:
181 if (root_path != NULL) {
182 zfree(ZV_NAMEI, root_path);
183 }
184 return result;
185 }
186
187 extern bool IOBaseSystemARVRootHashAvailable(void);
188
189
190 /*
191 * Mounts new filesystem based on image path, and pivots it to the root.
192 * The image to be mounted is located at image_path.
193 * It will be mounted at mount_path.
194 * The vfs_switch_root operation will be performed.
195 * After the pivot, the outgoing root filesystem (the filesystem at root when
196 * this function begins) will be at outgoing_root_path. If `skip_signature_check` is true,
197 * then ignore the chunklisted or authAPFS checks on this image
198 */
199 __private_extern__ int
imageboot_pivot_image(const char * image_path,imageboot_type_t type,const char * mount_path,const char * outgoing_root_path,const bool rooted_dmg,const bool skip_signature_check)200 imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
201 const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check)
202 {
203 int error = 0;
204 boolean_t authenticated_dmg_chunklist = false;
205 vnode_t mount_vp = NULLVP;
206 errno_t rootauth;
207
208
209 if (type != IMAGEBOOT_DMG) {
210 panic("not supported");
211 }
212
213 /*
214 * Check that the image file actually exists.
215 * We also need to find the mount it's on, to mark it as backing the
216 * root.
217 */
218 vnode_t imagevp = NULLVP;
219 error = vnode_lookup(image_path, 0, &imagevp, vfs_context_kernel());
220 if (error) {
221 printf("%s: image file not found or couldn't be read: %d\n", __FUNCTION__, error);
222 /*
223 * bail out here to short-circuit out of panic logic below.
224 * Failure to find the pivot-image should not be a fatal condition (ENOENT)
225 * since it may result in natural consequences (ergo, cannot unlock filevault prompt).
226 */
227 return error;
228 }
229
230 /*
231 * load the disk image and obtain its device.
232 * di_root_image's name and the names of its arguments suggest it has
233 * to be mounted at the root, but that's not actually needed.
234 * We just need to obtain the device info.
235 */
236
237 dev_t dev;
238 char devname[DEVMAXNAMESIZE];
239 const char *error_func = NULL;
240 unsigned ramdisk_arg = 0;
241 (void) PE_parse_boot_argn("-bsdmgroot-ramdisk", &ramdisk_arg, sizeof(ramdisk_arg));
242
243 if (ramdisk_arg) {
244 size_t bufsz = 0;
245 void *buf = NULL;
246 error_func = "imageboot_read_file";
247 // no_softlimit: di_root_ramfile_buf is OK to handle a no_softlimit buffer
248 error = imageboot_read_file_pageable(image_path, &buf, &bufsz, /* no_softlimit */ true);
249 if (error == 0) {
250 error_func = "di_root_ramfile_buf";
251 error = di_root_ramfile_buf(buf, bufsz, devname, sizeof(devname), &dev);
252 }
253 if (error && (buf != NULL)) {
254 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
255 }
256 } else {
257 error_func = "di_root_image";
258 error = di_root_image_ext(image_path, devname, DEVMAXNAMESIZE, &dev, true);
259 }
260 if (error) {
261 panic("%s: %s failed: %d", __FUNCTION__, error_func, error);
262 }
263
264 printf("%s: attached disk image %s as %s\n", __FUNCTION__, image_path, devname);
265
266
267 #if CONFIG_IMAGEBOOT_CHUNKLIST
268 if ((rooted_dmg == false) && !IOBaseSystemARVRootHashAvailable()) {
269 error = authenticate_root_with_chunklist(image_path, NULL);
270 if (error == 0) {
271 printf("authenticated root-dmg via chunklist...\n");
272 authenticated_dmg_chunklist = true;
273 } else {
274 /* root hash was not available, and image is NOT chunklisted? */
275 printf("failed to chunklist-authenticate root-dmg @ %s\n", image_path);
276 }
277 }
278 #endif
279
280 char fulldevname[DEVMAXNAMESIZE + 5]; // "/dev/"
281 strlcpy(fulldevname, "/dev/", sizeof(fulldevname));
282 strlcat(fulldevname, devname, sizeof(fulldevname));
283
284 /*
285 * mount expects another layer of indirection (because it expects to
286 * be getting a user_addr_t of a char *.
287 * Make a pointer-to-pointer on our stack. It won't use this
288 * address after it returns so this should be safe.
289 */
290 char *fulldevnamep = &(fulldevname[0]);
291 char **fulldevnamepp = &fulldevnamep;
292
293 #define PIVOTMNT "/System/Volumes/BaseSystem"
294
295
296 /* Attempt to mount as HFS; if it fails, then try as APFS */
297 printf("%s: attempting to mount as hfs...\n", __FUNCTION__);
298 error = kernel_mount("hfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
299 if (error) {
300 printf("mount failed: %d\n", error);
301 printf("%s: attempting to mount as apfs...\n", __FUNCTION__);
302 error = kernel_mount("apfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
303 }
304
305 /* If we didn't mount as either HFS or APFS, then bail out */
306 if (error) {
307 /*
308 * Note that for this particular failure case (failure to mount), the disk image
309 * being attached may have failed to quiesce within the alloted time out (20-30 sec).
310 * For example, it may be still probing, or APFS container enumeration may have not
311 * completed. If so, then we may have fallen into this particular error case. However,
312 * failure to complete matching should be an exceptional case as 30 sec. is quite a
313 * long time to wait for matching to complete (which would have occurred in
314 * di_root_image_ext).
315 */
316 #if defined(__arm64__) && XNU_TARGET_OS_OSX
317 panic("%s: failed to mount pivot image(%d)!", __FUNCTION__, error);
318 #endif
319 printf("%s: failed to mount pivot image(%d) !", __FUNCTION__, error);
320 goto done;
321 }
322
323 /* otherwise, if the mount succeeded, then assert that the DMG is authenticated (either chunklist or authapfs) */
324 error = vnode_lookup(PIVOTMNT, 0, &mount_vp, vfs_context_kernel());
325 if (error) {
326 #if defined(__arm64__) && XNU_TARGET_OS_OSX
327 panic("%s: failed to lookup pivot root (%d) !", __FUNCTION__, error);
328 #endif
329 printf("%s: failed to lookup pivot root (%d)!", __FUNCTION__, error);
330 goto done;
331 }
332
333 /* the 0x1 implies base system */
334 rootauth = VNOP_IOCTL(mount_vp, FSIOC_KERNEL_ROOTAUTH, (caddr_t)0x1, 0, vfs_context_kernel());
335 if (rootauth) {
336 printf("BS-DMG failed to authenticate intra-FS \n");
337 /*
338 * If we are using a custom rooted DMG, or if we have already authenticated
339 * the DMG via chunklist, then it is permissible to use.
340 * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant.
341 */
342 if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) {
343 rootauth = 0;
344 }
345 error = rootauth;
346 }
347 vnode_put(mount_vp);
348 mount_vp = NULLVP;
349
350 if (error) {
351 /*
352 * Failure here exclusively means that the mount failed to authenticate.
353 * This means that the disk image either was not sealed (authapfs), or it was
354 * not hosted on a chunklisted DMG. Both scenarios may be fatal depending
355 * on the platform.
356 */
357 #if defined(__arm64__) && XNU_TARGET_OS_OSX
358 panic("%s: could not authenticate the pivot image: %d. giving up.", __FUNCTION__, error);
359 #endif
360 printf("%s: could not authenticate the pivot image: %d. giving up.\n", __FUNCTION__, error);
361 goto done;
362 }
363
364 if (rootvnode) {
365 mount_t root_mp = vnode_mount(rootvnode);
366 if (root_mp && (root_mp->mnt_kern_flag & MNTK_SSD)) {
367 rootvp_is_ssd = true;
368 }
369 }
370 /*
371 * pivot the incoming and outgoing filesystems
372 */
373 error = vfs_switch_root(mount_path, outgoing_root_path, 0);
374 if (error) {
375 panic("%s: vfs_switch_root failed: %d", __FUNCTION__, error);
376 }
377
378 /*
379 * Mark the filesystem containing the image as backing root, so it
380 * won't be unmountable.
381 *
382 * vfs_switch_root() clears this flag, so we have to set it after
383 * the pivot call.
384 * If the system later pivots out of the image, vfs_switch_root
385 * will clear it again, so the backing filesystem can be unmounted.
386 */
387 if (!ramdisk_arg) {
388 mount_t imagemp = imagevp->v_mount;
389 lck_rw_lock_exclusive(&imagemp->mnt_rwlock);
390 imagemp->mnt_kern_flag |= MNTK_BACKS_ROOT;
391 lck_rw_done(&imagemp->mnt_rwlock);
392 }
393
394 error = 0;
395
396 /*
397 * Note that we do NOT change kern.bootuuid here -
398 * imageboot_mount_image() does, but imageboot_pivot_image() doesn't.
399 * imageboot_mount_image() is used when the root volume uuid was
400 * "always supposed to be" the one inside the dmg. imageboot_pivot_
401 * image() is used when the true root volume just needs to be
402 * obscured for a moment by the dmg.
403 */
404
405 done:
406 if (imagevp != NULLVP) {
407 vnode_put(imagevp);
408 }
409 return error;
410 }
411
412 /* kern_sysctl.c */
413 extern uuid_string_t fake_bootuuid;
414
415 static void
set_fake_bootuuid(mount_t mp)416 set_fake_bootuuid(mount_t mp)
417 {
418 struct vfs_attr va;
419 VFSATTR_INIT(&va);
420 VFSATTR_WANTED(&va, f_uuid);
421
422 if (vfs_getattr(mp, &va, vfs_context_current()) != 0) {
423 return;
424 }
425
426 if (!VFSATTR_IS_SUPPORTED(&va, f_uuid)) {
427 return;
428 }
429
430 uuid_unparse(va.f_uuid, fake_bootuuid);
431 }
432
433 /*
434 * Swaps in new root filesystem based on image path.
435 * Current root filesystem is removed from mount list and
436 * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and
437 * "rootvnode" is reset. Root vnode of currentroot filesystem
438 * is returned with usecount (no iocount).
439 * kern.bootuuid is arranged to return the UUID of the mounted image. (If
440 * we did nothing here, it would be the UUID of the image source volume.)
441 */
442 __private_extern__ int
imageboot_mount_image(const char * root_path,int height,imageboot_type_t type)443 imageboot_mount_image(const char *root_path, int height, imageboot_type_t type)
444 {
445 dev_t dev;
446 int error;
447 /*
448 * Need to stash this here since we may do a kernel_mount() on /, which will
449 * automatically update the rootvnode global. Note that vfs_mountroot() does
450 * not update that global, which is a bit weird.
451 */
452 vnode_t old_rootvnode = rootvnode;
453 vnode_t newdp;
454 mount_t new_rootfs;
455 boolean_t update_rootvnode = FALSE;
456
457 if (type == IMAGEBOOT_DMG) {
458 error = di_root_image(root_path, rootdevice, DEVMAXNAMESIZE, &dev);
459 if (error) {
460 panic("%s: di_root_image failed: %d", __FUNCTION__, error);
461 }
462
463 rootdev = dev;
464 mountroot = NULL;
465 printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
466 error = vfs_mountroot();
467 if (error != 0) {
468 panic("vfs_mountroot() failed.");
469 }
470
471 update_rootvnode = TRUE;
472 } else {
473 panic("invalid imageboot type: %d", type);
474 }
475
476 /*
477 * Get the vnode for '/'.
478 * Set fdp->fd_fd.fd_cdir to reference it.
479 */
480 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
481 panic("%s: cannot find root vnode", __FUNCTION__);
482 }
483 DBG_TRACE("%s: old root fsname: %s\n", __FUNCTION__, old_rootvnode->v_mount->mnt_vtable->vfc_name);
484
485 if (old_rootvnode != NULL) {
486 /* remember the old rootvnode, but remove it from mountlist */
487 mount_t old_rootfs = old_rootvnode->v_mount;
488
489 mount_list_remove(old_rootfs);
490 mount_lock(old_rootfs);
491 old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
492 old_rootfs->mnt_flag &= ~MNT_ROOTFS;
493 mount_unlock(old_rootfs);
494 }
495
496 vnode_ref(newdp);
497 vnode_put(newdp);
498
499 lck_rw_lock_exclusive(&rootvnode_rw_lock);
500 /* switch to the new rootvnode */
501 if (update_rootvnode) {
502 rootvnode = newdp;
503 set_fake_bootuuid(rootvnode->v_mount);
504 }
505
506 new_rootfs = rootvnode->v_mount;
507 mount_lock(new_rootfs);
508 new_rootfs->mnt_flag |= MNT_ROOTFS;
509 mount_unlock(new_rootfs);
510
511 kernproc->p_fd.fd_cdir = newdp;
512 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
513
514 DBG_TRACE("%s: root switched\n", __FUNCTION__);
515
516 if (old_rootvnode != NULL) {
517 #ifdef CONFIG_IMGSRC_ACCESS
518 if (height >= 0) {
519 imgsrc_rootvnodes[height] = old_rootvnode;
520 } else {
521 vnode_get_and_drop_always(old_rootvnode);
522 }
523 #else
524 #pragma unused(height)
525 vnode_get_and_drop_always(old_rootvnode);
526 #endif /* CONFIG_IMGSRC_ACCESS */
527 }
528 return 0;
529 }
530
531 /*
532 * Return a memory object for given file path.
533 * Also returns a vnode reference for the given file path.
534 */
535 void *
ubc_getobject_from_filename(const char * filename,struct vnode ** vpp,off_t * file_size)536 ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size)
537 {
538 int err = 0;
539 struct nameidata ndp = {};
540 struct vnode *vp = NULL;
541 off_t fsize = 0;
542 vfs_context_t ctx = vfs_context_kernel();
543 void *control = NULL;
544
545 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(filename), ctx);
546 if ((err = namei(&ndp)) != 0) {
547 goto errorout;
548 }
549 nameidone(&ndp);
550 vp = ndp.ni_vp;
551
552 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
553 goto errorout;
554 }
555
556 if (fsize < 0) {
557 goto errorout;
558 }
559
560 control = ubc_getobject(vp, UBC_FLAGS_NONE);
561 if (control == NULL) {
562 goto errorout;
563 }
564
565 *file_size = fsize;
566 *vpp = vp;
567 vp = NULL;
568
569 errorout:
570 if (vp) {
571 vnode_put(vp);
572 }
573 return control;
574 }
575
576 static int
imageboot_read_file_internal(const char * path,const off_t offset,const bool pageable,void ** bufp,size_t * bufszp,off_t * fsizep,bool no_softlimit)577 imageboot_read_file_internal(const char *path, const off_t offset, const bool pageable, void **bufp, size_t *bufszp, off_t *fsizep, bool no_softlimit)
578 {
579 int err = 0;
580 struct nameidata ndp = {};
581 struct vnode *vp = NULL;
582 struct vnode *rsrc_vp = NULL;
583 char *readbuf = NULL;
584 off_t readsize = 0;
585 off_t readoff = 0;
586 off_t fsize = 0;
587 size_t maxsize = 0;
588 char *buf = NULL;
589 bool doclose = false;
590
591 vfs_context_t ctx = vfs_context_kernel();
592 proc_t p = vfs_context_proc(ctx);
593 kauth_cred_t kerncred = vfs_context_ucred(ctx);
594
595 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF | FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
596 if ((err = namei(&ndp)) != 0) {
597 AUTHPRNT("namei failed (%s) - %d", path, err);
598 goto out;
599 }
600 nameidone(&ndp);
601 vp = ndp.ni_vp;
602
603 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
604 AUTHPRNT("failed to get vnode size of %s - %d", path, err);
605 goto out;
606 }
607 if (fsize < 0) {
608 panic("negative file size");
609 }
610 if (offset < 0) {
611 AUTHPRNT("negative file offset");
612 err = EINVAL;
613 goto out;
614 }
615
616 if (fsizep) {
617 *fsizep = fsize;
618 }
619
620 if ((err = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
621 AUTHPRNT("failed to open %s - %d", path, err);
622 goto out;
623 }
624 doclose = true;
625
626 /* cap fsize to the amount that remains after offset */
627 if (os_sub_overflow(fsize, offset, &fsize)) {
628 fsize = 0;
629 } else if (fsize < 0) {
630 fsize = 0;
631 }
632
633 /* if bufsz is non-zero, cap the read at bufsz bytes */
634 maxsize = *bufszp;
635 if (maxsize && (maxsize < (size_t)fsize)) {
636 fsize = maxsize;
637 }
638
639 /* if fsize is larger than the specified limit (presently 2.5GB) or a NVRAM-configured limit, fail */
640 maxsize = IMAGEBOOT_MAX_FILESIZE;
641 PE_parse_boot_argn("rootdmg-maxsize", &maxsize, sizeof(maxsize));
642 if (maxsize && (maxsize < (size_t)fsize)) {
643 AUTHPRNT("file is too large (%lld > %lld)", (long long) fsize, (long long) maxsize);
644 err = EFBIG;
645 goto out;
646 }
647
648 if (pageable) {
649 vm_offset_t addr = 0;
650 kma_flags_t kma_flags = 0;
651
652 kma_flags = KMA_PAGEABLE | KMA_DATA_SHARED;
653 if (no_softlimit) {
654 kma_flags |= KMA_NOSOFTLIMIT;
655 }
656
657 if (kmem_alloc(kernel_map, &addr, (vm_size_t)fsize,
658 kma_flags, VM_KERN_MEMORY_FILE) == KERN_SUCCESS) {
659 buf = (char *)addr;
660 } else {
661 buf = NULL;
662 }
663 } else {
664 zalloc_flags_t zflags = 0;
665
666 //limit kalloc data calls to only 2GB.
667 if (fsize > IMAGEBOOT_MAX_KALLOCSIZE) {
668 AUTHPRNT("file is too large for non-pageable (%lld)", (long long) fsize);
669 err = ENOMEM;
670 goto out;
671 }
672
673 zflags = Z_WAITOK;
674 if (no_softlimit) {
675 zflags |= Z_NOSOFTLIMIT;
676 }
677
678 buf = (char *)kalloc_data((vm_size_t)fsize, zflags);
679 }
680 if (buf == NULL) {
681 err = ENOMEM;
682 goto out;
683 }
684
685 #if NAMEDSTREAMS
686 /* find resource fork so we can evict cached decmpfs data */
687 if (VNOP_GETNAMEDSTREAM(vp, &rsrc_vp, XATTR_RESOURCEFORK_NAME, NS_OPEN, /*flags*/ 0, ctx) == 0) {
688 vnode_ref(rsrc_vp);
689 vnode_put(rsrc_vp);
690 AUTHDBG("Found resource fork for %s", path);
691 }
692 #endif
693
694 /* read data in chunks to handle (fsize > INT_MAX) */
695 readbuf = buf;
696 readsize = fsize;
697 readoff = offset;
698 while (readsize > 0) {
699 const off_t chunksize_max = 16 * 1024 * 1024; /* 16 MiB */
700 const off_t chunksize = MIN(readsize, chunksize_max);
701
702 /* read next chunk, pass IO_NOCACHE to clarify our intent (even if ignored) */
703 if ((err = vn_rdwr(UIO_READ, vp, (caddr_t)readbuf, (int)chunksize, readoff, UIO_SYSSPACE, IO_NODELOCKED | IO_NOCACHE | IO_RAOFF, kerncred, /*resid*/ NULL, p)) != 0) {
704 AUTHPRNT("Cannot read %lld bytes at offset %lld from %s - %d", (long long)chunksize, (long long)readoff, path, err);
705 goto out;
706 }
707
708 /* evict cached pages so they don't accumulate during early boot */
709 ubc_msync(vp, readoff, readoff + chunksize, NULL, UBC_INVALIDATE | UBC_PUSHALL);
710
711 /* evict potentially-cached decmpfs data if we have a resource fork */
712 if (rsrc_vp != NULL) {
713 if (vnode_getwithref(rsrc_vp) == 0) {
714 ubc_msync(rsrc_vp, 0, ubc_getsize(rsrc_vp), NULL, UBC_INVALIDATE | UBC_PUSHALL);
715 vnode_put(rsrc_vp);
716 }
717 }
718
719 readbuf = VM_FAR_ADD_PTR_UNBOUNDED(readbuf, chunksize);
720 readsize -= chunksize;
721 readoff += chunksize;
722 }
723
724 out:
725 if (doclose) {
726 VNOP_CLOSE(vp, FREAD, ctx);
727 }
728 if (rsrc_vp) {
729 vnode_rele(rsrc_vp);
730 rsrc_vp = NULL;
731 }
732 if (vp) {
733 vnode_put(vp);
734 vp = NULL;
735 }
736
737 if (err) {
738 if (buf == NULL) {
739 /* nothing to free */
740 } else if (pageable) {
741 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)fsize);
742 } else {
743 kfree_data(buf, (vm_size_t)fsize);
744 }
745 } else {
746 *bufp = buf;
747 *bufszp = (size_t)fsize;
748 }
749
750 return err;
751 }
752
/*
 * Read the whole file at 'path' (from offset 0) into a pageable buffer.
 * The full file size is not reported. See imageboot_read_file_internal()
 * for the meaning of bufp, bufszp and no_softlimit and for the return value.
 */
int
imageboot_read_file_pageable(const char *path, void **bufp, size_t *bufszp, bool no_softlimit)
{
	const off_t start_offset = 0;
	const bool want_pageable = true;

	return imageboot_read_file_internal(path, start_offset, want_pageable, bufp, bufszp, NULL, no_softlimit);
}
758
/*
 * Read the file at 'path' starting at 'offset' into a non-pageable
 * (kalloc_data) buffer, with the allocator soft limit left in place.
 * The full file size is not reported. See imageboot_read_file_internal()
 * for the meaning of bufp and bufszp and for the return value.
 */
int
imageboot_read_file_from_offset(const char *path, const off_t offset, void **bufp, size_t *bufszp)
{
	const bool want_pageable = false;
	const bool no_softlimit = false;

	return imageboot_read_file_internal(path, offset, want_pageable, bufp, bufszp, NULL, no_softlimit);
}
764
/*
 * Read the file at 'path' from offset 0 into a non-pageable (kalloc_data)
 * buffer, with the allocator soft limit left in place. If fsizep is
 * non-NULL it receives the full file size. See
 * imageboot_read_file_internal() for bufp, bufszp and the return value.
 */
int
imageboot_read_file(const char *path, void **bufp, size_t *bufszp, off_t *fsizep)
{
	const off_t start_offset = 0;
	const bool want_pageable = false;
	const bool no_softlimit = false;

	return imageboot_read_file_internal(path, start_offset, want_pageable, bufp, bufszp, fsizep, no_softlimit);
}
770
#if CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST
/*
 * Look up the image file at 'path' and return its vnode.
 *
 * path   file to look up; it must be a regular file (VREG).
 * fsize  optional out: when non-NULL, receives the size from vnode_size().
 * errp   out: receives the errno value on failure; it is not written on
 *        success. May be NULL if the caller does not need the code.
 *
 * Returns the vnode obtained from namei() on success (the caller must
 * vnode_put() it), or NULL on failure after releasing the vnode.
 */
vnode_t
imgboot_get_image_file(const char *path, off_t *fsize, int *errp)
{
	struct nameidata ndp = {};
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_kernel();
	int err;

	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
	if ((err = namei(&ndp)) != 0) {
		AUTHPRNT("Cannot find %s - error %d", path, err);
	} else {
		nameidone(&ndp);
		vp = ndp.ni_vp;

		if (vp->v_type != VREG) {
			err = EINVAL;
			AUTHPRNT("%s is not a regular file", path);
		} else if (fsize) {
			if ((err = vnode_size(vp, fsize, ctx)) != 0) {
				AUTHPRNT("Cannot get file size of %s - error %d", path, err);
			}
		}
	}

	if (err) {
		if (vp) {
			vnode_put(vp);
		}
		/* errp is treated as optional, like fsize */
		if (errp) {
			*errp = err;
		}
		vp = NULL;
	}
	return vp;
}
#endif /* CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST */
807
808 #if CONFIG_IMAGEBOOT_IMG4
809
810 #define APTICKET_NAME "apticket.der"
811
812 static char *
imgboot_get_apticket_path(const char * rootpath,size_t * sz)813 imgboot_get_apticket_path(const char *rootpath, size_t *sz)
814 {
815 size_t plen = strlen(rootpath) + sizeof(APTICKET_NAME) + 1;
816 char *path = (char *)kalloc_data(plen, Z_WAITOK);
817
818 if (path) {
819 char *slash;
820
821 strlcpy(path, rootpath, plen);
822 slash = strrchr(path, '/');
823 if (slash == NULL) {
824 slash = path;
825 } else {
826 slash++;
827 }
828 strlcpy(slash, APTICKET_NAME, sizeof(APTICKET_NAME) + 1);
829 }
830
831 *sz = plen;
832 return path;
833 }
834
835 static int
authenticate_root_with_img4(const char * rootpath)836 authenticate_root_with_img4(const char *rootpath)
837 {
838 errno_t rv;
839 vnode_t vp = NULLVP;
840 size_t ticket_pathsz = 0;
841 char *ticket_path;
842 img4_buff_t tck = IMG4_BUFF_INIT;
843 img4_firmware_execution_context_t exec = {
844 .i4fex_version = IMG4_FIRMWARE_EXECUTION_CONTEXT_STRUCT_VERSION,
845 .i4fex_execute = NULL,
846 .i4fex_context = NULL,
847 };
848 img4_firmware_t fw = NULL;
849 img4_firmware_flags_t fw_flags = IMG4_FIRMWARE_FLAG_BARE |
850 IMG4_FIRMWARE_FLAG_SUBSEQUENT_STAGE;
851
852 DBG_TRACE("Check %s\n", rootpath);
853
854 ticket_path = imgboot_get_apticket_path(rootpath, &ticket_pathsz);
855 if (ticket_path == NULL) {
856 AUTHPRNT("Cannot construct ticket path - out of memory");
857 return ENOMEM;
858 }
859
860 rv = imageboot_read_file(ticket_path, (void **)&tck.i4b_bytes, &tck.i4b_len, NULL);
861 if (rv) {
862 AUTHPRNT("Cannot get a ticket from %s - %d\n", ticket_path, rv);
863 goto out_with_ticket_path;
864 }
865
866 DBG_TRACE("Got %lu bytes of manifest from %s\n", tck.i4b_len, ticket_path);
867
868 vp = imgboot_get_image_file(rootpath, NULL, &rv);
869 if (vp == NULL) {
870 /* Error message had been printed already */
871 rv = EIO;
872 goto out_with_ticket_bytes;
873 }
874
875 fw = img4_firmware_new_from_vnode_4xnu(IMG4_RUNTIME_DEFAULT, &exec, 'rosi',
876 vp, fw_flags);
877 if (!fw) {
878 AUTHPRNT("Could not allocate new firmware");
879 rv = ENOMEM;
880 goto out_with_ticket_bytes;
881 }
882
883 img4_firmware_attach_manifest(fw, &tck);
884 rv = img4_firmware_evaluate(fw, img4_chip_select_personalized_ap(), NULL);
885
886 out_with_ticket_bytes:
887 kfree_data(tck.i4b_bytes, tck.i4b_len);
888 out_with_ticket_path:
889 kfree_data(ticket_path, ticket_pathsz);
890
891 img4_firmware_destroy(&fw);
892
893 if (vp) {
894 vnode_put(vp);
895 }
896 return rv;
897 }
898 #endif /* CONFIG_IMAGEBOOT_IMG4 */
899
900
901 /*
902 * Attach the image at 'path' as a ramdisk and mount it as our new rootfs.
903 * All existing mounts are first umounted.
904 */
905 static int
imageboot_mount_ramdisk(const char * path)906 imageboot_mount_ramdisk(const char *path)
907 {
908 int err = 0;
909 size_t bufsz = 0;
910 void *buf = NULL;
911 dev_t dev;
912 vnode_t newdp;
913 vnode_t tvp;
914 mount_t new_rootfs;
915
916 /*
917 * Read our target image from disk
918 *
919 * We override the allocator soft-limit in order to allow booting large RAM
920 * disks. As a consequence, we are responsible for manipulating the
921 * buffer only through vm_far safe APIs.
922 */
923 err = imageboot_read_file_pageable(path, &buf, &bufsz, /* no_softlimit */ true);
924 if (err) {
925 printf("%s: failed: imageboot_read_file_pageable() = %d\n", __func__, err);
926 goto out;
927 }
928 DBG_TRACE("%s: read '%s' sz = %lu\n", __func__, path, bufsz);
929
930 #if CONFIG_IMGSRC_ACCESS
931 /* Re-add all root mounts to the mount list in the correct order... */
932 mount_list_remove(rootvnode->v_mount);
933 for (int i = 0; i < MAX_IMAGEBOOT_NESTING; i++) {
934 struct vnode *vn = imgsrc_rootvnodes[i];
935 if (vn) {
936 vnode_getalways(vn);
937 imgsrc_rootvnodes[i] = NULLVP;
938
939 mount_t mnt = vn->v_mount;
940 mount_lock(mnt);
941 mnt->mnt_flag |= MNT_ROOTFS;
942 mount_list_add(mnt);
943 mount_unlock(mnt);
944
945 vnode_rele(vn);
946 vnode_put(vn);
947 }
948 }
949 mount_list_add(rootvnode->v_mount);
950 #endif
951
952 /* ... and unmount everything */
953 vfs_unmountall(FALSE);
954
955 lck_rw_lock_exclusive(&rootvnode_rw_lock);
956 kernproc->p_fd.fd_cdir = NULL;
957 tvp = rootvnode;
958 rootvnode = NULL;
959 rootvp = NULLVP;
960 rootdev = NODEV;
961 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
962 vnode_get_and_drop_always(tvp);
963
964 /* Attach the ramfs image ... */
965 err = di_root_ramfile_buf(buf, bufsz, rootdevice, DEVMAXNAMESIZE, &dev);
966 if (err) {
967 printf("%s: failed: di_root_ramfile_buf() = %d\n", __func__, err);
968 goto out;
969 }
970
971 /* ... and mount it */
972 rootdev = dev;
973 mountroot = NULL;
974 err = vfs_mountroot();
975 if (err) {
976 printf("%s: failed: vfs_mountroot() = %d\n", __func__, err);
977 goto out;
978 }
979
980 /* Switch to new root vnode */
981 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
982 panic("%s: cannot find root vnode", __func__);
983 }
984 vnode_ref(newdp);
985
986 lck_rw_lock_exclusive(&rootvnode_rw_lock);
987 rootvnode = newdp;
988 rootvnode->v_flag |= VROOT;
989 new_rootfs = rootvnode->v_mount;
990 mount_lock(new_rootfs);
991 new_rootfs->mnt_flag |= MNT_ROOTFS;
992 mount_unlock(new_rootfs);
993
994 set_fake_bootuuid(new_rootfs);
995
996 kernproc->p_fd.fd_cdir = newdp;
997 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
998
999 vnode_put(newdp);
1000
1001 DBG_TRACE("%s: root switched\n", __func__);
1002
1003 out:
1004 if (err && (buf != NULL)) {
1005 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
1006 }
1007 return err;
1008 }
1009
1010 /*
1011 * If the path is in <file://> URL format then we allocate memory and decode it,
1012 * otherwise return the same pointer.
1013 *
1014 * Caller is expected to check if the pointers are different.
1015 */
1016 static char *
url_to_path(char * url_path,size_t * sz)1017 url_to_path(char *url_path, size_t *sz)
1018 {
1019 char *path = url_path;
1020 size_t len = strlen(kIBFilePrefix);
1021
1022 if (strncmp(kIBFilePrefix, url_path, len) == 0) {
1023 /* its a URL - remove the file:// prefix and percent-decode */
1024 url_path += len;
1025
1026 len = strlen(url_path);
1027 if (len) {
1028 /* Make a copy of the path to URL-decode */
1029 path = (char *)kalloc_data(len + 1, Z_WAITOK);
1030 if (path == NULL) {
1031 panic("imageboot path allocation failed - cannot allocate %d bytes", (int)len);
1032 }
1033
1034 strlcpy(path, url_path, len + 1);
1035 *sz = len + 1;
1036 url_decode(path);
1037 } else {
1038 panic("Bogus imageboot path URL - missing path");
1039 }
1040
1041 DBG_TRACE("%s: root image URL <%s> becomes %s\n", __func__, url_path, path);
1042 }
1043
1044 return path;
1045 }
1046
1047 static boolean_t
imageboot_setup_new(imageboot_type_t type)1048 imageboot_setup_new(imageboot_type_t type)
1049 {
1050 int error;
1051 char *root_path = NULL;
1052 int height = 0;
1053 boolean_t done = FALSE;
1054 boolean_t auth_root = TRUE;
1055 boolean_t ramdisk_root = FALSE;
1056
1057 root_path = zalloc(ZV_NAMEI);
1058 assert(root_path != NULL);
1059
1060 unsigned imgboot_arg;
1061 if (PE_parse_boot_argn("-rootdmg-ramdisk", &imgboot_arg, sizeof(imgboot_arg))) {
1062 ramdisk_root = TRUE;
1063 }
1064
1065 if (PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) {
1066 printf("%s: container image url is %s\n", __FUNCTION__, root_path);
1067 error = imageboot_mount_image(root_path, height, type);
1068 if (error != 0) {
1069 panic("Failed to mount container image.");
1070 }
1071
1072 height++;
1073 }
1074
1075 if (PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN) == FALSE &&
1076 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) {
1077 if (height > 0) {
1078 panic("%s specified without %s or %s?", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_AUTHROOT_ARG, IMAGEBOOT_ROOT_ARG);
1079 }
1080 goto out;
1081 }
1082
1083 printf("%s: root image URL is '%s'\n", __func__, root_path);
1084
1085 /* Make a copy of the path to URL-decode */
1086 size_t pathsz;
1087 char *path = url_to_path(root_path, &pathsz);
1088 assert(path);
1089
1090 #if CONFIG_IMAGEBOOT_CHUNKLIST
1091 if (auth_root) {
1092 /*
1093 * This updates auth_root to reflect whether chunklist was
1094 * actually enforced. In effect, this clears auth_root if
1095 * CSR_ALLOW_ANY_RECOVERY_OS allowed an invalid image.
1096 */
1097 AUTHDBG("authenticating root image at %s", path);
1098 error = authenticate_root_with_chunklist(path, &auth_root);
1099 if (error) {
1100 panic("root image authentication failed (err = %d)", error);
1101 }
1102 AUTHDBG("successfully authenticated %s", path);
1103 }
1104 #endif
1105
1106 if (ramdisk_root) {
1107 error = imageboot_mount_ramdisk(path);
1108 } else {
1109 error = imageboot_mount_image(root_path, height, type);
1110 }
1111
1112 if (path != root_path) {
1113 kfree_data(path, pathsz);
1114 }
1115
1116 if (error) {
1117 if (error == EFBIG) {
1118 panic("root imagefile is too large (err=%d, auth=%d, ramdisk=%d)",
1119 error, auth_root, ramdisk_root);
1120 } else {
1121 panic("Failed to mount root image (err=%d, auth=%d, ramdisk=%d)",
1122 error, auth_root, ramdisk_root);
1123 }
1124 }
1125
1126 #if CONFIG_IMAGEBOOT_CHUNKLIST
1127 if (auth_root) {
1128 /* check that the image version matches the running kernel */
1129 AUTHDBG("checking root image version");
1130 error = authenticate_root_version_check();
1131 if (error) {
1132 panic("root image version check failed");
1133 } else {
1134 AUTHDBG("root image version matches kernel");
1135 }
1136 }
1137 #endif
1138
1139 done = TRUE;
1140
1141 out:
1142 zfree(ZV_NAMEI, root_path);
1143 return done;
1144 }
1145
1146 __private_extern__ void
imageboot_setup(imageboot_type_t type)1147 imageboot_setup(imageboot_type_t type)
1148 {
1149 int error = 0;
1150 char *root_path = NULL;
1151
1152 DBG_TRACE("%s: entry\n", __FUNCTION__);
1153
1154 if (rootvnode == NULL) {
1155 panic("imageboot_setup: rootvnode is NULL.");
1156 }
1157
1158 /*
1159 * New boot-arg scheme:
1160 * root-dmg : the dmg that will be the root filesystem, authenticated by default.
1161 * auth-root-dmg : same as root-dmg.
1162 * container-dmg : an optional dmg that contains the root-dmg.
1163 * locker : the locker that will be the root filesystem -- mutually
1164 * exclusive with any other boot-arg.
1165 */
1166 if (imageboot_setup_new(type)) {
1167 return;
1168 }
1169
1170 root_path = zalloc(ZV_NAMEI);
1171 assert(root_path != NULL);
1172
1173 /*
1174 * Look for outermost disk image to root from. If we're doing a nested boot,
1175 * there's some sense in which the outer image never needs to be the root filesystem,
1176 * but it does need very similar treatment: it must not be unmounted, needs a fake
1177 * device vnode created for it, and should not show up in getfsstat() until exposed
1178 * with MNT_IMGSRC. We just make it the temporary root.
1179 */
1180 #if CONFIG_IMAGEBOOT_IMG4
1181 if (PE_parse_boot_argn("arp0", root_path, MAXPATHLEN)) {
1182 size_t pathsz;
1183 char *path = url_to_path(root_path, &pathsz);
1184
1185 assert(path);
1186
1187 if (authenticate_root_with_img4(path)) {
1188 panic("Root image %s does not match the manifest", root_path);
1189 }
1190 if (path != root_path) {
1191 kfree_data(path, pathsz);
1192 }
1193 } else
1194 #endif /* CONFIG_IMAGEBOOT_IMG4 */
1195 if ((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) &&
1196 (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) {
1197 panic("%s: no valid path to image.", __FUNCTION__);
1198 }
1199
1200 DBG_TRACE("%s: root image url is %s\n", __FUNCTION__, root_path);
1201
1202 error = imageboot_mount_image(root_path, 0, type);
1203 if (error) {
1204 panic("Failed on first stage of imageboot.");
1205 }
1206
1207 /*
1208 * See if we are rooting from a nested image
1209 */
1210 if (PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) {
1211 goto done;
1212 }
1213
1214 printf("%s: second level root image url is %s\n", __FUNCTION__, root_path);
1215
1216 /*
1217 * If we fail to set up second image, it's not a given that we
1218 * can safely root off the first.
1219 */
1220 error = imageboot_mount_image(root_path, 1, type);
1221 if (error) {
1222 panic("Failed on second stage of imageboot.");
1223 }
1224
1225 done:
1226 zfree(ZV_NAMEI, root_path);
1227
1228 DBG_TRACE("%s: exit\n", __FUNCTION__);
1229
1230 return;
1231 }
1232