1 /*
2 * Copyright (c) 2006-2020 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/systm.h>
34 #include <sys/mount_internal.h>
35 #include <sys/fsctl.h>
36 #include <sys/filedesc.h>
37 #include <sys/vnode_internal.h>
38 #include <sys/imageboot.h>
39 #include <kern/assert.h>
40
41 #include <sys/namei.h>
42 #include <sys/fcntl.h>
43 #include <sys/vnode.h>
44 #include <sys/xattr.h>
45 #include <sys/sysproto.h>
46 #include <sys/csr.h>
47 #include <miscfs/devfs/devfsdefs.h>
48 #include <libkern/crypto/sha2.h>
49 #include <libkern/crypto/rsa.h>
50 #include <libkern/OSKextLibPrivate.h>
51 #include <sys/ubc_internal.h>
52
53 #if CONFIG_IMAGEBOOT_IMG4
54 #include <libkern/img4/interface.h>
55 #include <img4/firmware.h>
56 #endif
57
58 #include <kern/kalloc.h>
59 #include <os/overflow.h>
60 #include <vm/vm_kern.h>
61
62 #include <pexpert/pexpert.h>
63 #include <kern/chunklist.h>
64
65 extern int (*mountroot)(void);
66 extern char rootdevice[DEVMAXNAMESIZE];
67
/* Set to 1 to compile in verbose imageboot tracing through DBG_TRACE(). */
#define DEBUG_IMAGEBOOT 0

#if DEBUG_IMAGEBOOT
/* Tracing enabled: every message is prefixed with "imageboot: ". */
#define DBG_TRACE(...) printf("imageboot: " __VA_ARGS__)
#else
/* Tracing disabled: DBG_TRACE() expands to an empty statement. */
#define DBG_TRACE(...) do {} while(0)
#endif

/*
 * Authentication logging helpers. Both prefix the message with the name of
 * the calling function and append a newline, so callers pass a bare format
 * string without a trailing "\n".
 */
#define AUTHDBG(fmt, ...) do { printf("%s: " fmt "\n", __func__, ##__VA_ARGS__); } while (0)
#define AUTHPRNT(fmt, ...) do { printf("%s: " fmt "\n", __func__, ##__VA_ARGS__); } while (0)
78
79 extern int di_root_image_ext(const char *path, char *devname, size_t devsz, dev_t *dev_p, bool removable);
80 extern int di_root_image(const char *path, char *devname, size_t devsz, dev_t *dev_p);
81 extern int di_root_ramfile_buf(void *buf, size_t bufsz, char *devname, size_t devsz, dev_t *dev_p);
82
83 static boolean_t imageboot_setup_new(imageboot_type_t type);
84
85 void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size);
86
87 extern lck_rw_t rootvnode_rw_lock;
88
89 #define kIBFilePrefix "file://"
90
91 __private_extern__ int
imageboot_format_is_valid(const char * root_path)92 imageboot_format_is_valid(const char *root_path)
93 {
94 return strncmp(root_path, kIBFilePrefix,
95 strlen(kIBFilePrefix)) == 0;
96 }
97
98 static void
vnode_get_and_drop_always(vnode_t vp)99 vnode_get_and_drop_always(vnode_t vp)
100 {
101 vnode_getalways(vp);
102 vnode_rele(vp);
103 vnode_put(vp);
104 }
105
106 __private_extern__ bool
imageboot_desired(void)107 imageboot_desired(void)
108 {
109 bool do_imageboot = false;
110
111 char *root_path = NULL;
112 root_path = zalloc(ZV_NAMEI);
113 /*
114 * Check for first layer DMG rooting.
115 *
116 * Note that here we are principally concerned with whether or not we
117 * SHOULD try to imageboot, not whether or not we are going to be able to.
118 *
119 * If NONE of the boot-args are present, then assume that image-rooting
120 * is not requested.
121 *
122 * [!! Note parens guard the entire logically OR'd set of statements, below. It validates
123 * that NONE of the below-mentioned boot-args is present...!!]
124 */
125 if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) ||
126 #if CONFIG_IMAGEBOOT_IMG4
127 PE_parse_boot_argn("arp0", root_path, MAXPATHLEN) ||
128 #endif
129 PE_parse_boot_argn("rp", root_path, MAXPATHLEN) ||
130 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) ||
131 PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN))) {
132 /* explicitly set to false */
133 do_imageboot = false;
134 } else {
135 /* now sanity check the file-path format */
136 if (imageboot_format_is_valid(root_path)) {
137 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
138 /* root_path looks good and we have one of the aforementioned bootargs */
139 do_imageboot = true;
140 } else {
141 /* explicitly set to false */
142 do_imageboot = false;
143 }
144 }
145
146 zfree(ZV_NAMEI, root_path);
147 return do_imageboot;
148 }
149
150 __private_extern__ imageboot_type_t
imageboot_needed(void)151 imageboot_needed(void)
152 {
153 imageboot_type_t result = IMAGEBOOT_NONE;
154 char *root_path = NULL;
155
156 DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__);
157
158 if (!imageboot_desired()) {
159 goto out;
160 }
161
162 root_path = zalloc(ZV_NAMEI);
163 result = IMAGEBOOT_DMG;
164
165 /* Check for second layer */
166 if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) ||
167 PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) {
168 goto out;
169 }
170
171 /* Sanity-check second layer */
172 if (imageboot_format_is_valid(root_path)) {
173 DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
174 } else {
175 panic("%s: Invalid URL scheme for %s",
176 __FUNCTION__, root_path);
177 }
178
179 out:
180 if (root_path != NULL) {
181 zfree(ZV_NAMEI, root_path);
182 }
183 return result;
184 }
185
186 extern bool IOBaseSystemARVRootHashAvailable(void);
187
188
189 /*
190 * Mounts new filesystem based on image path, and pivots it to the root.
191 * The image to be mounted is located at image_path.
192 * It will be mounted at mount_path.
193 * The vfs_switch_root operation will be performed.
194 * After the pivot, the outgoing root filesystem (the filesystem at root when
195 * this function begins) will be at outgoing_root_path. If `skip_signature_check` is true,
196 * then ignore the chunklisted or authAPFS checks on this image
197 */
198 __private_extern__ int
imageboot_pivot_image(const char * image_path,imageboot_type_t type,const char * mount_path,const char * outgoing_root_path,const bool rooted_dmg,const bool skip_signature_check)199 imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
200 const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check)
201 {
202 int error;
203 boolean_t authenticated_dmg_chunklist = false;
204 vnode_t mount_vp = NULLVP;
205 errno_t rootauth;
206
207
208 if (type != IMAGEBOOT_DMG) {
209 panic("not supported");
210 }
211
212 /*
213 * Check that the image file actually exists.
214 * We also need to find the mount it's on, to mark it as backing the
215 * root.
216 */
217 vnode_t imagevp = NULLVP;
218 error = vnode_lookup(image_path, 0, &imagevp, vfs_context_kernel());
219 if (error) {
220 printf("%s: image file not found or couldn't be read: %d\n", __FUNCTION__, error);
221 /*
222 * bail out here to short-circuit out of panic logic below.
223 * Failure to find the pivot-image should not be a fatal condition (ENOENT)
224 * since it may result in natural consequences (ergo, cannot unlock filevault prompt).
225 */
226 return error;
227 }
228
229 /*
230 * load the disk image and obtain its device.
231 * di_root_image's name and the names of its arguments suggest it has
232 * to be mounted at the root, but that's not actually needed.
233 * We just need to obtain the device info.
234 */
235
236 dev_t dev;
237 char devname[DEVMAXNAMESIZE];
238
239 error = di_root_image_ext(image_path, devname, DEVMAXNAMESIZE, &dev, true);
240 if (error) {
241 panic("%s: di_root_image failed: %d", __FUNCTION__, error);
242 }
243
244 printf("%s: attached disk image %s as %s\n", __FUNCTION__, image_path, devname);
245
246
247 #if CONFIG_IMAGEBOOT_CHUNKLIST
248 if ((rooted_dmg == false) && !IOBaseSystemARVRootHashAvailable()) {
249 error = authenticate_root_with_chunklist(image_path, NULL);
250 if (error == 0) {
251 printf("authenticated root-dmg via chunklist...\n");
252 authenticated_dmg_chunklist = true;
253 } else {
254 /* root hash was not available, and image is NOT chunklisted? */
255 printf("failed to chunklist-authenticate root-dmg @ %s\n", image_path);
256 }
257 }
258 #endif
259
260 char fulldevname[DEVMAXNAMESIZE + 5]; // "/dev/"
261 strlcpy(fulldevname, "/dev/", sizeof(fulldevname));
262 strlcat(fulldevname, devname, sizeof(fulldevname));
263
264 /*
265 * mount expects another layer of indirection (because it expects to
266 * be getting a user_addr_t of a char *.
267 * Make a pointer-to-pointer on our stack. It won't use this
268 * address after it returns so this should be safe.
269 */
270 char *fulldevnamep = &(fulldevname[0]);
271 char **fulldevnamepp = &fulldevnamep;
272
273 #define PIVOTMNT "/System/Volumes/BaseSystem"
274
275
276 /* Attempt to mount as HFS; if it fails, then try as APFS */
277 printf("%s: attempting to mount as hfs...\n", __FUNCTION__);
278 error = kernel_mount("hfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
279 if (error) {
280 printf("mount failed: %d\n", error);
281 printf("%s: attempting to mount as apfs...\n", __FUNCTION__);
282 error = kernel_mount("apfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
283 }
284
285 /* If we didn't mount as either HFS or APFS, then bail out */
286 if (error) {
287 /*
288 * Note that for this particular failure case (failure to mount), the disk image
289 * being attached may have failed to quiesce within the alloted time out (20-30 sec).
290 * For example, it may be still probing, or APFS container enumeration may have not
291 * completed. If so, then we may have fallen into this particular error case. However,
292 * failure to complete matching should be an exceptional case as 30 sec. is quite a
293 * long time to wait for matching to complete (which would have occurred in
294 * di_root_image_ext).
295 */
296 #if defined(__arm64__) && XNU_TARGET_OS_OSX
297 panic("%s: failed to mount pivot image(%d)!", __FUNCTION__, error);
298 #endif
299 printf("%s: failed to mount pivot image(%d) !", __FUNCTION__, error);
300 goto done;
301 }
302
303 /* otherwise, if the mount succeeded, then assert that the DMG is authenticated (either chunklist or authapfs) */
304 error = vnode_lookup(PIVOTMNT, 0, &mount_vp, vfs_context_kernel());
305 if (error) {
306 #if defined(__arm64__) && XNU_TARGET_OS_OSX
307 panic("%s: failed to lookup pivot root (%d) !", __FUNCTION__, error);
308 #endif
309 printf("%s: failed to lookup pivot root (%d)!", __FUNCTION__, error);
310 goto done;
311 }
312
313 /* the 0x1 implies base system */
314 rootauth = VNOP_IOCTL(mount_vp, FSIOC_KERNEL_ROOTAUTH, (caddr_t)0x1, 0, vfs_context_kernel());
315 if (rootauth) {
316 printf("BS-DMG failed to authenticate intra-FS \n");
317 /*
318 * If we are using a custom rooted DMG, or if we have already authenticated
319 * the DMG via chunklist, then it is permissible to use.
320 * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant.
321 */
322 if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) {
323 rootauth = 0;
324 }
325 error = rootauth;
326 }
327 vnode_put(mount_vp);
328 mount_vp = NULLVP;
329
330 if (error) {
331 /*
332 * Failure here exclusively means that the mount failed to authenticate.
333 * This means that the disk image either was not sealed (authapfs), or it was
334 * not hosted on a chunklisted DMG. Both scenarios may be fatal depending
335 * on the platform.
336 */
337 #if defined(__arm64__) && XNU_TARGET_OS_OSX
338 panic("%s: could not authenticate the pivot image: %d. giving up.", __FUNCTION__, error);
339 #endif
340 printf("%s: could not authenticate the pivot image: %d. giving up.\n", __FUNCTION__, error);
341 goto done;
342 }
343
344 if (rootvnode) {
345 mount_t root_mp = vnode_mount(rootvnode);
346 if (root_mp && (root_mp->mnt_kern_flag & MNTK_SSD)) {
347 rootvp_is_ssd = true;
348 }
349 }
350 /*
351 * pivot the incoming and outgoing filesystems
352 */
353 error = vfs_switch_root(mount_path, outgoing_root_path, 0);
354 if (error) {
355 panic("%s: vfs_switch_root failed: %d", __FUNCTION__, error);
356 }
357
358 /*
359 * Mark the filesystem containing the image as backing root, so it
360 * won't be unmountable.
361 *
362 * vfs_switch_root() clears this flag, so we have to set it after
363 * the pivot call.
364 * If the system later pivots out of the image, vfs_switch_root
365 * will clear it again, so the backing filesystem can be unmounted.
366 */
367 mount_t imagemp = imagevp->v_mount;
368 lck_rw_lock_exclusive(&imagemp->mnt_rwlock);
369 imagemp->mnt_kern_flag |= MNTK_BACKS_ROOT;
370 lck_rw_done(&imagemp->mnt_rwlock);
371
372 error = 0;
373
374 /*
375 * Note that we do NOT change kern.bootuuid here -
376 * imageboot_mount_image() does, but imageboot_pivot_image() doesn't.
377 * imageboot_mount_image() is used when the root volume uuid was
378 * "always supposed to be" the one inside the dmg. imageboot_pivot_
379 * image() is used when the true root volume just needs to be
380 * obscured for a moment by the dmg.
381 */
382
383 done:
384 if (imagevp != NULLVP) {
385 vnode_put(imagevp);
386 }
387 return error;
388 }
389
390 /* kern_sysctl.c */
391 extern uuid_string_t fake_bootuuid;
392
393 static void
set_fake_bootuuid(mount_t mp)394 set_fake_bootuuid(mount_t mp)
395 {
396 struct vfs_attr va;
397 VFSATTR_INIT(&va);
398 VFSATTR_WANTED(&va, f_uuid);
399
400 if (vfs_getattr(mp, &va, vfs_context_current()) != 0) {
401 return;
402 }
403
404 if (!VFSATTR_IS_SUPPORTED(&va, f_uuid)) {
405 return;
406 }
407
408 uuid_unparse(va.f_uuid, fake_bootuuid);
409 }
410
411 /*
412 * Swaps in new root filesystem based on image path.
413 * Current root filesystem is removed from mount list and
414 * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and
415 * "rootvnode" is reset. Root vnode of currentroot filesystem
416 * is returned with usecount (no iocount).
417 * kern.bootuuid is arranged to return the UUID of the mounted image. (If
418 * we did nothing here, it would be the UUID of the image source volume.)
419 */
420 __private_extern__ int
imageboot_mount_image(const char * root_path,int height,imageboot_type_t type)421 imageboot_mount_image(const char *root_path, int height, imageboot_type_t type)
422 {
423 dev_t dev;
424 int error;
425 /*
426 * Need to stash this here since we may do a kernel_mount() on /, which will
427 * automatically update the rootvnode global. Note that vfs_mountroot() does
428 * not update that global, which is a bit weird.
429 */
430 vnode_t old_rootvnode = rootvnode;
431 vnode_t newdp;
432 mount_t new_rootfs;
433 boolean_t update_rootvnode = FALSE;
434
435 if (type == IMAGEBOOT_DMG) {
436 error = di_root_image(root_path, rootdevice, DEVMAXNAMESIZE, &dev);
437 if (error) {
438 panic("%s: di_root_image failed: %d", __FUNCTION__, error);
439 }
440
441 rootdev = dev;
442 mountroot = NULL;
443 printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
444 error = vfs_mountroot();
445 if (error != 0) {
446 panic("vfs_mountroot() failed.");
447 }
448
449 update_rootvnode = TRUE;
450 } else {
451 panic("invalid imageboot type: %d", type);
452 }
453
454 /*
455 * Get the vnode for '/'.
456 * Set fdp->fd_fd.fd_cdir to reference it.
457 */
458 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
459 panic("%s: cannot find root vnode", __FUNCTION__);
460 }
461 DBG_TRACE("%s: old root fsname: %s\n", __FUNCTION__, old_rootvnode->v_mount->mnt_vtable->vfc_name);
462
463 if (old_rootvnode != NULL) {
464 /* remember the old rootvnode, but remove it from mountlist */
465 mount_t old_rootfs = old_rootvnode->v_mount;
466
467 mount_list_remove(old_rootfs);
468 mount_lock(old_rootfs);
469 old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
470 old_rootfs->mnt_flag &= ~MNT_ROOTFS;
471 mount_unlock(old_rootfs);
472 }
473
474 vnode_ref(newdp);
475 vnode_put(newdp);
476
477 lck_rw_lock_exclusive(&rootvnode_rw_lock);
478 /* switch to the new rootvnode */
479 if (update_rootvnode) {
480 rootvnode = newdp;
481 set_fake_bootuuid(rootvnode->v_mount);
482 }
483
484 new_rootfs = rootvnode->v_mount;
485 mount_lock(new_rootfs);
486 new_rootfs->mnt_flag |= MNT_ROOTFS;
487 mount_unlock(new_rootfs);
488
489 kernproc->p_fd.fd_cdir = newdp;
490 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
491
492 DBG_TRACE("%s: root switched\n", __FUNCTION__);
493
494 if (old_rootvnode != NULL) {
495 #ifdef CONFIG_IMGSRC_ACCESS
496 if (height >= 0) {
497 imgsrc_rootvnodes[height] = old_rootvnode;
498 } else {
499 vnode_get_and_drop_always(old_rootvnode);
500 }
501 #else
502 #pragma unused(height)
503 vnode_get_and_drop_always(old_rootvnode);
504 #endif /* CONFIG_IMGSRC_ACCESS */
505 }
506 return 0;
507 }
508
509 /*
510 * Return a memory object for given file path.
511 * Also returns a vnode reference for the given file path.
512 */
513 void *
ubc_getobject_from_filename(const char * filename,struct vnode ** vpp,off_t * file_size)514 ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size)
515 {
516 int err = 0;
517 struct nameidata ndp = {};
518 struct vnode *vp = NULL;
519 off_t fsize = 0;
520 vfs_context_t ctx = vfs_context_kernel();
521 void *control = NULL;
522
523 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(filename), ctx);
524 if ((err = namei(&ndp)) != 0) {
525 goto errorout;
526 }
527 nameidone(&ndp);
528 vp = ndp.ni_vp;
529
530 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
531 goto errorout;
532 }
533
534 if (fsize < 0) {
535 goto errorout;
536 }
537
538 control = ubc_getobject(vp, UBC_FLAGS_NONE);
539 if (control == NULL) {
540 goto errorout;
541 }
542
543 *file_size = fsize;
544 *vpp = vp;
545 vp = NULL;
546
547 errorout:
548 if (vp) {
549 vnode_put(vp);
550 }
551 return control;
552 }
553
554 static int
imageboot_read_file_internal(const char * path,const off_t offset,const bool pageable,void ** bufp,size_t * bufszp,off_t * fsizep)555 imageboot_read_file_internal(const char *path, const off_t offset, const bool pageable, void **bufp, size_t *bufszp, off_t *fsizep)
556 {
557 int err = 0;
558 struct nameidata ndp = {};
559 struct vnode *vp = NULL;
560 struct vnode *rsrc_vp = NULL;
561 char *readbuf = NULL;
562 off_t readsize = 0;
563 off_t readoff = 0;
564 off_t fsize = 0;
565 size_t maxsize = 0;
566 char *buf = NULL;
567 bool doclose = false;
568
569 vfs_context_t ctx = vfs_context_kernel();
570 proc_t p = vfs_context_proc(ctx);
571 kauth_cred_t kerncred = vfs_context_ucred(ctx);
572
573 NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF | FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
574 if ((err = namei(&ndp)) != 0) {
575 AUTHPRNT("namei failed (%s) - %d", path, err);
576 goto out;
577 }
578 nameidone(&ndp);
579 vp = ndp.ni_vp;
580
581 if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
582 AUTHPRNT("failed to get vnode size of %s - %d", path, err);
583 goto out;
584 }
585 if (fsize < 0) {
586 panic("negative file size");
587 }
588 if (offset < 0) {
589 AUTHPRNT("negative file offset");
590 err = EINVAL;
591 goto out;
592 }
593
594 if (fsizep) {
595 *fsizep = fsize;
596 }
597
598 if ((err = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
599 AUTHPRNT("failed to open %s - %d", path, err);
600 goto out;
601 }
602 doclose = true;
603
604 /* cap fsize to the amount that remains after offset */
605 if (os_sub_overflow(fsize, offset, &fsize)) {
606 fsize = 0;
607 } else if (fsize < 0) {
608 fsize = 0;
609 }
610
611 /* if bufsz is non-zero, cap the read at bufsz bytes */
612 maxsize = *bufszp;
613 if (maxsize && (maxsize < (size_t)fsize)) {
614 fsize = maxsize;
615 }
616
617 /* if fsize is larger than 2 GiB (or a configured limit), fail */
618 maxsize = INT_MAX;
619 PE_parse_boot_argn("rootdmg-maxsize", &maxsize, sizeof(maxsize));
620 if (maxsize && (maxsize < (size_t)fsize)) {
621 AUTHPRNT("file is too large (%lld > %lld)", (long long) fsize, (long long) maxsize);
622 err = ENOMEM;
623 goto out;
624 }
625
626 if (pageable) {
627 vm_offset_t addr = 0;
628 if (kmem_alloc(kernel_map, &addr, (vm_size_t)fsize,
629 KMA_PAGEABLE | KMA_DATA, VM_KERN_MEMORY_FILE) == KERN_SUCCESS) {
630 buf = (char *)addr;
631 } else {
632 buf = NULL;
633 }
634 } else {
635 buf = (char *)kalloc_data((vm_size_t)fsize, Z_WAITOK);
636 }
637 if (buf == NULL) {
638 err = ENOMEM;
639 goto out;
640 }
641
642 #if NAMEDSTREAMS
643 /* find resource fork so we can evict cached decmpfs data */
644 if (VNOP_GETNAMEDSTREAM(vp, &rsrc_vp, XATTR_RESOURCEFORK_NAME, NS_OPEN, /*flags*/ 0, ctx) == 0) {
645 vnode_ref(rsrc_vp);
646 vnode_put(rsrc_vp);
647 AUTHDBG("Found resource fork for %s", path);
648 }
649 #endif
650
651 /* read data in chunks to handle (fsize > INT_MAX) */
652 readbuf = buf;
653 readsize = fsize;
654 readoff = offset;
655 while (readsize > 0) {
656 const off_t chunksize_max = 16 * 1024 * 1024; /* 16 MiB */
657 const off_t chunksize = MIN(readsize, chunksize_max);
658
659 /* read next chunk, pass IO_NOCACHE to clarify our intent (even if ignored) */
660 if ((err = vn_rdwr(UIO_READ, vp, (caddr_t)readbuf, (int)chunksize, readoff, UIO_SYSSPACE, IO_NODELOCKED | IO_NOCACHE | IO_RAOFF, kerncred, /*resid*/ NULL, p)) != 0) {
661 AUTHPRNT("Cannot read %lld bytes at offset %lld from %s - %d", (long long)chunksize, (long long)readoff, path, err);
662 goto out;
663 }
664
665 /* evict cached pages so they don't accumulate during early boot */
666 ubc_msync(vp, readoff, readoff + chunksize, NULL, UBC_INVALIDATE | UBC_PUSHALL);
667
668 /* evict potentially-cached decmpfs data if we have a resource fork */
669 if (rsrc_vp != NULL) {
670 if (vnode_getwithref(rsrc_vp) == 0) {
671 ubc_msync(rsrc_vp, 0, ubc_getsize(rsrc_vp), NULL, UBC_INVALIDATE | UBC_PUSHALL);
672 vnode_put(rsrc_vp);
673 }
674 }
675
676 readbuf = &readbuf[chunksize];
677 readsize -= chunksize;
678 readoff += chunksize;
679 }
680
681 out:
682 if (doclose) {
683 VNOP_CLOSE(vp, FREAD, ctx);
684 }
685 if (rsrc_vp) {
686 vnode_rele(rsrc_vp);
687 rsrc_vp = NULL;
688 }
689 if (vp) {
690 vnode_put(vp);
691 vp = NULL;
692 }
693
694 if (err) {
695 if (buf == NULL) {
696 /* nothing to free */
697 } else if (pageable) {
698 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)fsize);
699 } else {
700 kfree_data(buf, (vm_size_t)fsize);
701 }
702 } else {
703 *bufp = buf;
704 *bufszp = (size_t)fsize;
705 }
706
707 return err;
708 }
709
/*
 * Reads the file at path from its beginning into a pageable buffer.
 *
 * On success *bufp holds the buffer and *bufszp the number of bytes read;
 * a non-zero *bufszp on entry caps the read. The buffer is allocated with
 * kmem_alloc() by the internal reader, so it is released with kmem_free().
 */
int
imageboot_read_file_pageable(const char *path, void **bufp, size_t *bufszp)
{
	const off_t start = 0;
	const bool want_pageable = true;

	return imageboot_read_file_internal(path, start, want_pageable, bufp, bufszp, NULL);
}
715
/*
 * Reads the file at path, starting at byte offset, into a non-pageable
 * buffer.
 *
 * On success *bufp holds the buffer and *bufszp the number of bytes read;
 * a non-zero *bufszp on entry caps the read. The buffer is allocated with
 * kalloc_data() by the internal reader, so it is released with kfree_data().
 */
int
imageboot_read_file_from_offset(const char *path, const off_t offset, void **bufp, size_t *bufszp)
{
	const bool want_pageable = false;

	return imageboot_read_file_internal(path, offset, want_pageable, bufp, bufszp, NULL);
}
721
/*
 * Reads the file at path from its beginning into a non-pageable buffer.
 *
 * On success *bufp holds the buffer and *bufszp the number of bytes read;
 * a non-zero *bufszp on entry caps the read. When fsizep is non-NULL it
 * receives the full size of the file. The buffer is allocated with
 * kalloc_data() by the internal reader, so it is released with kfree_data().
 */
int
imageboot_read_file(const char *path, void **bufp, size_t *bufszp, off_t *fsizep)
{
	const off_t start = 0;
	const bool want_pageable = false;

	return imageboot_read_file_internal(path, start, want_pageable, bufp, bufszp, fsizep);
}
727
#if CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST
/*
 * Looks up the image file at path and returns its vnode.
 *
 * path   file to look up; it must be a regular file (VREG).
 * fsize  optional out: receives the size of the file.
 * errp   optional out: receives the error code on failure. It is not
 *        written on success.
 *
 * Returns the vnode obtained from namei() on success; it is not released
 * here, so the caller is responsible for dropping it. Returns NULL on
 * failure, after releasing the vnode if one had been found.
 */
vnode_t
imgboot_get_image_file(const char *path, off_t *fsize, int *errp)
{
	struct nameidata ndp = {};
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_kernel();
	int err;

	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
	if ((err = namei(&ndp)) != 0) {
		AUTHPRNT("Cannot find %s - error %d", path, err);
	} else {
		nameidone(&ndp);
		vp = ndp.ni_vp;

		if (vp->v_type != VREG) {
			err = EINVAL;
			AUTHPRNT("%s is not a regular file", path);
		} else if (fsize) {
			if ((err = vnode_size(vp, fsize, ctx)) != 0) {
				AUTHPRNT("Cannot get file size of %s - error %d", path, err);
			}
		}
	}

	if (err) {
		if (vp) {
			vnode_put(vp);
		}
		/* errp is optional, just like fsize */
		if (errp) {
			*errp = err;
		}
		vp = NULL;
	}
	return vp;
}
#endif /* CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST */
764
765 #if CONFIG_IMAGEBOOT_IMG4
766
767 #define APTICKET_NAME "apticket.der"
768
769 static char *
imgboot_get_apticket_path(const char * rootpath,size_t * sz)770 imgboot_get_apticket_path(const char *rootpath, size_t *sz)
771 {
772 size_t plen = strlen(rootpath) + sizeof(APTICKET_NAME) + 1;
773 char *path = (char *)kalloc_data(plen, Z_WAITOK);
774
775 if (path) {
776 char *slash;
777
778 strlcpy(path, rootpath, plen);
779 slash = strrchr(path, '/');
780 if (slash == NULL) {
781 slash = path;
782 } else {
783 slash++;
784 }
785 strlcpy(slash, APTICKET_NAME, sizeof(APTICKET_NAME) + 1);
786 }
787
788 *sz = plen;
789 return path;
790 }
791
792 static int
authenticate_root_with_img4(const char * rootpath)793 authenticate_root_with_img4(const char *rootpath)
794 {
795 errno_t rv;
796 vnode_t vp;
797 size_t ticket_pathsz = 0;
798 char *ticket_path;
799 img4_buff_t tck = IMG4_BUFF_INIT;
800 img4_firmware_execution_context_t exec = {
801 .i4fex_version = IMG4_FIRMWARE_EXECUTION_CONTEXT_STRUCT_VERSION,
802 .i4fex_execute = NULL,
803 .i4fex_context = NULL,
804 };
805 img4_firmware_t fw = NULL;
806 img4_firmware_flags_t fw_flags = IMG4_FIRMWARE_FLAG_BARE |
807 IMG4_FIRMWARE_FLAG_SUBSEQUENT_STAGE;
808
809 DBG_TRACE("Check %s\n", rootpath);
810
811 if (img4if == NULL) {
812 AUTHPRNT("AppleImage4 is not ready");
813 return EAGAIN;
814 }
815
816 ticket_path = imgboot_get_apticket_path(rootpath, &ticket_pathsz);
817 if (ticket_path == NULL) {
818 AUTHPRNT("Cannot construct ticket path - out of memory");
819 return ENOMEM;
820 }
821
822 rv = imageboot_read_file(ticket_path, (void **)&tck.i4b_bytes, &tck.i4b_len, NULL);
823 if (rv) {
824 AUTHPRNT("Cannot get a ticket from %s - %d\n", ticket_path, rv);
825 goto out_with_ticket_path;
826 }
827
828 DBG_TRACE("Got %lu bytes of manifest from %s\n", tck.i4b_len, ticket_path);
829
830 vp = imgboot_get_image_file(rootpath, NULL, &rv);
831 if (vp == NULL) {
832 /* Error message had been printed already */
833 rv = EIO;
834 goto out_with_ticket_bytes;
835 }
836
837 fw = img4_firmware_new_from_vnode_4xnu(IMG4_RUNTIME_DEFAULT, &exec, 'rosi',
838 vp, fw_flags);
839 if (!fw) {
840 AUTHPRNT("Could not allocate new firmware");
841 rv = ENOMEM;
842 goto out_with_ticket_bytes;
843 }
844
845 img4_firmware_attach_manifest(fw, &tck);
846 rv = img4_firmware_evaluate(fw, img4_chip_select_personalized_ap(), NULL);
847
848 out_with_ticket_bytes:
849 kfree_data(tck.i4b_bytes, tck.i4b_len);
850 out_with_ticket_path:
851 kfree_data(ticket_path, ticket_pathsz);
852
853 img4_firmware_destroy(&fw);
854 return rv;
855 }
856 #endif /* CONFIG_IMAGEBOOT_IMG4 */
857
858
859 /*
860 * Attach the image at 'path' as a ramdisk and mount it as our new rootfs.
861 * All existing mounts are first umounted.
862 */
863 static int
imageboot_mount_ramdisk(const char * path)864 imageboot_mount_ramdisk(const char *path)
865 {
866 int err = 0;
867 size_t bufsz = 0;
868 void *buf = NULL;
869 dev_t dev;
870 vnode_t newdp;
871 vnode_t tvp;
872 mount_t new_rootfs;
873
874 /* Read our target image from disk */
875 err = imageboot_read_file_pageable(path, &buf, &bufsz);
876 if (err) {
877 printf("%s: failed: imageboot_read_file_pageable() = %d\n", __func__, err);
878 goto out;
879 }
880 DBG_TRACE("%s: read '%s' sz = %lu\n", __func__, path, bufsz);
881
882 #if CONFIG_IMGSRC_ACCESS
883 /* Re-add all root mounts to the mount list in the correct order... */
884 mount_list_remove(rootvnode->v_mount);
885 for (int i = 0; i < MAX_IMAGEBOOT_NESTING; i++) {
886 struct vnode *vn = imgsrc_rootvnodes[i];
887 if (vn) {
888 vnode_getalways(vn);
889 imgsrc_rootvnodes[i] = NULLVP;
890
891 mount_t mnt = vn->v_mount;
892 mount_lock(mnt);
893 mnt->mnt_flag |= MNT_ROOTFS;
894 mount_list_add(mnt);
895 mount_unlock(mnt);
896
897 vnode_rele(vn);
898 vnode_put(vn);
899 }
900 }
901 mount_list_add(rootvnode->v_mount);
902 #endif
903
904 /* ... and unmount everything */
905 vfs_unmountall(FALSE);
906
907 lck_rw_lock_exclusive(&rootvnode_rw_lock);
908 kernproc->p_fd.fd_cdir = NULL;
909 tvp = rootvnode;
910 rootvnode = NULL;
911 rootvp = NULLVP;
912 rootdev = NODEV;
913 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
914 vnode_get_and_drop_always(tvp);
915
916 /* Attach the ramfs image ... */
917 err = di_root_ramfile_buf(buf, bufsz, rootdevice, DEVMAXNAMESIZE, &dev);
918 if (err) {
919 printf("%s: failed: di_root_ramfile_buf() = %d\n", __func__, err);
920 goto out;
921 }
922
923 /* ... and mount it */
924 rootdev = dev;
925 mountroot = NULL;
926 err = vfs_mountroot();
927 if (err) {
928 printf("%s: failed: vfs_mountroot() = %d\n", __func__, err);
929 goto out;
930 }
931
932 /* Switch to new root vnode */
933 if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
934 panic("%s: cannot find root vnode", __func__);
935 }
936 vnode_ref(newdp);
937
938 lck_rw_lock_exclusive(&rootvnode_rw_lock);
939 rootvnode = newdp;
940 rootvnode->v_flag |= VROOT;
941 new_rootfs = rootvnode->v_mount;
942 mount_lock(new_rootfs);
943 new_rootfs->mnt_flag |= MNT_ROOTFS;
944 mount_unlock(new_rootfs);
945
946 set_fake_bootuuid(new_rootfs);
947
948 kernproc->p_fd.fd_cdir = newdp;
949 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
950
951 vnode_put(newdp);
952
953 DBG_TRACE("%s: root switched\n", __func__);
954
955 out:
956 if (err && (buf != NULL)) {
957 kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
958 }
959 return err;
960 }
961
962 /*
963 * If the path is in <file://> URL format then we allocate memory and decode it,
964 * otherwise return the same pointer.
965 *
966 * Caller is expected to check if the pointers are different.
967 */
968 static char *
url_to_path(char * url_path,size_t * sz)969 url_to_path(char *url_path, size_t *sz)
970 {
971 char *path = url_path;
972 size_t len = strlen(kIBFilePrefix);
973
974 if (strncmp(kIBFilePrefix, url_path, len) == 0) {
975 /* its a URL - remove the file:// prefix and percent-decode */
976 url_path += len;
977
978 len = strlen(url_path);
979 if (len) {
980 /* Make a copy of the path to URL-decode */
981 path = (char *)kalloc_data(len + 1, Z_WAITOK);
982 if (path == NULL) {
983 panic("imageboot path allocation failed - cannot allocate %d bytes", (int)len);
984 }
985
986 strlcpy(path, url_path, len + 1);
987 *sz = len + 1;
988 url_decode(path);
989 } else {
990 panic("Bogus imageboot path URL - missing path");
991 }
992
993 DBG_TRACE("%s: root image URL <%s> becomes %s\n", __func__, url_path, path);
994 }
995
996 return path;
997 }
998
999 static boolean_t
imageboot_setup_new(imageboot_type_t type)1000 imageboot_setup_new(imageboot_type_t type)
1001 {
1002 int error;
1003 char *root_path = NULL;
1004 int height = 0;
1005 boolean_t done = FALSE;
1006 boolean_t auth_root = TRUE;
1007 boolean_t ramdisk_root = FALSE;
1008
1009 root_path = zalloc(ZV_NAMEI);
1010 assert(root_path != NULL);
1011
1012 unsigned imgboot_arg;
1013 if (PE_parse_boot_argn("-rootdmg-ramdisk", &imgboot_arg, sizeof(imgboot_arg))) {
1014 ramdisk_root = TRUE;
1015 }
1016
1017 if (PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) {
1018 printf("%s: container image url is %s\n", __FUNCTION__, root_path);
1019 error = imageboot_mount_image(root_path, height, type);
1020 if (error != 0) {
1021 panic("Failed to mount container image.");
1022 }
1023
1024 height++;
1025 }
1026
1027 if (PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN) == FALSE &&
1028 PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) {
1029 if (height > 0) {
1030 panic("%s specified without %s or %s?", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_AUTHROOT_ARG, IMAGEBOOT_ROOT_ARG);
1031 }
1032 goto out;
1033 }
1034
1035 printf("%s: root image URL is '%s'\n", __func__, root_path);
1036
1037 /* Make a copy of the path to URL-decode */
1038 size_t pathsz;
1039 char *path = url_to_path(root_path, &pathsz);
1040 assert(path);
1041
1042 #if CONFIG_IMAGEBOOT_CHUNKLIST
1043 if (auth_root) {
1044 /*
1045 * This updates auth_root to reflect whether chunklist was
1046 * actually enforced. In effect, this clears auth_root if
1047 * CSR_ALLOW_ANY_RECOVERY_OS allowed an invalid image.
1048 */
1049 AUTHDBG("authenticating root image at %s", path);
1050 error = authenticate_root_with_chunklist(path, &auth_root);
1051 if (error) {
1052 panic("root image authentication failed (err = %d)", error);
1053 }
1054 AUTHDBG("successfully authenticated %s", path);
1055 }
1056 #endif
1057
1058 if (ramdisk_root) {
1059 error = imageboot_mount_ramdisk(path);
1060 } else {
1061 error = imageboot_mount_image(root_path, height, type);
1062 }
1063
1064 if (path != root_path) {
1065 kfree_data(path, pathsz);
1066 }
1067
1068 if (error) {
1069 panic("Failed to mount root image (err=%d, auth=%d, ramdisk=%d)",
1070 error, auth_root, ramdisk_root);
1071 }
1072
1073 #if CONFIG_IMAGEBOOT_CHUNKLIST
1074 if (auth_root) {
1075 /* check that the image version matches the running kernel */
1076 AUTHDBG("checking root image version");
1077 error = authenticate_root_version_check();
1078 if (error) {
1079 panic("root image version check failed");
1080 } else {
1081 AUTHDBG("root image version matches kernel");
1082 }
1083 }
1084 #endif
1085
1086 done = TRUE;
1087
1088 out:
1089 zfree(ZV_NAMEI, root_path);
1090 return done;
1091 }
1092
1093 __private_extern__ void
imageboot_setup(imageboot_type_t type)1094 imageboot_setup(imageboot_type_t type)
1095 {
1096 int error = 0;
1097 char *root_path = NULL;
1098
1099 DBG_TRACE("%s: entry\n", __FUNCTION__);
1100
1101 if (rootvnode == NULL) {
1102 panic("imageboot_setup: rootvnode is NULL.");
1103 }
1104
1105 /*
1106 * New boot-arg scheme:
1107 * root-dmg : the dmg that will be the root filesystem, authenticated by default.
1108 * auth-root-dmg : same as root-dmg.
1109 * container-dmg : an optional dmg that contains the root-dmg.
1110 * locker : the locker that will be the root filesystem -- mutually
1111 * exclusive with any other boot-arg.
1112 */
1113 if (imageboot_setup_new(type)) {
1114 return;
1115 }
1116
1117 root_path = zalloc(ZV_NAMEI);
1118 assert(root_path != NULL);
1119
1120 /*
1121 * Look for outermost disk image to root from. If we're doing a nested boot,
1122 * there's some sense in which the outer image never needs to be the root filesystem,
1123 * but it does need very similar treatment: it must not be unmounted, needs a fake
1124 * device vnode created for it, and should not show up in getfsstat() until exposed
1125 * with MNT_IMGSRC. We just make it the temporary root.
1126 */
1127 #if CONFIG_IMAGEBOOT_IMG4
1128 if (PE_parse_boot_argn("arp0", root_path, MAXPATHLEN)) {
1129 size_t pathsz;
1130 char *path = url_to_path(root_path, &pathsz);
1131
1132 assert(path);
1133
1134 if (authenticate_root_with_img4(path)) {
1135 panic("Root image %s does not match the manifest", root_path);
1136 }
1137 if (path != root_path) {
1138 kfree_data(path, pathsz);
1139 }
1140 } else
1141 #endif /* CONFIG_IMAGEBOOT_IMG4 */
1142 if ((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) &&
1143 (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) {
1144 panic("%s: no valid path to image.", __FUNCTION__);
1145 }
1146
1147 DBG_TRACE("%s: root image url is %s\n", __FUNCTION__, root_path);
1148
1149 error = imageboot_mount_image(root_path, 0, type);
1150 if (error) {
1151 panic("Failed on first stage of imageboot.");
1152 }
1153
1154 /*
1155 * See if we are rooting from a nested image
1156 */
1157 if (PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) {
1158 goto done;
1159 }
1160
1161 printf("%s: second level root image url is %s\n", __FUNCTION__, root_path);
1162
1163 /*
1164 * If we fail to set up second image, it's not a given that we
1165 * can safely root off the first.
1166 */
1167 error = imageboot_mount_image(root_path, 1, type);
1168 if (error) {
1169 panic("Failed on second stage of imageboot.");
1170 }
1171
1172 done:
1173 zfree(ZV_NAMEI, root_path);
1174
1175 DBG_TRACE("%s: exit\n", __FUNCTION__);
1176
1177 return;
1178 }
1179