xref: /xnu-10002.41.9/bsd/kern/imageboot.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 /*
2  * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/systm.h>
34 #include <sys/mount_internal.h>
35 #include <sys/fsctl.h>
36 #include <sys/filedesc.h>
37 #include <sys/vnode_internal.h>
38 #include <sys/imageboot.h>
39 #include <kern/assert.h>
40 
41 #include <sys/namei.h>
42 #include <sys/fcntl.h>
43 #include <sys/vnode.h>
44 #include <sys/xattr.h>
45 #include <sys/sysproto.h>
46 #include <sys/csr.h>
47 #include <miscfs/devfs/devfsdefs.h>
48 #include <libkern/crypto/sha2.h>
49 #include <libkern/crypto/rsa.h>
50 #include <libkern/OSKextLibPrivate.h>
51 #include <sys/ubc_internal.h>
52 
53 #if CONFIG_IMAGEBOOT_IMG4
54 #include <libkern/img4/interface.h>
55 #include <img4/firmware.h>
56 #endif
57 
58 #include <kern/kalloc.h>
59 #include <os/overflow.h>
60 #include <vm/vm_kern.h>
61 
62 #include <pexpert/pexpert.h>
63 #include <kern/chunklist.h>
64 
/* Root-mount hook; the image-mount paths in this file reset it to NULL before calling vfs_mountroot(). */
extern int (*mountroot)(void);
/* Root device name buffer; handed to di_root_image()/di_root_ramfile_buf() below as the output device name. */
extern char rootdevice[DEVMAXNAMESIZE];

/* Set to 1 to compile in DBG_TRACE() output. */
#define DEBUG_IMAGEBOOT 0

#if DEBUG_IMAGEBOOT
#define DBG_TRACE(...) printf("imageboot: " __VA_ARGS__)
#else
#define DBG_TRACE(...) do {} while(0)
#endif

/*
 * Authentication logging helpers.  Both currently expand to the same thing:
 * a printf() prefixed with the calling function's name, with a trailing
 * newline appended (so callers should not add their own).
 */
#define AUTHDBG(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)
#define AUTHPRNT(fmt, args...) do { printf("%s: " fmt "\n", __func__, ##args); } while (0)

/* Disk-image attach entry points, defined outside this file. */
extern int di_root_image_ext(const char *path, char *devname, size_t devsz, dev_t *dev_p, bool removable);
extern int di_root_image(const char *path, char *devname, size_t devsz, dev_t *dev_p);
extern int di_root_ramfile_buf(void *buf, size_t bufsz, char *devname, size_t devsz, dev_t *dev_p);

static boolean_t imageboot_setup_new(imageboot_type_t type);

void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size);

/* Taken exclusively in this file around updates of the rootvnode global. */
extern lck_rw_t rootvnode_rw_lock;

/* URL scheme prefix that every image path boot-arg must start with. */
#define kIBFilePrefix "file://"
90 
91 __private_extern__ int
imageboot_format_is_valid(const char * root_path)92 imageboot_format_is_valid(const char *root_path)
93 {
94 	return strncmp(root_path, kIBFilePrefix,
95 	           strlen(kIBFilePrefix)) == 0;
96 }
97 
98 static void
vnode_get_and_drop_always(vnode_t vp)99 vnode_get_and_drop_always(vnode_t vp)
100 {
101 	vnode_getalways(vp);
102 	vnode_rele(vp);
103 	vnode_put(vp);
104 }
105 
106 __private_extern__ bool
imageboot_desired(void)107 imageboot_desired(void)
108 {
109 	bool do_imageboot = false;
110 
111 	char *root_path = NULL;
112 	root_path = zalloc(ZV_NAMEI);
113 	/*
114 	 * Check for first layer DMG rooting.
115 	 *
116 	 * Note that here we are principally concerned with whether or not we
117 	 * SHOULD try to imageboot, not whether or not we are going to be able to.
118 	 *
119 	 * If NONE of the boot-args are present, then assume that image-rooting
120 	 * is not requested.
121 	 *
122 	 * [!! Note parens guard the entire logically OR'd set of statements, below. It validates
123 	 * that NONE of the below-mentioned boot-args is present...!!]
124 	 */
125 	if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) ||
126 #if CONFIG_IMAGEBOOT_IMG4
127 	    PE_parse_boot_argn("arp0", root_path, MAXPATHLEN) ||
128 #endif
129 	    PE_parse_boot_argn("rp", root_path, MAXPATHLEN) ||
130 	    PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) ||
131 	    PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN))) {
132 		/* explicitly set to false */
133 		do_imageboot = false;
134 	} else {
135 		/* now sanity check the file-path format */
136 		if (imageboot_format_is_valid(root_path)) {
137 			DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
138 			/* root_path looks good and we have one of the aforementioned bootargs */
139 			do_imageboot = true;
140 		} else {
141 			/* explicitly set to false */
142 			do_imageboot = false;
143 		}
144 	}
145 
146 	zfree(ZV_NAMEI, root_path);
147 	return do_imageboot;
148 }
149 
150 __private_extern__ imageboot_type_t
imageboot_needed(void)151 imageboot_needed(void)
152 {
153 	imageboot_type_t result = IMAGEBOOT_NONE;
154 	char *root_path = NULL;
155 
156 	DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__);
157 
158 	if (!imageboot_desired()) {
159 		goto out;
160 	}
161 
162 	root_path = zalloc(ZV_NAMEI);
163 	result = IMAGEBOOT_DMG;
164 
165 	/* Check for second layer */
166 	if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) ||
167 	    PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) {
168 		goto out;
169 	}
170 
171 	/* Sanity-check second layer */
172 	if (imageboot_format_is_valid(root_path)) {
173 		DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
174 	} else {
175 		panic("%s: Invalid URL scheme for %s",
176 		    __FUNCTION__, root_path);
177 	}
178 
179 out:
180 	if (root_path != NULL) {
181 		zfree(ZV_NAMEI, root_path);
182 	}
183 	return result;
184 }
185 
186 extern bool IOBaseSystemARVRootHashAvailable(void);
187 
188 
189 /*
190  * Mounts new filesystem based on image path, and pivots it to the root.
191  * The image to be mounted is located at image_path.
192  * It will be mounted at mount_path.
193  * The vfs_switch_root operation will be performed.
194  * After the pivot, the outgoing root filesystem (the filesystem at root when
195  * this function begins) will be at outgoing_root_path.  If `skip_signature_check` is true,
196  * then ignore the chunklisted or authAPFS checks on this image
197  */
198 __private_extern__ int
imageboot_pivot_image(const char * image_path,imageboot_type_t type,const char * mount_path,const char * outgoing_root_path,const bool rooted_dmg,const bool skip_signature_check)199 imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
200     const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check)
201 {
202 	int error;
203 	boolean_t authenticated_dmg_chunklist = false;
204 	vnode_t mount_vp = NULLVP;
205 	errno_t rootauth;
206 
207 
208 	if (type != IMAGEBOOT_DMG) {
209 		panic("not supported");
210 	}
211 
212 	/*
213 	 * Check that the image file actually exists.
214 	 * We also need to find the mount it's on, to mark it as backing the
215 	 * root.
216 	 */
217 	vnode_t imagevp = NULLVP;
218 	error = vnode_lookup(image_path, 0, &imagevp, vfs_context_kernel());
219 	if (error) {
220 		printf("%s: image file not found or couldn't be read: %d\n", __FUNCTION__, error);
221 		/*
222 		 * bail out here to short-circuit out of panic logic below.
223 		 * Failure to find the pivot-image should not be a fatal condition (ENOENT)
224 		 * since it may result in natural consequences (ergo, cannot unlock filevault prompt).
225 		 */
226 		return error;
227 	}
228 
229 	/*
230 	 * load the disk image and obtain its device.
231 	 * di_root_image's name and the names of its arguments suggest it has
232 	 * to be mounted at the root, but that's not actually needed.
233 	 * We just need to obtain the device info.
234 	 */
235 
236 	dev_t dev;
237 	char devname[DEVMAXNAMESIZE];
238 
239 	error = di_root_image_ext(image_path, devname, DEVMAXNAMESIZE, &dev, true);
240 	if (error) {
241 		panic("%s: di_root_image failed: %d", __FUNCTION__, error);
242 	}
243 
244 	printf("%s: attached disk image %s as %s\n", __FUNCTION__, image_path, devname);
245 
246 
247 #if CONFIG_IMAGEBOOT_CHUNKLIST
248 	if ((rooted_dmg == false) && !IOBaseSystemARVRootHashAvailable()) {
249 		error = authenticate_root_with_chunklist(image_path, NULL);
250 		if (error == 0) {
251 			printf("authenticated root-dmg via chunklist...\n");
252 			authenticated_dmg_chunklist = true;
253 		} else {
254 			/* root hash was not available, and image is NOT chunklisted? */
255 			printf("failed to chunklist-authenticate root-dmg @ %s\n", image_path);
256 		}
257 	}
258 #endif
259 
260 	char fulldevname[DEVMAXNAMESIZE + 5]; // "/dev/"
261 	strlcpy(fulldevname, "/dev/", sizeof(fulldevname));
262 	strlcat(fulldevname, devname, sizeof(fulldevname));
263 
264 	/*
265 	 * mount expects another layer of indirection (because it expects to
266 	 * be getting a user_addr_t of a char *.
267 	 * Make a pointer-to-pointer on our stack. It won't use this
268 	 * address after it returns so this should be safe.
269 	 */
270 	char *fulldevnamep = &(fulldevname[0]);
271 	char **fulldevnamepp = &fulldevnamep;
272 
273 #define PIVOTMNT "/System/Volumes/BaseSystem"
274 
275 
276 	/* Attempt to mount as HFS; if it fails, then try as APFS */
277 	printf("%s: attempting to mount as hfs...\n", __FUNCTION__);
278 	error = kernel_mount("hfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
279 	if (error) {
280 		printf("mount failed: %d\n", error);
281 		printf("%s: attempting to mount as apfs...\n", __FUNCTION__);
282 		error = kernel_mount("apfs", NULLVP, NULLVP, PIVOTMNT, fulldevnamepp, 0, (MNT_RDONLY | MNT_DONTBROWSE), (KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_BASESYSTEMROOT), vfs_context_kernel());
283 	}
284 
285 	/* If we didn't mount as either HFS or APFS, then bail out */
286 	if (error) {
287 		/*
288 		 * Note that for this particular failure case (failure to mount), the disk image
289 		 * being attached may have failed to quiesce within the alloted time out (20-30 sec).
290 		 * For example, it may be still probing, or APFS container enumeration may have not
291 		 * completed. If so, then we may have fallen into this particular error case. However,
292 		 * failure to complete matching should be an exceptional case as 30 sec. is quite a
293 		 * long time to wait for matching to complete (which would have occurred in
294 		 * di_root_image_ext).
295 		 */
296 #if defined(__arm64__) && XNU_TARGET_OS_OSX
297 		panic("%s: failed to mount pivot image(%d)!", __FUNCTION__, error);
298 #endif
299 		printf("%s: failed to mount pivot image(%d) !", __FUNCTION__, error);
300 		goto done;
301 	}
302 
303 	/* otherwise, if the mount succeeded, then assert that the DMG is authenticated (either chunklist or authapfs) */
304 	error = vnode_lookup(PIVOTMNT, 0, &mount_vp, vfs_context_kernel());
305 	if (error) {
306 #if defined(__arm64__) && XNU_TARGET_OS_OSX
307 		panic("%s: failed to lookup pivot root (%d) !", __FUNCTION__, error);
308 #endif
309 		printf("%s: failed to lookup pivot root (%d)!", __FUNCTION__, error);
310 		goto done;
311 	}
312 
313 	/* the 0x1 implies base system */
314 	rootauth = VNOP_IOCTL(mount_vp, FSIOC_KERNEL_ROOTAUTH, (caddr_t)0x1, 0, vfs_context_kernel());
315 	if (rootauth) {
316 		printf("BS-DMG failed to authenticate intra-FS \n");
317 		/*
318 		 * If we are using a custom rooted DMG, or if we have already authenticated
319 		 * the DMG via chunklist, then it is permissible to use.
320 		 * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant.
321 		 */
322 		if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) {
323 			rootauth = 0;
324 		}
325 		error = rootauth;
326 	}
327 	vnode_put(mount_vp);
328 	mount_vp = NULLVP;
329 
330 	if (error) {
331 		/*
332 		 * Failure here exclusively means that the mount failed to authenticate.
333 		 * This means that the disk image either was not sealed (authapfs), or it was
334 		 * not hosted on a chunklisted DMG.  Both scenarios may be fatal depending
335 		 * on the platform.
336 		 */
337 #if defined(__arm64__) && XNU_TARGET_OS_OSX
338 		panic("%s: could not authenticate the pivot image: %d. giving up.", __FUNCTION__, error);
339 #endif
340 		printf("%s: could not authenticate the pivot image: %d. giving up.\n", __FUNCTION__, error);
341 		goto done;
342 	}
343 
344 	if (rootvnode) {
345 		mount_t root_mp = vnode_mount(rootvnode);
346 		if (root_mp && (root_mp->mnt_kern_flag & MNTK_SSD)) {
347 			rootvp_is_ssd = true;
348 		}
349 	}
350 	/*
351 	 * pivot the incoming and outgoing filesystems
352 	 */
353 	error = vfs_switch_root(mount_path, outgoing_root_path, 0);
354 	if (error) {
355 		panic("%s: vfs_switch_root failed: %d", __FUNCTION__, error);
356 	}
357 
358 	/*
359 	 * Mark the filesystem containing the image as backing root, so it
360 	 * won't be unmountable.
361 	 *
362 	 * vfs_switch_root() clears this flag, so we have to set it after
363 	 * the pivot call.
364 	 * If the system later pivots out of the image, vfs_switch_root
365 	 * will clear it again, so the backing filesystem can be unmounted.
366 	 */
367 	mount_t imagemp = imagevp->v_mount;
368 	lck_rw_lock_exclusive(&imagemp->mnt_rwlock);
369 	imagemp->mnt_kern_flag |= MNTK_BACKS_ROOT;
370 	lck_rw_done(&imagemp->mnt_rwlock);
371 
372 	error = 0;
373 
374 	/*
375 	 * Note that we do NOT change kern.bootuuid here -
376 	 * imageboot_mount_image() does, but imageboot_pivot_image() doesn't.
377 	 * imageboot_mount_image() is used when the root volume uuid was
378 	 * "always supposed to be" the one inside the dmg. imageboot_pivot_
379 	 * image() is used when the true root volume just needs to be
380 	 * obscured for a moment by the dmg.
381 	 */
382 
383 done:
384 	if (imagevp != NULLVP) {
385 		vnode_put(imagevp);
386 	}
387 	return error;
388 }
389 
390 /* kern_sysctl.c */
391 extern uuid_string_t fake_bootuuid;
392 
393 static void
set_fake_bootuuid(mount_t mp)394 set_fake_bootuuid(mount_t mp)
395 {
396 	struct vfs_attr va;
397 	VFSATTR_INIT(&va);
398 	VFSATTR_WANTED(&va, f_uuid);
399 
400 	if (vfs_getattr(mp, &va, vfs_context_current()) != 0) {
401 		return;
402 	}
403 
404 	if (!VFSATTR_IS_SUPPORTED(&va, f_uuid)) {
405 		return;
406 	}
407 
408 	uuid_unparse(va.f_uuid, fake_bootuuid);
409 }
410 
411 /*
412  * Swaps in new root filesystem based on image path.
413  * Current root filesystem is removed from mount list and
414  * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and
415  * "rootvnode" is reset.  Root vnode of currentroot filesystem
416  * is returned with usecount (no iocount).
417  * kern.bootuuid is arranged to return the UUID of the mounted image. (If
418  * we did nothing here, it would be the UUID of the image source volume.)
419  */
420 __private_extern__ int
imageboot_mount_image(const char * root_path,int height,imageboot_type_t type)421 imageboot_mount_image(const char *root_path, int height, imageboot_type_t type)
422 {
423 	dev_t           dev;
424 	int             error;
425 	/*
426 	 * Need to stash this here since we may do a kernel_mount() on /, which will
427 	 * automatically update the rootvnode global. Note that vfs_mountroot() does
428 	 * not update that global, which is a bit weird.
429 	 */
430 	vnode_t         old_rootvnode = rootvnode;
431 	vnode_t         newdp;
432 	mount_t         new_rootfs;
433 	boolean_t update_rootvnode = FALSE;
434 
435 	if (type == IMAGEBOOT_DMG) {
436 		error = di_root_image(root_path, rootdevice, DEVMAXNAMESIZE, &dev);
437 		if (error) {
438 			panic("%s: di_root_image failed: %d", __FUNCTION__, error);
439 		}
440 
441 		rootdev = dev;
442 		mountroot = NULL;
443 		printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
444 		error = vfs_mountroot();
445 		if (error != 0) {
446 			panic("vfs_mountroot() failed.");
447 		}
448 
449 		update_rootvnode = TRUE;
450 	} else {
451 		panic("invalid imageboot type: %d", type);
452 	}
453 
454 	/*
455 	 * Get the vnode for '/'.
456 	 * Set fdp->fd_fd.fd_cdir to reference it.
457 	 */
458 	if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
459 		panic("%s: cannot find root vnode", __FUNCTION__);
460 	}
461 	DBG_TRACE("%s: old root fsname: %s\n", __FUNCTION__, old_rootvnode->v_mount->mnt_vtable->vfc_name);
462 
463 	if (old_rootvnode != NULL) {
464 		/* remember the old rootvnode, but remove it from mountlist */
465 		mount_t old_rootfs = old_rootvnode->v_mount;
466 
467 		mount_list_remove(old_rootfs);
468 		mount_lock(old_rootfs);
469 		old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
470 		old_rootfs->mnt_flag &= ~MNT_ROOTFS;
471 		mount_unlock(old_rootfs);
472 	}
473 
474 	vnode_ref(newdp);
475 	vnode_put(newdp);
476 
477 	lck_rw_lock_exclusive(&rootvnode_rw_lock);
478 	/* switch to the new rootvnode */
479 	if (update_rootvnode) {
480 		rootvnode = newdp;
481 		set_fake_bootuuid(rootvnode->v_mount);
482 	}
483 
484 	new_rootfs = rootvnode->v_mount;
485 	mount_lock(new_rootfs);
486 	new_rootfs->mnt_flag |= MNT_ROOTFS;
487 	mount_unlock(new_rootfs);
488 
489 	kernproc->p_fd.fd_cdir = newdp;
490 	lck_rw_unlock_exclusive(&rootvnode_rw_lock);
491 
492 	DBG_TRACE("%s: root switched\n", __FUNCTION__);
493 
494 	if (old_rootvnode != NULL) {
495 #ifdef CONFIG_IMGSRC_ACCESS
496 		if (height >= 0) {
497 			imgsrc_rootvnodes[height] = old_rootvnode;
498 		} else {
499 			vnode_get_and_drop_always(old_rootvnode);
500 		}
501 #else
502 #pragma unused(height)
503 		vnode_get_and_drop_always(old_rootvnode);
504 #endif /* CONFIG_IMGSRC_ACCESS */
505 	}
506 	return 0;
507 }
508 
509 /*
510  * Return a memory object for given file path.
511  * Also returns a vnode reference for the given file path.
512  */
513 void *
ubc_getobject_from_filename(const char * filename,struct vnode ** vpp,off_t * file_size)514 ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size)
515 {
516 	int err = 0;
517 	struct nameidata ndp = {};
518 	struct vnode *vp = NULL;
519 	off_t fsize = 0;
520 	vfs_context_t ctx = vfs_context_kernel();
521 	void *control = NULL;
522 
523 	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(filename), ctx);
524 	if ((err = namei(&ndp)) != 0) {
525 		goto errorout;
526 	}
527 	nameidone(&ndp);
528 	vp = ndp.ni_vp;
529 
530 	if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
531 		goto errorout;
532 	}
533 
534 	if (fsize < 0) {
535 		goto errorout;
536 	}
537 
538 	control = ubc_getobject(vp, UBC_FLAGS_NONE);
539 	if (control == NULL) {
540 		goto errorout;
541 	}
542 
543 	*file_size = fsize;
544 	*vpp = vp;
545 	vp = NULL;
546 
547 errorout:
548 	if (vp) {
549 		vnode_put(vp);
550 	}
551 	return control;
552 }
553 
554 static int
imageboot_read_file_internal(const char * path,const off_t offset,const bool pageable,void ** bufp,size_t * bufszp,off_t * fsizep)555 imageboot_read_file_internal(const char *path, const off_t offset, const bool pageable, void **bufp, size_t *bufszp, off_t *fsizep)
556 {
557 	int err = 0;
558 	struct nameidata ndp = {};
559 	struct vnode *vp = NULL;
560 	struct vnode *rsrc_vp = NULL;
561 	char *readbuf = NULL;
562 	off_t readsize = 0;
563 	off_t readoff = 0;
564 	off_t fsize = 0;
565 	size_t maxsize = 0;
566 	char *buf = NULL;
567 	bool doclose = false;
568 
569 	vfs_context_t ctx = vfs_context_kernel();
570 	proc_t p = vfs_context_proc(ctx);
571 	kauth_cred_t kerncred = vfs_context_ucred(ctx);
572 
573 	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF | FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
574 	if ((err = namei(&ndp)) != 0) {
575 		AUTHPRNT("namei failed (%s) - %d", path, err);
576 		goto out;
577 	}
578 	nameidone(&ndp);
579 	vp = ndp.ni_vp;
580 
581 	if ((err = vnode_size(vp, &fsize, ctx)) != 0) {
582 		AUTHPRNT("failed to get vnode size of %s - %d", path, err);
583 		goto out;
584 	}
585 	if (fsize < 0) {
586 		panic("negative file size");
587 	}
588 	if (offset < 0) {
589 		AUTHPRNT("negative file offset");
590 		err = EINVAL;
591 		goto out;
592 	}
593 
594 	if (fsizep) {
595 		*fsizep = fsize;
596 	}
597 
598 	if ((err = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
599 		AUTHPRNT("failed to open %s - %d", path, err);
600 		goto out;
601 	}
602 	doclose = true;
603 
604 	/* cap fsize to the amount that remains after offset */
605 	if (os_sub_overflow(fsize, offset, &fsize)) {
606 		fsize = 0;
607 	} else if (fsize < 0) {
608 		fsize = 0;
609 	}
610 
611 	/* if bufsz is non-zero, cap the read at bufsz bytes */
612 	maxsize = *bufszp;
613 	if (maxsize && (maxsize < (size_t)fsize)) {
614 		fsize = maxsize;
615 	}
616 
617 	/* if fsize is larger than the specified limit (presently 2.5GB) or a NVRAM-configured limit, fail */
618 	maxsize = IMAGEBOOT_MAX_FILESIZE;
619 	PE_parse_boot_argn("rootdmg-maxsize", &maxsize, sizeof(maxsize));
620 	if (maxsize && (maxsize < (size_t)fsize)) {
621 		AUTHPRNT("file is too large (%lld > %lld)", (long long) fsize, (long long) maxsize);
622 		err = ENOMEM;
623 		goto out;
624 	}
625 
626 	if (pageable) {
627 		vm_offset_t addr = 0;
628 		if (kmem_alloc(kernel_map, &addr, (vm_size_t)fsize,
629 		    KMA_PAGEABLE | KMA_DATA, VM_KERN_MEMORY_FILE) == KERN_SUCCESS) {
630 			buf = (char *)addr;
631 		} else {
632 			buf = NULL;
633 		}
634 	} else {
635 		//limit kalloc data calls to only 2GB.
636 		if (fsize > IMAGEBOOT_MAX_KALLOCSIZE) {
637 			AUTHPRNT("file is too large for non-pageable (%lld)", (long long) fsize);
638 			err = ENOMEM;
639 			goto out;
640 		}
641 		buf = (char *)kalloc_data((vm_size_t)fsize, Z_WAITOK);
642 	}
643 	if (buf == NULL) {
644 		err = ENOMEM;
645 		goto out;
646 	}
647 
648 #if NAMEDSTREAMS
649 	/* find resource fork so we can evict cached decmpfs data */
650 	if (VNOP_GETNAMEDSTREAM(vp, &rsrc_vp, XATTR_RESOURCEFORK_NAME, NS_OPEN, /*flags*/ 0, ctx) == 0) {
651 		vnode_ref(rsrc_vp);
652 		vnode_put(rsrc_vp);
653 		AUTHDBG("Found resource fork for %s", path);
654 	}
655 #endif
656 
657 	/* read data in chunks to handle (fsize > INT_MAX) */
658 	readbuf = buf;
659 	readsize = fsize;
660 	readoff = offset;
661 	while (readsize > 0) {
662 		const off_t chunksize_max = 16 * 1024 * 1024; /* 16 MiB */
663 		const off_t chunksize = MIN(readsize, chunksize_max);
664 
665 		/* read next chunk, pass IO_NOCACHE to clarify our intent (even if ignored) */
666 		if ((err = vn_rdwr(UIO_READ, vp, (caddr_t)readbuf, (int)chunksize, readoff, UIO_SYSSPACE, IO_NODELOCKED | IO_NOCACHE | IO_RAOFF, kerncred, /*resid*/ NULL, p)) != 0) {
667 			AUTHPRNT("Cannot read %lld bytes at offset %lld from %s - %d", (long long)chunksize, (long long)readoff, path, err);
668 			goto out;
669 		}
670 
671 		/* evict cached pages so they don't accumulate during early boot */
672 		ubc_msync(vp, readoff, readoff + chunksize, NULL, UBC_INVALIDATE | UBC_PUSHALL);
673 
674 		/* evict potentially-cached decmpfs data if we have a resource fork */
675 		if (rsrc_vp != NULL) {
676 			if (vnode_getwithref(rsrc_vp) == 0) {
677 				ubc_msync(rsrc_vp, 0, ubc_getsize(rsrc_vp), NULL, UBC_INVALIDATE | UBC_PUSHALL);
678 				vnode_put(rsrc_vp);
679 			}
680 		}
681 
682 		readbuf = &readbuf[chunksize];
683 		readsize -= chunksize;
684 		readoff += chunksize;
685 	}
686 
687 out:
688 	if (doclose) {
689 		VNOP_CLOSE(vp, FREAD, ctx);
690 	}
691 	if (rsrc_vp) {
692 		vnode_rele(rsrc_vp);
693 		rsrc_vp = NULL;
694 	}
695 	if (vp) {
696 		vnode_put(vp);
697 		vp = NULL;
698 	}
699 
700 	if (err) {
701 		if (buf == NULL) {
702 			/* nothing to free */
703 		} else if (pageable) {
704 			kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)fsize);
705 		} else {
706 			kfree_data(buf, (vm_size_t)fsize);
707 		}
708 	} else {
709 		*bufp = buf;
710 		*bufszp = (size_t)fsize;
711 	}
712 
713 	return err;
714 }
715 
716 int
imageboot_read_file_pageable(const char * path,void ** bufp,size_t * bufszp)717 imageboot_read_file_pageable(const char *path, void **bufp, size_t *bufszp)
718 {
719 	return imageboot_read_file_internal(path, 0, true, bufp, bufszp, NULL);
720 }
721 
722 int
imageboot_read_file_from_offset(const char * path,const off_t offset,void ** bufp,size_t * bufszp)723 imageboot_read_file_from_offset(const char *path, const off_t offset, void **bufp, size_t *bufszp)
724 {
725 	return imageboot_read_file_internal(path, offset, false, bufp, bufszp, NULL);
726 }
727 
728 int
imageboot_read_file(const char * path,void ** bufp,size_t * bufszp,off_t * fsizep)729 imageboot_read_file(const char *path, void **bufp, size_t *bufszp, off_t *fsizep)
730 {
731 	return imageboot_read_file_internal(path, 0, false, bufp, bufszp, fsizep);
732 }
733 
#if CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST
/*
 * Look up the image file at 'path' and return its vnode.
 *
 * path   - file to look up; it must be a regular file (VREG).
 * fsize  - optional; when non-NULL receives the size from vnode_size().
 * errp   - written with the errno value on failure only; it is left
 *          untouched on success.
 *
 * Returns the vnode, still holding the reference obtained by the lookup
 * (the caller releases it with vnode_put()), or NULL on failure.
 */
vnode_t
imgboot_get_image_file(const char *path, off_t *fsize, int *errp)
{
	struct nameidata ndp = {};
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_kernel();
	int err;

	NDINIT(&ndp, LOOKUP, OP_OPEN, LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
	if ((err = namei(&ndp)) != 0) {
		AUTHPRNT("Cannot find %s - error %d", path, err);
	} else {
		nameidone(&ndp);
		vp = ndp.ni_vp;

		if (vp->v_type != VREG) {
			err = EINVAL;
			AUTHPRNT("%s is not a regular file", path);
		} else if (fsize) {
			if ((err = vnode_size(vp, fsize, ctx)) != 0) {
				AUTHPRNT("Cannot get file size of %s - error %d", path, err);
			}
		}
	}

	if (err) {
		/* drop the lookup reference (if the lookup got that far) and report the error */
		if (vp) {
			vnode_put(vp);
		}
		*errp = err;
		vp = NULL;
	}
	return vp;
}
#endif /* CONFIG_IMAGEBOOT_IMG4 || CONFIG_IMAGEBOOT_CHUNKLIST */
770 
771 #if CONFIG_IMAGEBOOT_IMG4
772 
773 #define APTICKET_NAME "apticket.der"
774 
775 static char *
imgboot_get_apticket_path(const char * rootpath,size_t * sz)776 imgboot_get_apticket_path(const char *rootpath, size_t *sz)
777 {
778 	size_t plen = strlen(rootpath) + sizeof(APTICKET_NAME) + 1;
779 	char *path = (char *)kalloc_data(plen, Z_WAITOK);
780 
781 	if (path) {
782 		char *slash;
783 
784 		strlcpy(path, rootpath, plen);
785 		slash = strrchr(path, '/');
786 		if (slash == NULL) {
787 			slash = path;
788 		} else {
789 			slash++;
790 		}
791 		strlcpy(slash, APTICKET_NAME, sizeof(APTICKET_NAME) + 1);
792 	}
793 
794 	*sz = plen;
795 	return path;
796 }
797 
798 static int
authenticate_root_with_img4(const char * rootpath)799 authenticate_root_with_img4(const char *rootpath)
800 {
801 	errno_t rv;
802 	vnode_t vp = NULLVP;
803 	size_t ticket_pathsz = 0;
804 	char *ticket_path;
805 	img4_buff_t tck = IMG4_BUFF_INIT;
806 	img4_firmware_execution_context_t exec = {
807 		.i4fex_version = IMG4_FIRMWARE_EXECUTION_CONTEXT_STRUCT_VERSION,
808 		.i4fex_execute = NULL,
809 		.i4fex_context = NULL,
810 	};
811 	img4_firmware_t fw = NULL;
812 	img4_firmware_flags_t fw_flags = IMG4_FIRMWARE_FLAG_BARE |
813 	    IMG4_FIRMWARE_FLAG_SUBSEQUENT_STAGE;
814 
815 	DBG_TRACE("Check %s\n", rootpath);
816 
817 	ticket_path = imgboot_get_apticket_path(rootpath, &ticket_pathsz);
818 	if (ticket_path == NULL) {
819 		AUTHPRNT("Cannot construct ticket path - out of memory");
820 		return ENOMEM;
821 	}
822 
823 	rv = imageboot_read_file(ticket_path, (void **)&tck.i4b_bytes, &tck.i4b_len, NULL);
824 	if (rv) {
825 		AUTHPRNT("Cannot get a ticket from %s - %d\n", ticket_path, rv);
826 		goto out_with_ticket_path;
827 	}
828 
829 	DBG_TRACE("Got %lu bytes of manifest from %s\n", tck.i4b_len, ticket_path);
830 
831 	vp = imgboot_get_image_file(rootpath, NULL, &rv);
832 	if (vp == NULL) {
833 		/* Error message had been printed already */
834 		rv = EIO;
835 		goto out_with_ticket_bytes;
836 	}
837 
838 	fw = img4_firmware_new_from_vnode_4xnu(IMG4_RUNTIME_DEFAULT, &exec, 'rosi',
839 	    vp, fw_flags);
840 	if (!fw) {
841 		AUTHPRNT("Could not allocate new firmware");
842 		rv = ENOMEM;
843 		goto out_with_ticket_bytes;
844 	}
845 
846 	img4_firmware_attach_manifest(fw, &tck);
847 	rv = img4_firmware_evaluate(fw, img4_chip_select_personalized_ap(), NULL);
848 
849 out_with_ticket_bytes:
850 	kfree_data(tck.i4b_bytes, tck.i4b_len);
851 out_with_ticket_path:
852 	kfree_data(ticket_path, ticket_pathsz);
853 
854 	img4_firmware_destroy(&fw);
855 
856 	if (vp) {
857 		vnode_put(vp);
858 	}
859 	return rv;
860 }
861 #endif /* CONFIG_IMAGEBOOT_IMG4 */
862 
863 
864 /*
865  * Attach the image at 'path' as a ramdisk and mount it as our new rootfs.
866  * All existing mounts are first umounted.
867  */
868 static int
imageboot_mount_ramdisk(const char * path)869 imageboot_mount_ramdisk(const char *path)
870 {
871 	int err = 0;
872 	size_t bufsz = 0;
873 	void *buf = NULL;
874 	dev_t dev;
875 	vnode_t newdp;
876 	vnode_t tvp;
877 	mount_t new_rootfs;
878 
879 	/* Read our target image from disk */
880 	err = imageboot_read_file_pageable(path, &buf, &bufsz);
881 	if (err) {
882 		printf("%s: failed: imageboot_read_file_pageable() = %d\n", __func__, err);
883 		goto out;
884 	}
885 	DBG_TRACE("%s: read '%s' sz = %lu\n", __func__, path, bufsz);
886 
887 #if CONFIG_IMGSRC_ACCESS
888 	/* Re-add all root mounts to the mount list in the correct order... */
889 	mount_list_remove(rootvnode->v_mount);
890 	for (int i = 0; i < MAX_IMAGEBOOT_NESTING; i++) {
891 		struct vnode *vn = imgsrc_rootvnodes[i];
892 		if (vn) {
893 			vnode_getalways(vn);
894 			imgsrc_rootvnodes[i] = NULLVP;
895 
896 			mount_t mnt = vn->v_mount;
897 			mount_lock(mnt);
898 			mnt->mnt_flag |= MNT_ROOTFS;
899 			mount_list_add(mnt);
900 			mount_unlock(mnt);
901 
902 			vnode_rele(vn);
903 			vnode_put(vn);
904 		}
905 	}
906 	mount_list_add(rootvnode->v_mount);
907 #endif
908 
909 	/* ... and unmount everything */
910 	vfs_unmountall(FALSE);
911 
912 	lck_rw_lock_exclusive(&rootvnode_rw_lock);
913 	kernproc->p_fd.fd_cdir = NULL;
914 	tvp = rootvnode;
915 	rootvnode = NULL;
916 	rootvp = NULLVP;
917 	rootdev = NODEV;
918 	lck_rw_unlock_exclusive(&rootvnode_rw_lock);
919 	vnode_get_and_drop_always(tvp);
920 
921 	/* Attach the ramfs image ... */
922 	err = di_root_ramfile_buf(buf, bufsz, rootdevice, DEVMAXNAMESIZE, &dev);
923 	if (err) {
924 		printf("%s: failed: di_root_ramfile_buf() = %d\n", __func__, err);
925 		goto out;
926 	}
927 
928 	/* ... and mount it */
929 	rootdev = dev;
930 	mountroot = NULL;
931 	err = vfs_mountroot();
932 	if (err) {
933 		printf("%s: failed: vfs_mountroot() = %d\n", __func__, err);
934 		goto out;
935 	}
936 
937 	/* Switch to new root vnode */
938 	if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel())) {
939 		panic("%s: cannot find root vnode", __func__);
940 	}
941 	vnode_ref(newdp);
942 
943 	lck_rw_lock_exclusive(&rootvnode_rw_lock);
944 	rootvnode = newdp;
945 	rootvnode->v_flag |= VROOT;
946 	new_rootfs = rootvnode->v_mount;
947 	mount_lock(new_rootfs);
948 	new_rootfs->mnt_flag |= MNT_ROOTFS;
949 	mount_unlock(new_rootfs);
950 
951 	set_fake_bootuuid(new_rootfs);
952 
953 	kernproc->p_fd.fd_cdir = newdp;
954 	lck_rw_unlock_exclusive(&rootvnode_rw_lock);
955 
956 	vnode_put(newdp);
957 
958 	DBG_TRACE("%s: root switched\n", __func__);
959 
960 out:
961 	if (err && (buf != NULL)) {
962 		kmem_free(kernel_map, (vm_offset_t)buf, (vm_size_t)bufsz);
963 	}
964 	return err;
965 }
966 
967 /*
968  * If the path is in <file://> URL format then we allocate memory and decode it,
969  * otherwise return the same pointer.
970  *
971  * Caller is expected to check if the pointers are different.
972  */
973 static char *
url_to_path(char * url_path,size_t * sz)974 url_to_path(char *url_path, size_t *sz)
975 {
976 	char *path = url_path;
977 	size_t len = strlen(kIBFilePrefix);
978 
979 	if (strncmp(kIBFilePrefix, url_path, len) == 0) {
980 		/* its a URL - remove the file:// prefix and percent-decode */
981 		url_path += len;
982 
983 		len = strlen(url_path);
984 		if (len) {
985 			/* Make a copy of the path to URL-decode */
986 			path = (char *)kalloc_data(len + 1, Z_WAITOK);
987 			if (path == NULL) {
988 				panic("imageboot path allocation failed - cannot allocate %d bytes", (int)len);
989 			}
990 
991 			strlcpy(path, url_path, len + 1);
992 			*sz = len + 1;
993 			url_decode(path);
994 		} else {
995 			panic("Bogus imageboot path URL - missing path");
996 		}
997 
998 		DBG_TRACE("%s: root image URL <%s> becomes %s\n", __func__, url_path, path);
999 	}
1000 
1001 	return path;
1002 }
1003 
1004 static boolean_t
imageboot_setup_new(imageboot_type_t type)1005 imageboot_setup_new(imageboot_type_t type)
1006 {
1007 	int error;
1008 	char *root_path = NULL;
1009 	int height = 0;
1010 	boolean_t done = FALSE;
1011 	boolean_t auth_root = TRUE;
1012 	boolean_t ramdisk_root = FALSE;
1013 
1014 	root_path = zalloc(ZV_NAMEI);
1015 	assert(root_path != NULL);
1016 
1017 	unsigned imgboot_arg;
1018 	if (PE_parse_boot_argn("-rootdmg-ramdisk", &imgboot_arg, sizeof(imgboot_arg))) {
1019 		ramdisk_root = TRUE;
1020 	}
1021 
1022 	if (PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) {
1023 		printf("%s: container image url is %s\n", __FUNCTION__, root_path);
1024 		error = imageboot_mount_image(root_path, height, type);
1025 		if (error != 0) {
1026 			panic("Failed to mount container image.");
1027 		}
1028 
1029 		height++;
1030 	}
1031 
1032 	if (PE_parse_boot_argn(IMAGEBOOT_AUTHROOT_ARG, root_path, MAXPATHLEN) == FALSE &&
1033 	    PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) {
1034 		if (height > 0) {
1035 			panic("%s specified without %s or %s?", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_AUTHROOT_ARG, IMAGEBOOT_ROOT_ARG);
1036 		}
1037 		goto out;
1038 	}
1039 
1040 	printf("%s: root image URL is '%s'\n", __func__, root_path);
1041 
1042 	/* Make a copy of the path to URL-decode */
1043 	size_t pathsz;
1044 	char *path = url_to_path(root_path, &pathsz);
1045 	assert(path);
1046 
1047 #if CONFIG_IMAGEBOOT_CHUNKLIST
1048 	if (auth_root) {
1049 		/*
1050 		 * This updates auth_root to reflect whether chunklist was
1051 		 * actually enforced. In effect, this clears auth_root if
1052 		 * CSR_ALLOW_ANY_RECOVERY_OS allowed an invalid image.
1053 		 */
1054 		AUTHDBG("authenticating root image at %s", path);
1055 		error = authenticate_root_with_chunklist(path, &auth_root);
1056 		if (error) {
1057 			panic("root image authentication failed (err = %d)", error);
1058 		}
1059 		AUTHDBG("successfully authenticated %s", path);
1060 	}
1061 #endif
1062 
1063 	if (ramdisk_root) {
1064 		error = imageboot_mount_ramdisk(path);
1065 	} else {
1066 		error = imageboot_mount_image(root_path, height, type);
1067 	}
1068 
1069 	if (path != root_path) {
1070 		kfree_data(path, pathsz);
1071 	}
1072 
1073 	if (error) {
1074 		panic("Failed to mount root image (err=%d, auth=%d, ramdisk=%d)",
1075 		    error, auth_root, ramdisk_root);
1076 	}
1077 
1078 #if CONFIG_IMAGEBOOT_CHUNKLIST
1079 	if (auth_root) {
1080 		/* check that the image version matches the running kernel */
1081 		AUTHDBG("checking root image version");
1082 		error = authenticate_root_version_check();
1083 		if (error) {
1084 			panic("root image version check failed");
1085 		} else {
1086 			AUTHDBG("root image version matches kernel");
1087 		}
1088 	}
1089 #endif
1090 
1091 	done = TRUE;
1092 
1093 out:
1094 	zfree(ZV_NAMEI, root_path);
1095 	return done;
1096 }
1097 
1098 __private_extern__ void
imageboot_setup(imageboot_type_t type)1099 imageboot_setup(imageboot_type_t type)
1100 {
1101 	int         error = 0;
1102 	char *root_path = NULL;
1103 
1104 	DBG_TRACE("%s: entry\n", __FUNCTION__);
1105 
1106 	if (rootvnode == NULL) {
1107 		panic("imageboot_setup: rootvnode is NULL.");
1108 	}
1109 
1110 	/*
1111 	 * New boot-arg scheme:
1112 	 *      root-dmg : the dmg that will be the root filesystem, authenticated by default.
1113 	 *      auth-root-dmg : same as root-dmg.
1114 	 *      container-dmg : an optional dmg that contains the root-dmg.
1115 	 *  locker : the locker that will be the root filesystem -- mutually
1116 	 *           exclusive with any other boot-arg.
1117 	 */
1118 	if (imageboot_setup_new(type)) {
1119 		return;
1120 	}
1121 
1122 	root_path = zalloc(ZV_NAMEI);
1123 	assert(root_path != NULL);
1124 
1125 	/*
1126 	 * Look for outermost disk image to root from.  If we're doing a nested boot,
1127 	 * there's some sense in which the outer image never needs to be the root filesystem,
1128 	 * but it does need very similar treatment: it must not be unmounted, needs a fake
1129 	 * device vnode created for it, and should not show up in getfsstat() until exposed
1130 	 * with MNT_IMGSRC. We just make it the temporary root.
1131 	 */
1132 #if CONFIG_IMAGEBOOT_IMG4
1133 	if (PE_parse_boot_argn("arp0", root_path, MAXPATHLEN)) {
1134 		size_t pathsz;
1135 		char *path = url_to_path(root_path, &pathsz);
1136 
1137 		assert(path);
1138 
1139 		if (authenticate_root_with_img4(path)) {
1140 			panic("Root image %s does not match the manifest", root_path);
1141 		}
1142 		if (path != root_path) {
1143 			kfree_data(path, pathsz);
1144 		}
1145 	} else
1146 #endif /* CONFIG_IMAGEBOOT_IMG4 */
1147 	if ((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) &&
1148 	    (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) {
1149 		panic("%s: no valid path to image.", __FUNCTION__);
1150 	}
1151 
1152 	DBG_TRACE("%s: root image url is %s\n", __FUNCTION__, root_path);
1153 
1154 	error = imageboot_mount_image(root_path, 0, type);
1155 	if (error) {
1156 		panic("Failed on first stage of imageboot.");
1157 	}
1158 
1159 	/*
1160 	 * See if we are rooting from a nested image
1161 	 */
1162 	if (PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) {
1163 		goto done;
1164 	}
1165 
1166 	printf("%s: second level root image url is %s\n", __FUNCTION__, root_path);
1167 
1168 	/*
1169 	 * If we fail to set up second image, it's not a given that we
1170 	 * can safely root off the first.
1171 	 */
1172 	error = imageboot_mount_image(root_path, 1, type);
1173 	if (error) {
1174 		panic("Failed on second stage of imageboot.");
1175 	}
1176 
1177 done:
1178 	zfree(ZV_NAMEI, root_path);
1179 
1180 	DBG_TRACE("%s: exit\n", __FUNCTION__);
1181 
1182 	return;
1183 }
1184