xref: /xnu-8020.101.4/bsd/kern/bsd_init.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  *
29  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  * (c) UNIX System Laboratories, Inc.
32  * All or some portions of this file are derived from material licensed
33  * to the University of California by American Telephone and Telegraph
34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35  * the permission of UNIX System Laboratories, Inc.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
66  */
67 
68 /*
69  *
70  * Mach Operating System
71  * Copyright (c) 1987 Carnegie-Mellon University
72  * All rights reserved.  The CMU software License Agreement specifies
73  * the terms and conditions for use and redistribution.
74  */
75 /*
76  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77  * support for mandatory and extensible security protections.  This notice
78  * is included in support of clause 2.2 (b) of the Apple Public License,
79  * Version 2.0.
80  */
81 
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/conf.h>
91 #include <sys/buf_internal.h>
92 #include <sys/user.h>
93 #include <sys/time.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96 #include <sys/kasl.h>
97 
98 #include <security/audit/audit.h>
99 
100 #include <sys/malloc.h>
101 #include <sys/dkstat.h>
102 #include <sys/codesign.h>
103 
104 #include <kern/startup.h>
105 #include <kern/thread.h>
106 #include <kern/task.h>
107 #include <kern/ast.h>
108 #include <kern/zalloc.h>
109 #include <kern/ux_handler.h>            /* for ux_handler_setup() */
110 #include <kern/sched_hygiene.h>
111 
112 #include <mach/vm_param.h>
113 
114 #include <vm/vm_map.h>
115 #include <vm/vm_kern.h>
116 
117 #include <sys/reboot.h>
118 #include <dev/busvar.h>                 /* for pseudo_inits */
119 #include <sys/kdebug.h>
120 #include <sys/monotonic.h>
121 
122 #include <mach/mach_types.h>
123 #include <mach/vm_prot.h>
124 #include <mach/semaphore.h>
125 #include <mach/sync_policy.h>
126 #include <kern/clock.h>
127 #include <sys/csr.h>
128 #include <mach/kern_return.h>
129 #include <mach/thread_act.h>            /* for thread_resume() */
130 #include <sys/mcache.h>                 /* for mcache_init() */
131 #include <sys/mbuf.h>                   /* for mbinit() */
132 #include <sys/event.h>                  /* for knote_init() */
133 #include <sys/eventhandler.h>           /* for eventhandler_init() */
134 #include <sys/kern_memorystatus.h>      /* for memorystatus_init() */
135 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
136 #include <sys/aio_kern.h>               /* for aio_init() */
137 #include <sys/semaphore.h>              /* for psem_cache_init() */
138 #include <net/dlil.h>                   /* for dlil_init() */
139 #include <net/iptap.h>                  /* for iptap_init() */
140 #include <sys/socketvar.h>              /* for socketinit() */
141 #include <sys/protosw.h>                /* for domaininit() */
142 #include <kern/sched_prim.h>            /* for thread_wakeup() */
143 #include <net/if_ether.h>               /* for ether_family_init() */
144 #include <net/if_gif.h>                 /* for gif_init() */
145 #include <miscfs/devfs/devfsdefs.h>     /* for devfs_kernel_mount() */
146 #include <vm/vm_kern.h>                 /* for kmem_suballoc() */
147 #include <sys/proc_uuid_policy.h>       /* proc_uuid_policy_init() */
148 #include <netinet/flow_divert.h>        /* flow_divert_init() */
149 #include <net/content_filter.h>         /* for cfil_init() */
150 #include <net/necp.h>                   /* for necp_init() */
151 #include <net/network_agent.h>          /* for netagent_init() */
152 #include <net/packet_mangler.h>         /* for pkt_mnglr_init() */
153 #include <net/if_utun.h>                /* for utun_register_control() */
154 #include <net/if_ipsec.h>               /* for ipsec_register_control() */
155 #include <net/netsrc.h>                 /* for netsrc_init() */
156 #include <net/ntstat.h>                 /* for nstat_init() */
157 #include <netinet/tcp_cc.h>                     /* for tcp_cc_init() */
158 #include <netinet/mptcp_var.h>          /* for mptcp_control_register() */
159 #include <net/nwk_wq.h>                 /* for nwk_wq_init */
160 #include <net/restricted_in_port.h>     /* for restricted_in_port_init() */
161 #include <net/remote_vif.h>             /* for rvi_init() */
162 #include <kern/assert.h>                /* for assert() */
163 #include <sys/kern_overrides.h>         /* for init_system_override() */
164 #include <sys/lockf.h>                  /* for lf_init() */
165 #include <sys/fsctl.h>
166 
167 #include <net/init.h>
168 
169 #if CONFIG_MACF
170 #include <security/mac_framework.h>
171 #include <security/mac_internal.h>      /* mac_init_bsd() */
172 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
173 #endif
174 
175 #include <machine/exec.h>
176 
177 #if CONFIG_NETBOOT
178 #include <sys/netboot.h>
179 #endif
180 
181 #if CONFIG_IMAGEBOOT
182 #include <sys/imageboot.h>
183 #endif
184 
185 #if PFLOG
186 #include <net/if_pflog.h>
187 #endif
188 
189 #if SKYWALK
190 #include <skywalk/os_skywalk_private.h>
191 #endif /* SKYWALK */
192 
193 #include <pexpert/pexpert.h>
194 #include <machine/pal_routines.h>
195 #include <console/video_console.h>
196 
197 #if CONFIG_XNUPOST
198 #include <tests/xnupost.h>
199 #endif
200 
201 void * get_user_regs(thread_t);         /* XXX kludge for <machine/thread.h> */
202 void IOKitInitializeTime(void);         /* XXX */
203 void IOSleep(unsigned int);             /* XXX */
204 void IOSetImageBoot(void);              /* XXX */
205 void loopattach(void);                  /* XXX */
206 
207 const char *const copyright =
208     "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
209     "The Regents of the University of California. "
210     "All rights reserved.\n\n";
211 
212 /* Components of the first process -- never freed. */
213 SECURITY_READ_ONLY_LATE(struct vfs_context) vfs_context0;
214 
215 struct proc proc0 = {
216 	.p_comm    = "kernel_task",
217 	.p_name    = "kernel_task",
218 	.p_pptr    = &proc0,
219 	.p_stat    = SRUN,
220 #if defined(__LP64__)
221 	.p_flag    = P_SYSTEM | P_LP64,
222 #else
223 	.p_flag    = P_SYSTEM,
224 #endif
225 	.p_nice    = NZERO,
226 	.p_uthlist = TAILQ_HEAD_INITIALIZER(proc0.p_uthlist),
227 };
228 static struct plimit limit0;
229 static struct pstats pstats0;
230 SECURITY_READ_ONLY_LATE(proc_t) kernproc = &proc0;
231 proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
232 
233 long tk_cancc;
234 long tk_nin;
235 long tk_nout;
236 long tk_rawcc;
237 
238 int lock_trace = 0;
239 /* Global variables to make pstat happy. We do swapping differently */
240 int nswdev, nswap;
241 int nswapmap;
242 void *swapmap;
243 struct swdevt swdevt[1];
244 
245 static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
246 LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
247 LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
248 
249 dev_t   rootdev;                /* device of the root */
250 dev_t   dumpdev;                /* device to take dumps on */
251 long    dumplo;                 /* offset into dumpdev */
252 long    hostid;
253 char    hostname[MAXHOSTNAMELEN];
254 char    domainname[MAXDOMNAMELEN];
255 char    rootdevice[DEVMAXNAMESIZE];
256 
257 struct  vnode *rootvp;
258 bool rootvp_is_ssd = false;
259 SECURITY_READ_ONLY_LATE(int) boothowto;
260 /*
261  * -minimalboot indicates that we want userspace to be bootstrapped to a
262  * minimal environment.  What constitutes minimal is up to the bootstrap
263  * process.
264  */
265 TUNABLE(int, minimalboot, "-minimalboot", 0);
266 #if CONFIG_DARKBOOT
267 int darkboot = 0;
268 #endif
269 
270 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
271 extern void IOSecureBSDRoot(const char * rootName);
272 extern kern_return_t IOKitBSDInit(void );
273 extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
274 extern void kminit(void);
275 extern void bsd_bufferinit(void);
276 extern void throttle_init(void);
277 
278 vm_map_t        bsd_pageable_map;
279 vm_map_t        mb_map;
280 
281 static  int bsd_simul_execs;
282 static int bsd_pageable_map_size;
283 __private_extern__ int execargs_cache_size = 0;
284 __private_extern__ int execargs_free_count = 0;
285 __private_extern__ vm_offset_t * execargs_cache = NULL;
286 
287 void bsd_exec_setup(int);
288 
289 __private_extern__ int bootarg_execfailurereports = 0;
290 
291 #if __x86_64__
292 __private_extern__ TUNABLE(int, bootarg_no32exec, "no32exec", 1);
293 #endif
294 
295 #if DEVELOPMENT || DEBUG
296 /* Prevent kernel-based ASLR from being used. */
297 __private_extern__ TUNABLE(bool, bootarg_disable_aslr, "-disable_aslr", 0);
298 #endif
299 
300 /*
301  * Allow an alternate dyld to be used for testing.
302  */
303 
304 #if DEVELOPMENT || DEBUG
305 char dyld_alt_path[MAXPATHLEN];
306 int use_alt_dyld = 0;
307 #endif
308 
309 int     cmask = CMASK;
310 extern int customnbuf;
311 
312 kern_return_t bsd_autoconf(void);
313 void bsd_utaskbootstrap(void);
314 
315 #if CONFIG_DEV_KMEM
316 extern void dev_kmem_init(void);
317 #endif
318 static void process_name(const char *, proc_t);
319 
320 static void setconf(void);
321 
322 #if CONFIG_BASESYSTEMROOT
323 static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
324 static boolean_t bsdmgroot_bootable(void);
325 #endif // CONFIG_BASESYSTEMROOT
326 
327 static bool bsd_rooted_ramdisk(void);
328 
329 #if SYSV_SHM
330 extern void sysv_shm_lock_init(void);
331 #endif
332 #if SYSV_SEM
333 extern void sysv_sem_lock_init(void);
334 #endif
335 #if SYSV_MSG
336 extern void sysv_msg_lock_init(void);
337 #endif
338 
339 #if CONFIG_MACF
340 #if defined (__i386__) || defined (__x86_64__)
341 /* MACF policy_check configuration flags; see policy_check.c for details */
342 extern int check_policy_init(int);
343 #endif
344 #endif  /* CONFIG_MACF */
345 
346 /* If we are using CONFIG_DTRACE */
347 #if CONFIG_DTRACE
348 extern void dtrace_postinit(void);
349 #endif
350 
351 /*
352  * Initialization code.
353  * Called from cold start routine as
354  * soon as a stack and segmentation
355  * have been established.
356  * Functions:
357  *	turn on clock
358  *	hand craft 0th process
359  *	call all initialization routines
360  *  hand craft 1st user process
361  */
362 
363 /*
364  *	Sets the name for the given task.
365  */
366 static void
process_name(const char * s,proc_t p)367 process_name(const char *s, proc_t p)
368 {
369 	strlcpy(p->p_comm, s, sizeof(p->p_comm));
370 	strlcpy(p->p_name, s, sizeof(p->p_name));
371 }
372 
373 /* To allow these values to be patched, they're globals here */
374 #include <machine/vmparam.h>
375 struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
376 struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
377 struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
378 
379 extern struct os_refgrp rlimit_refgrp;
380 
381 extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
382 extern int      (*mountroot)(void);
383 
384 LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
385 LCK_GRP_DECLARE(proc_lck_grp, "proc");
386 LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
387 LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
388 LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
389 LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
390 LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
391 LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
392 LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
393 
394 
395 LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
396 
397 #if XNU_TARGET_OS_OSX
398 /* hook called after root is mounted XXX temporary hack */
399 void (*mountroot_post_hook)(void);
400 void (*unmountroot_pre_hook)(void);
401 #endif
402 void set_rootvnode(vnode_t);
403 
404 extern lck_rw_t rootvnode_rw_lock;
405 
406 /* called with an iocount and usecount on new_rootvnode */
407 void
set_rootvnode(vnode_t new_rootvnode)408 set_rootvnode(vnode_t new_rootvnode)
409 {
410 	mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
411 	vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
412 	vnode_t old_rootvnode = rootvnode;
413 
414 	new_rootvnode->v_flag |= VROOT;
415 	rootvp = new_devvp;
416 	rootvnode = new_rootvnode;
417 	kernproc->p_fd.fd_cdir = new_rootvnode;
418 	if (new_devvp != NULL) {
419 		rootdev = vnode_specrdev(new_devvp);
420 	} else if (new_mount != NULL) {
421 		rootdev = vfs_statfs(new_mount)->f_fsid.val[0];  /* like ATTR_CMN_DEVID */
422 	} else {
423 		rootdev = NODEV;
424 	}
425 
426 	if (old_rootvnode) {
427 		vnode_rele(old_rootvnode);
428 	}
429 }
430 
431 #define RAMDEV "md0"
432 
433 bool
bsd_rooted_ramdisk(void)434 bsd_rooted_ramdisk(void)
435 {
436 	bool is_ramdisk = false;
437 	char *dev_path = zalloc(ZV_NAMEI);
438 	if (dev_path == NULL) {
439 		panic("failed to allocate devpath string!");
440 	}
441 
442 	if (PE_parse_boot_argn("rd", dev_path, MAXPATHLEN)) {
443 		if (strncmp(dev_path, RAMDEV, strlen(RAMDEV)) == 0) {
444 			is_ramdisk = true;
445 		}
446 	}
447 
448 	zfree(ZV_NAMEI, dev_path);
449 	return is_ramdisk;
450 }
451 
452 /*
453  * This function is called very early on in the Mach startup, from the
454  * function start_kernel_threads() in osfmk/kern/startup.c.  It's called
455  * in the context of the current (startup) task using a call to the
456  * function kernel_thread_create() to jump into start_kernel_threads().
457  * Internally, kernel_thread_create() calls thread_create_internal(),
458  * which calls uthread_init().  The function of uthread_init() is
459  * normally to init a uthread structure, and fill out the uu_sigmask,
460  * tro_ucred/tro_proc fields.  It skips filling these out in the case of the "task"
461  * being "kernel_task", because the order of operation is inverted.  To
462  * account for that, we need to manually fill in at least the contents
463  * of the tro_ucred field so that the uthread structure can be
464  * used like any other.
465  */
466 
467 void
bsd_init(void)468 bsd_init(void)
469 {
470 	struct uthread *ut;
471 	kern_return_t   ret;
472 	vnode_t init_rootvnode = NULLVP;
473 	struct proc_ro_data kernproc_ro_data = {
474 		.p_csflags = CS_VALID,
475 	};
476 	struct task_ro_data kerntask_ro_data = { };
477 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
478 	boolean_t       netboot = FALSE;
479 #endif
480 
481 #define DEBUG_BSDINIT 0
482 
483 #if DEBUG_BSDINIT
484 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
485 #else
486 #define bsd_init_kprintf(x, ...)
487 #endif
488 
489 	throttle_init();
490 
491 	printf(copyright);
492 
493 #if CONFIG_DEV_KMEM
494 	bsd_init_kprintf("calling dev_kmem_init\n");
495 	dev_kmem_init();
496 #endif
497 
498 	/* Initialize kauth subsystem before instancing the first credential */
499 	bsd_init_kprintf("calling kauth_init\n");
500 	kauth_init();
501 
502 	/* kernel_task->proc = kernproc; */
503 	set_bsdtask_info(kernel_task, (void *)kernproc);
504 
505 	/* set the cred */
506 	kauth_cred_set(&kernproc_ro_data.p_ucred, vfs_context0.vc_ucred);
507 	kernproc->p_proc_ro = proc_ro_alloc(kernproc, &kernproc_ro_data,
508 	    kernel_task, &kerntask_ro_data);
509 
510 	/* give kernproc a name */
511 	bsd_init_kprintf("calling process_name\n");
512 	process_name("kernel_task", kernproc);
513 
514 	/* Allocate proc lock attribute */
515 
516 	lck_mtx_init(&kernproc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
517 	lck_mtx_init(&kernproc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
518 	lck_spin_init(&kernproc->p_slock, &proc_slock_grp, &proc_lck_attr);
519 
520 	/* Init the file descriptor table. */
521 	fdt_init(kernproc);
522 	kernproc->p_fd.fd_cmask = (mode_t)cmask;
523 
524 	assert(bsd_simul_execs != 0);
525 	execargs_cache_size = bsd_simul_execs;
526 	execargs_free_count = bsd_simul_execs;
527 	execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
528 	    ZALIGN(vm_offset_t));
529 
530 	if (current_task() != kernel_task) {
531 		printf("bsd_init: We have a problem, "
532 		    "current task is not kernel task\n");
533 	}
534 
535 	bsd_init_kprintf("calling get_bsdthread_info\n");
536 	ut = current_uthread();
537 
538 #if CONFIG_MACF
539 	/*
540 	 * Initialize the MAC Framework
541 	 */
542 	mac_policy_initbsd();
543 
544 #if defined (__i386__) || defined (__x86_64__)
545 	/*
546 	 * We currently only support this on i386/x86_64, as that is the
547 	 * only lock code we have instrumented so far.
548 	 */
549 	int policy_check_flags;
550 	PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
551 	check_policy_init(policy_check_flags);
552 #endif
553 #endif /* MAC */
554 
555 	/*
556 	 * Make a session and group
557 	 *
558 	 * No need to hold the pgrp lock,
559 	 * there are no other BSD threads yet.
560 	 */
561 	struct session *session0 = session_alloc(kernproc);
562 	struct pgrp *pgrp0 = pgrp_alloc(0, PGRP_REF_NONE);
563 	session0->s_ttypgrpid = 0;
564 	pgrp0->pg_session = session0;
565 
566 	/*
567 	 * Create process 0.
568 	 */
569 	proc_list_lock();
570 	os_ref_init_mask(&kernproc->p_refcount, P_REF_BITS, &p_refgrp, P_REF_NONE);
571 	os_ref_init_raw(&kernproc->p_waitref, &p_refgrp);
572 
573 	/*
574 	 * Make a group and session, then simulate pinsertchild(),
575 	 * adjusted for the kernel.
576 	 */
577 	pghash_insert_locked(0, pgrp0);
578 
579 	LIST_INSERT_HEAD(&pgrp0->pg_members, kernproc, p_pglist);
580 	hazard_ptr_init(&kernproc->p_pgrp, pgrp0);
581 	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
582 
583 	LIST_INSERT_HEAD(SESSHASH(0), session0, s_hash);
584 	proc_list_unlock();
585 
586 	kernproc->task = kernel_task;
587 
588 #if DEVELOPMENT || DEBUG
589 	if (bootarg_disable_aslr) {
590 		kernproc->p_flag |= P_DISABLE_ASLR;
591 	}
592 #endif
593 
594 	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
595 
596 	bsd_init_kprintf("calling kauth_cred_create\n");
597 	/*
598 	 * Officially associate the kernel with vfs_context0.vc_ucred.
599 	 */
600 #if CONFIG_MACF
601 	mac_cred_label_associate_kernel(vfs_context0.vc_ucred);
602 #endif
603 	proc_update_creds_onproc(kernproc);
604 
605 	TAILQ_INIT(&kernproc->p_aio_activeq);
606 	TAILQ_INIT(&kernproc->p_aio_doneq);
607 	kernproc->p_aio_total_count = 0;
608 
609 	/* Create the limits structures. */
610 	for (uint32_t i = 0; i < ARRAY_COUNT(limit0.pl_rlimit); i++) {
611 		limit0.pl_rlimit[i].rlim_cur =
612 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
613 	}
614 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
615 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
616 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
617 	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
618 	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
619 	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
620 	os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
621 
622 	hazard_ptr_init(&kernproc->p_limit, &limit0);
623 	kernproc->p_stats = &pstats0;
624 	proc_sigacts_copy(kernproc, NULL);
625 	kernproc->p_subsystem_root_path = NULL;
626 
627 	/*
628 	 * Charge root for one process: launchd.
629 	 */
630 	bsd_init_kprintf("calling chgproccnt\n");
631 	(void)chgproccnt(0, 1);
632 
633 	/*
634 	 *	Allocate a kernel submap for pageable memory
635 	 *	for temporary copying (execve()).
636 	 */
637 	{
638 		vm_offset_t     minimum;
639 
640 		bsd_init_kprintf("calling kmem_suballoc\n");
641 		assert(bsd_pageable_map_size != 0);
642 		ret = kmem_suballoc(kernel_map,
643 		    &minimum,
644 		    (vm_size_t)bsd_pageable_map_size,
645 		    VM_MAP_CREATE_PAGEABLE,
646 		    VM_FLAGS_ANYWHERE,
647 		    VM_MAP_KERNEL_FLAGS_NONE,
648 		    VM_KERN_MEMORY_BSD,
649 		    &bsd_pageable_map);
650 		if (ret != KERN_SUCCESS) {
651 			panic("bsd_init: Failed to allocate bsd pageable map");
652 		}
653 	}
654 
655 	/*
656 	 * Initialize buffers and hash links for buffers
657 	 *
658 	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
659 	 *		happen after a credential has been associated with
660 	 *		the kernel task.
661 	 */
662 	bsd_init_kprintf("calling bsd_bufferinit\n");
663 	bsd_bufferinit();
664 
665 	/*
666 	 * Initialize the calendar.
667 	 */
668 	bsd_init_kprintf("calling IOKitInitializeTime\n");
669 	IOKitInitializeTime();
670 
671 	/* Initialize the file systems. */
672 	bsd_init_kprintf("calling vfsinit\n");
673 	vfsinit();
674 
675 #if CONFIG_PROC_UUID_POLICY
676 	/* Initial proc_uuid_policy subsystem */
677 	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
678 	proc_uuid_policy_init();
679 #endif
680 
681 #if SOCKETS
682 	/* Initialize per-CPU cache allocator */
683 	mcache_init();
684 
685 	/* Initialize mbuf's. */
686 	bsd_init_kprintf("calling mbinit\n");
687 	mbinit();
688 	restricted_in_port_init();
689 #endif /* SOCKETS */
690 
691 	/*
692 	 * Initializes security event auditing.
693 	 * XXX: Should/could this occur later?
694 	 */
695 #if CONFIG_AUDIT
696 	bsd_init_kprintf("calling audit_init\n");
697 	audit_init();
698 #endif
699 
700 	/* Initialize kqueues */
701 	bsd_init_kprintf("calling knote_init\n");
702 	knote_init();
703 
704 	/* Initialize event handler */
705 	bsd_init_kprintf("calling eventhandler_init\n");
706 	eventhandler_init();
707 
708 	/* Initialize for async IO */
709 	bsd_init_kprintf("calling aio_init\n");
710 	aio_init();
711 
712 	pthread_init();
713 	/* POSIX Shm and Sem */
714 	bsd_init_kprintf("calling pshm_cache_init\n");
715 	pshm_cache_init();
716 	bsd_init_kprintf("calling psem_cache_init\n");
717 	psem_cache_init();
718 
719 	/*
720 	 * Initialize protocols.  Block reception of incoming packets
721 	 * until everything is ready.
722 	 */
723 #if NETWORKING
724 	bsd_init_kprintf("calling nwk_wq_init\n");
725 	nwk_wq_init();
726 	bsd_init_kprintf("calling dlil_init\n");
727 	dlil_init();
728 #endif /* NETWORKING */
729 #if SOCKETS
730 	bsd_init_kprintf("calling socketinit\n");
731 	socketinit();
732 	bsd_init_kprintf("calling domaininit\n");
733 	domaininit();
734 	iptap_init();
735 #if FLOW_DIVERT
736 	flow_divert_init();
737 #endif  /* FLOW_DIVERT */
738 #endif /* SOCKETS */
739 #if SKYWALK
740 	bsd_init_kprintf("calling skywalk_init\n");
741 	(void) skywalk_init();
742 #endif /* SKYWALK */
743 #if NETWORKING
744 #if NECP
745 	/* Initialize Network Extension Control Policies */
746 	necp_init();
747 #endif
748 	netagent_init();
749 #endif /* NETWORKING */
750 
751 #if defined (__x86_64__)
752 	hvg_bsd_init();
753 #endif /* __x86_64__ */
754 
755 #if CONFIG_FREEZE
756 #ifndef CONFIG_MEMORYSTATUS
757     #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
758 #endif
759 	/* Initialise background freezing */
760 	bsd_init_kprintf("calling memorystatus_freeze_init\n");
761 	memorystatus_freeze_init();
762 #endif
763 
764 #if CONFIG_MEMORYSTATUS
765 	/* Initialize kernel memory status notifications */
766 	bsd_init_kprintf("calling memorystatus_init\n");
767 	memorystatus_init();
768 #endif /* CONFIG_MEMORYSTATUS */
769 
770 	bsd_init_kprintf("calling sysctl_mib_init\n");
771 	sysctl_mib_init();
772 
773 	bsd_init_kprintf("calling bsd_autoconf\n");
774 	bsd_autoconf();
775 
776 #if CONFIG_DTRACE
777 	dtrace_postinit();
778 #endif
779 
780 	/*
781 	 * We attach the loopback interface *way* down here to ensure
782 	 * it happens after autoconf(), otherwise it becomes the
783 	 * "primary" interface.
784 	 */
785 #include <loop.h>
786 #if NLOOP > 0
787 	bsd_init_kprintf("calling loopattach\n");
788 	loopattach();                   /* XXX */
789 #endif
790 #if NGIF
791 	/* Initialize gif interface (after lo0) */
792 	gif_init();
793 #endif
794 
795 #if PFLOG
796 	/* Initialize packet filter log interface */
797 	pfloginit();
798 #endif /* PFLOG */
799 
800 #if NETHER > 0
801 	/* Register the built-in dlil ethernet interface family */
802 	bsd_init_kprintf("calling ether_family_init\n");
803 	ether_family_init();
804 #endif /* ETHER */
805 
806 #if NETWORKING
807 #if CONTENT_FILTER
808 	cfil_init();
809 #endif
810 
811 #if PACKET_MANGLER
812 	pkt_mnglr_init();
813 #endif
814 
815 	/*
816 	 * Register subsystems with kernel control handlers
817 	 */
818 	utun_register_control();
819 #if IPSEC
820 	ipsec_register_control();
821 #endif /* IPSEC */
822 	netsrc_init();
823 	nstat_init();
824 	tcp_cc_init();
825 #if MPTCP
826 	mptcp_control_register();
827 #endif /* MPTCP */
828 
829 #if REMOTE_VIF
830 	rvi_init();
831 #endif /* REMOTE_VIF */
832 
833 	/*
834 	 * The networking stack is now initialized so it is a good time to call
835 	 * the clients that are waiting for the networking stack to be usable.
836 	 */
837 	bsd_init_kprintf("calling net_init_run\n");
838 	net_init_run();
839 #endif /* NETWORKING */
840 
841 	bsd_init_kprintf("calling inittodr\n");
842 	inittodr(0);
843 
844 	/* Mount the root file system. */
845 	while (TRUE) {
846 		int err;
847 
848 		bsd_init_kprintf("calling setconf\n");
849 		setconf();
850 #if CONFIG_NETBOOT
851 		netboot = (mountroot == netboot_mountroot);
852 #endif
853 
854 		bsd_init_kprintf("vfs_mountroot\n");
855 		if (0 == (err = vfs_mountroot())) {
856 			break;
857 		}
858 		rootdevice[0] = '\0';
859 #if CONFIG_NETBOOT
860 		if (netboot) {
861 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
862 			vc_progress_set(FALSE, 0);
863 			for (uint32_t i = 1; 1; i *= 2) {
864 				printf("bsd_init: failed to mount network root, error %d, %s\n",
865 				    err, PE_boot_args());
866 				printf("We are hanging here...\n");
867 				IOSleep(i * 60 * 1000);
868 			}
869 			/*NOTREACHED*/
870 		}
871 #endif
872 		printf("cannot mount root, errno = %d\n", err);
873 	}
874 
875 	IOSecureBSDRoot(rootdevice);
876 
877 	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
878 
879 	bsd_init_kprintf("calling VFS_ROOT\n");
880 	/* Get the vnode for '/'.  set_rootvnode() below points kernproc->p_fd.fd_cdir at it. */
881 	if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, vfs_context_kernel())) {
882 		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
883 	}
884 	(void)vnode_ref(init_rootvnode);
885 	(void)vnode_put(init_rootvnode);
886 
887 	lck_rw_lock_exclusive(&rootvnode_rw_lock);
888 	set_rootvnode(init_rootvnode);
889 	lck_rw_unlock_exclusive(&rootvnode_rw_lock);
890 	init_rootvnode = NULLVP;  /* use rootvnode after this point */
891 
892 
893 	if (!bsd_rooted_ramdisk()) {
894 		boolean_t require_rootauth = FALSE;
895 
896 #if XNU_TARGET_OS_OSX && defined(__arm64__)
897 #if CONFIG_IMAGEBOOT
898 		/* Apple Silicon MacOS */
899 		require_rootauth = !imageboot_desired();
900 #endif // CONFIG_IMAGEBOOT
901 #elif !XNU_TARGET_OS_OSX
902 		/* Non MacOS */
903 		require_rootauth = TRUE;
904 #endif // XNU_TARGET_OS_OSX && defined(__arm64__)
905 
906 		if (require_rootauth) {
907 			/* enforce sealedness */
908 			int autherr = VNOP_IOCTL(rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
909 			if (autherr) {
910 				panic("rootvp not authenticated after mounting");
911 			}
912 		}
913 	}
914 
915 
916 #if CONFIG_NETBOOT
917 	if (netboot) {
918 		int err;
919 
920 		netboot = TRUE;
921 		/* post mount setup */
922 		if ((err = netboot_setup()) != 0) {
923 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
924 			vc_progress_set(FALSE, 0);
925 			for (uint32_t i = 1; 1; i *= 2) {
926 				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
927 				    err, PE_boot_args());
928 				printf("We are hanging here...\n");
929 				IOSleep(i * 60 * 1000);
930 			}
931 			/*NOTREACHED*/
932 		}
933 	}
934 #endif
935 
936 
937 #if CONFIG_IMAGEBOOT
938 	/*
939 	 * See if a system disk image is present. If so, mount it and
940 	 * switch the root vnode to point to it
941 	 */
942 	imageboot_type_t imageboot_type = imageboot_needed();
943 	if (netboot == FALSE && imageboot_type) {
944 		/*
945 		 * An image was found.  No turning back: we're booted
946 		 * with a kernel from the disk image.
947 		 */
948 		bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
949 		imageboot_setup(imageboot_type);
950 		IOSetImageBoot();
951 	}
952 
953 #endif /* CONFIG_IMAGEBOOT */
954 
955 	/* set initial time; all other resource data is  already zero'ed */
956 	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);
957 
958 #if DEVFS
959 	{
960 		char mounthere[] = "/dev"; /* !const because of internal casting */
961 
962 		bsd_init_kprintf("calling devfs_kernel_mount\n");
963 		devfs_kernel_mount(mounthere);
964 	}
965 #endif /* DEVFS */
966 
967 #if CONFIG_BASESYSTEMROOT
968 #if CONFIG_IMAGEBOOT
969 	if (bsdmgroot_bootable()) {
970 		int error;
971 		bool rooted_dmg = false;
972 		bool skip_signature_check = false;
973 
974 		printf("trying to find and mount BaseSystem dmg as root volume\n");
975 #if DEVELOPMENT || DEBUG
976 		printf("(set boot-arg -nobsdmgroot to avoid this)\n");
977 #endif // DEVELOPMENT || DEBUG
978 
979 		char *dmgpath = NULL;
980 		dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK | Z_NOFAIL);
981 
982 		error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg, &skip_signature_check);
983 		if (error) {
984 			bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
985 		} else {
986 			PE_parse_boot_argn("bsdmgpath", dmgpath, sizeof(dmgpath));
987 
988 			bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
989 
990 			error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg, skip_signature_check);
991 			if (error) {
992 				bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
993 			} else {
994 				IOSetImageBoot();
995 			}
996 		}
997 		zfree(ZV_NAMEI, dmgpath);
998 	}
999 #else /* CONFIG_IMAGEBOOT */
1000 #error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
1001 #endif /* CONFIG_IMAGEBOOT */
1002 #endif /* CONFIG_BASESYSTEMROOT */
1003 
1004 	/* Initialize signal state for process 0. */
1005 	bsd_init_kprintf("calling siginit\n");
1006 	siginit(kernproc);
1007 
1008 	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1009 	bsd_utaskbootstrap();
1010 
1011 	pal_kernel_announce();
1012 
1013 	bsd_init_kprintf("calling mountroot_post_hook\n");
1014 
1015 #if XNU_TARGET_OS_OSX
1016 	/* invoke post-root-mount hook */
1017 	if (mountroot_post_hook != NULL) {
1018 		mountroot_post_hook();
1019 	}
1020 #endif
1021 
1022 #if 0 /* not yet */
1023 	consider_zone_gc(FALSE);
1024 #endif
1025 
1026 	/*
1027 	 * At this point, we consider the kernel "booted" enough to apply
1028 	 * stricter timeouts.
1029 	 */
1030 	machine_timeout_bsd_init();
1031 
1032 	bsd_init_kprintf("done\n");
1033 }
1034 
1035 void
bsdinit_task(void)1036 bsdinit_task(void)
1037 {
1038 	proc_t p = current_proc();
1039 
1040 	process_name("init", p);
1041 
1042 	/* Set up exception-to-signal reflection */
1043 	ux_handler_setup();
1044 
1045 #if CONFIG_MACF
1046 	mac_cred_label_associate_user(proc_ucred(p));
1047 #endif
1048 
1049 	vm_init_before_launchd();
1050 
1051 #if CONFIG_XNUPOST
1052 	int result = bsd_list_tests();
1053 	result = bsd_do_post();
1054 	if (result != 0) {
1055 		panic("bsd_do_post: Tests failed with result = 0x%08x", result);
1056 	}
1057 #endif
1058 
1059 	bsd_init_kprintf("bsd_do_post - done");
1060 
1061 	load_init_program(p);
1062 	lock_trace = 1;
1063 }
1064 
1065 kern_return_t
bsd_autoconf(void)1066 bsd_autoconf(void)
1067 {
1068 	kprintf("bsd_autoconf: calling kminit\n");
1069 	kminit();
1070 
1071 	/*
1072 	 * Early startup for bsd pseudodevices.
1073 	 */
1074 	{
1075 		struct pseudo_init *pi;
1076 
1077 		for (pi = pseudo_inits; pi->ps_func; pi++) {
1078 			(*pi->ps_func)(pi->ps_count);
1079 		}
1080 	}
1081 
1082 	return IOKitBSDInit();
1083 }
1084 
1085 
1086 #include <sys/disklabel.h>  /* for MAXPARTITIONS */
1087 
1088 static void
setconf(void)1089 setconf(void)
1090 {
1091 	u_int32_t       flags;
1092 	kern_return_t   err;
1093 
1094 	err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1095 	if (err) {
1096 		printf("setconf: IOFindBSDRoot returned an error (%d);"
1097 		    "setting rootdevice to 'sd0a'.\n", err);     /* XXX DEBUG TEMP */
1098 		rootdev = makedev( 6, 0 );
1099 		strlcpy(rootdevice, "sd0a", sizeof(rootdevice));
1100 		flags = 0;
1101 	}
1102 
1103 #if CONFIG_NETBOOT
1104 	if (flags & 1) {
1105 		/* network device */
1106 		mountroot = netboot_mountroot;
1107 	} else {
1108 #endif
1109 	/* otherwise have vfs determine root filesystem */
1110 	mountroot = NULL;
1111 #if CONFIG_NETBOOT
1112 }
1113 #endif
1114 }
1115 
1116 /*
1117  * Boot into the flavor of Recovery dictated by `mode`.
1118  */
1119 boolean_t
bsd_boot_to_recovery(bsd_bootfail_mode_t mode,uuid_t volume_uuid,boolean_t reboot)1120 bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1121 {
1122 	return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1123 }
1124 
1125 void
bsd_utaskbootstrap(void)1126 bsd_utaskbootstrap(void)
1127 {
1128 	thread_t thread;
1129 	struct uthread *ut;
1130 
1131 	/*
1132 	 * Clone the bootstrap process from the kernel process, without
1133 	 * inheriting either task characteristics or memory from the kernel;
1134 	 */
1135 	thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE);
1136 
1137 	/* Hold the reference as it will be dropped during shutdown */
1138 	initproc = proc_find(1);
1139 #if __PROC_INTERNAL_DEBUG
1140 	if (initproc == PROC_NULL) {
1141 		panic("bsd_utaskbootstrap: initproc not set");
1142 	}
1143 #endif
1144 
1145 	zalloc_first_proc_made();
1146 
1147 	/*
1148 	 * Since we aren't going back out the normal way to our parent,
1149 	 * we have to drop the transition locks explicitly.
1150 	 */
1151 	proc_signalend(initproc, 0);
1152 	proc_transend(initproc, 0);
1153 
1154 	ut = (struct uthread *)get_bsdthread_info(thread);
1155 	ut->uu_sigmask = 0;
1156 	act_set_astbsd(thread);
1157 	task_clear_return_wait(get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
1158 }
1159 
1160 static void
parse_bsd_args(void)1161 parse_bsd_args(void)
1162 {
1163 	char namep[48];
1164 
1165 	if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
1166 		boothowto |= RB_SINGLE;
1167 	}
1168 
1169 	if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
1170 		boothowto |= RB_SAFEBOOT;
1171 	}
1172 
1173 	if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
1174 	    sizeof(max_nbuf_headers))) {
1175 		customnbuf = 1;
1176 	}
1177 
1178 #if CONFIG_DARKBOOT
1179 	/*
1180 	 * The darkboot flag is specified by the bootloader and is stored in
1181 	 * boot_args->bootFlags. This flag is available starting revision 2.
1182 	 */
1183 	boot_args *args = (boot_args *) PE_state.bootArgs;
1184 	if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1185 		darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1186 	} else {
1187 		darkboot = 0;
1188 	}
1189 #endif
1190 
1191 #if DEVELOPMENT || DEBUG
1192 	if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1193 		if (strlen(dyld_alt_path) > 0) {
1194 			use_alt_dyld = 1;
1195 		}
1196 	}
1197 #endif /* DEVELOPMENT || DEBUG */
1198 }
1199 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, parse_bsd_args);
1200 
1201 #if CONFIG_BASESYSTEMROOT
1202 
1203 extern bool IOGetBootUUID(char *);
1204 extern bool IOGetApfsPrebootUUID(char *);
1205 
1206 
1207 // This function returns the UUID of the Preboot (and Recovery) folder associated with the
1208 // current boot volume, if applicable. The meaning of the UUID can be
1209 // filesystem-dependent and not all kinds of boots will have a UUID.
1210 // On success, the UUID is copied into the past-in parameter and TRUE is returned.
1211 // In case the current boot has no applicable Preboot UUID, FALSE is returned.
1212 static bool
get_preboot_uuid(uuid_string_t maybe_uuid_string)1213 get_preboot_uuid(uuid_string_t maybe_uuid_string)
1214 {
1215 	// try IOGetApfsPrebootUUID
1216 	if (IOGetApfsPrebootUUID(maybe_uuid_string)) {
1217 		uuid_t maybe_uuid;
1218 		int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1219 		if (error == 0) {
1220 			return true;
1221 		}
1222 	}
1223 
1224 	// try IOGetBootUUID
1225 	if (IOGetBootUUID(maybe_uuid_string)) {
1226 		uuid_t maybe_uuid;
1227 		int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1228 		if (error == 0) {
1229 			return true;
1230 		}
1231 	}
1232 
1233 	// didn't find it
1234 	return false;
1235 }
1236 
1237 #if defined(__arm64__)
1238 extern bool IOGetBootObjectsPath(char *);
1239 #endif
1240 
1241 // Find the BaseSystem.dmg to be used as the initial root volume during certain
1242 // kinds of boots.
1243 // This may mount volumes and lookup vnodes.
1244 // The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1245 // If it returns 0 (no error), then it also writes the absolute path to the
1246 // BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1247 static
1248 int
bsd_find_basesystem_dmg(char * bsdmgpath_out,bool * rooted_dmg,bool * skip_signature_check)1249 bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
1250 {
1251 	int error;
1252 	size_t len;
1253 	char *dmgbasepath;
1254 	char *dmgpath;
1255 	bool allow_rooted_dmg = false;
1256 
1257 	dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1258 	dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1259 	vnode_t imagevp = NULLVP;
1260 
1261 #if DEVELOPMENT || DEBUG
1262 	allow_rooted_dmg = true;
1263 #endif
1264 
1265 	//must provide output bool
1266 	if (rooted_dmg && skip_signature_check) {
1267 		*rooted_dmg = false;
1268 		*skip_signature_check = false;
1269 	} else {
1270 		error = EINVAL;
1271 		goto done;
1272 	}
1273 
1274 	error = vfs_mount_recovery();
1275 	if (error) {
1276 		goto done;
1277 	}
1278 
1279 	len = strlcpy(dmgbasepath, "/System/Volumes/Recovery/", MAXPATHLEN);
1280 	if (len > MAXPATHLEN) {
1281 		error = ENAMETOOLONG;
1282 		goto done;
1283 	}
1284 
1285 	if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
1286 		*skip_signature_check = true;
1287 		allow_rooted_dmg = true;
1288 	}
1289 
1290 #if defined(__arm64__)
1291 	char boot_obj_path[MAXPATHLEN] = "";
1292 
1293 	if (IOGetBootObjectsPath(boot_obj_path)) {
1294 		if (boot_obj_path[0] == '/') {
1295 			dmgbasepath[len - 1] = '\0';
1296 		}
1297 
1298 		len = strlcat(dmgbasepath, boot_obj_path, MAXPATHLEN);
1299 		if (len > MAXPATHLEN) {
1300 			error = ENAMETOOLONG;
1301 			goto done;
1302 		}
1303 
1304 		len = strlcat(dmgbasepath, "/usr/standalone/firmware/", MAXPATHLEN);
1305 		if (len > MAXPATHLEN) {
1306 			error = ENAMETOOLONG;
1307 			goto done;
1308 		}
1309 
1310 		if (allow_rooted_dmg) {
1311 			len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1312 			if (len > MAXPATHLEN) {
1313 				error = ENAMETOOLONG;
1314 				goto done;
1315 			}
1316 
1317 			len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1318 			if (len > MAXPATHLEN) {
1319 				error = ENAMETOOLONG;
1320 				goto done;
1321 			}
1322 
1323 			error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1324 			if (error == 0) {
1325 				*rooted_dmg = true;
1326 				*skip_signature_check = true;
1327 				goto done;
1328 			}
1329 			memset(dmgpath, 0, MAXPATHLEN);
1330 		}
1331 
1332 		len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1333 		if (len > MAXPATHLEN) {
1334 			error = ENAMETOOLONG;
1335 			goto done;
1336 		}
1337 
1338 		len = strlcat(dmgpath, "arm64eBaseSystem.dmg", MAXPATHLEN);
1339 		if (len > MAXPATHLEN) {
1340 			error = ENAMETOOLONG;
1341 			goto done;
1342 		}
1343 
1344 		error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1345 		if (error == 0) {
1346 			goto done;
1347 		}
1348 		memset(dmgpath, 0, MAXPATHLEN);
1349 		dmgbasepath[strlen("/System/Volumes/Recovery/")] = '\0';
1350 	}
1351 #endif // __arm64__
1352 
1353 	uuid_string_t preboot_uuid;
1354 	if (!get_preboot_uuid(preboot_uuid)) {
1355 		// no preboot? bail out
1356 		return EINVAL;
1357 	}
1358 
1359 	len = strlcat(dmgbasepath, preboot_uuid, MAXPATHLEN);
1360 	if (len > MAXPATHLEN) {
1361 		error = ENAMETOOLONG;
1362 		goto done;
1363 	}
1364 
1365 	if (allow_rooted_dmg) {
1366 		// Try BaseSystem.rooted.dmg
1367 		len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1368 		if (len > MAXPATHLEN) {
1369 			error = ENAMETOOLONG;
1370 			goto done;
1371 		}
1372 
1373 		len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
1374 		if (len > MAXPATHLEN) {
1375 			error = ENAMETOOLONG;
1376 			goto done;
1377 		}
1378 
1379 		error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1380 		if (error == 0) {
1381 			// we found it! success!
1382 			*rooted_dmg = true;
1383 			*skip_signature_check = true;
1384 			goto done;
1385 		}
1386 	}
1387 
1388 	// Try BaseSystem.dmg
1389 	len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1390 	if (len > MAXPATHLEN) {
1391 		error = ENAMETOOLONG;
1392 		goto done;
1393 	}
1394 
1395 	len = strlcat(dmgpath, "/BaseSystem.dmg", MAXPATHLEN);
1396 	if (len > MAXPATHLEN) {
1397 		error = ENAMETOOLONG;
1398 		goto done;
1399 	}
1400 
1401 	error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1402 	if (error == 0) {
1403 		// success!
1404 		goto done;
1405 	}
1406 
1407 done:
1408 	if (error == 0) {
1409 		strlcpy(bsdmgpath_out, dmgpath, MAXPATHLEN);
1410 	} else {
1411 		bsd_init_kprintf("%s: error %d\n", __func__, error);
1412 	}
1413 	if (imagevp != NULLVP) {
1414 		vnode_put(imagevp);
1415 	}
1416 	zfree(ZV_NAMEI, dmgpath);
1417 	zfree(ZV_NAMEI, dmgbasepath);
1418 	return error;
1419 }
1420 
1421 static boolean_t
bsdmgroot_bootable(void)1422 bsdmgroot_bootable(void)
1423 {
1424 #if defined(__arm64__)
1425 #define BSDMGROOT_DEFAULT true
1426 #else
1427 #define BSDMGROOT_DEFAULT false
1428 #endif
1429 
1430 	boolean_t resolved = BSDMGROOT_DEFAULT;
1431 
1432 	boolean_t boot_arg_bsdmgroot = false;
1433 	boolean_t boot_arg_nobsdmgroot = false;
1434 	int error;
1435 	mount_t mp;
1436 	boolean_t root_part_of_volume_group = false;
1437 	struct vfs_attr vfsattr;
1438 
1439 	mp = rootvnode->v_mount;
1440 	VFSATTR_INIT(&vfsattr);
1441 	VFSATTR_WANTED(&vfsattr, f_capabilities);
1442 
1443 	boot_arg_bsdmgroot = PE_parse_boot_argn("-bsdmgroot", NULL, 0);
1444 	boot_arg_nobsdmgroot = PE_parse_boot_argn("-nobsdmgroot", NULL, 0);
1445 
1446 	error = vfs_getattr(mp, &vfsattr, vfs_context_kernel());
1447 	if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1448 		if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1449 		    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1450 			root_part_of_volume_group = true;
1451 		}
1452 	}
1453 
1454 	boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1455 
1456 	// Start with the #defined default above.
1457 	// If booting to single-user mode, default to false, because single-
1458 	// user mode inside the BaseSystem is probably not what's wanted.
1459 	// If the 'yes' boot-arg is set, we'll allow that even in single-user
1460 	// mode, we'll assume you know what you're doing.
1461 	// The 'no' boot-arg overpowers the 'yes' boot-arg.
1462 	// In any case, we will not attempt to root from BaseSystem if the
1463 	// original (booter-chosen) root volume isn't in a volume group.
1464 	// This is just out of an abundance of caution: if the boot environment
1465 	// seems to be "something other than a standard install",
1466 	// we'll be conservative in messing with the root volume.
1467 
1468 	if (singleuser) {
1469 		resolved = false;
1470 	}
1471 
1472 	if (boot_arg_bsdmgroot) {
1473 		resolved = true;
1474 	}
1475 
1476 	if (boot_arg_nobsdmgroot) {
1477 		resolved = false;
1478 	}
1479 
1480 	if (!root_part_of_volume_group) {
1481 		resolved = false;
1482 	}
1483 
1484 	return resolved;
1485 }
1486 #endif // CONFIG_BASESYSTEMROOT
1487 
1488 void
bsd_exec_setup(int scale)1489 bsd_exec_setup(int scale)
1490 {
1491 	switch (scale) {
1492 	case 0:
1493 	case 1:
1494 		bsd_simul_execs = BSD_SIMUL_EXECS;
1495 		break;
1496 	case 2:
1497 	case 3:
1498 		bsd_simul_execs = 65;
1499 		break;
1500 	case 4:
1501 	case 5:
1502 		bsd_simul_execs = 129;
1503 		break;
1504 	case 6:
1505 	case 7:
1506 		bsd_simul_execs = 257;
1507 		break;
1508 	default:
1509 		bsd_simul_execs = 513;
1510 		break;
1511 	}
1512 	bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1513 }
1514 
1515 #if !CONFIG_NETBOOT
/*
 * Stand-in for netboot_root() used when the kernel is built without
 * CONFIG_NETBOOT; it always returns 0.
 */
int netboot_root(void);

int
netboot_root(void)
{
	const int not_netbooted = 0;

	return not_netbooted;
}
1524 #endif
1525