xref: /xnu-12377.41.6/bsd/kern/bsd_init.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  *
29  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  * (c) UNIX System Laboratories, Inc.
32  * All or some portions of this file are derived from material licensed
33  * to the University of California by American Telephone and Telegraph
34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35  * the permission of UNIX System Laboratories, Inc.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
66  */
67 
68 /*
69  *
70  * Mach Operating System
71  * Copyright (c) 1987 Carnegie-Mellon University
72  * All rights reserved.  The CMU software License Agreement specifies
73  * the terms and conditions for use and redistribution.
74  */
75 /*
76  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77  * support for mandatory and extensible security protections.  This notice
78  * is included in support of clause 2.2 (b) of the Apple Public License,
79  * Version 2.0.
80  */
81 
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/conf.h>
91 #include <sys/buf_internal.h>
92 #include <sys/user.h>
93 #include <sys/time.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96 
97 #include <security/audit/audit.h>
98 
99 #include <sys/malloc.h>
100 #include <sys/dkstat.h>
101 #include <sys/codesign.h>
102 
103 #include <kern/startup.h>
104 #include <kern/thread.h>
105 #include <kern/task.h>
106 #include <kern/ast.h>
107 #include <kern/zalloc.h>
108 #include <kern/ux_handler.h>            /* for ux_handler_setup() */
109 #include <kern/sched_hygiene.h>
110 
111 #if (DEVELOPMENT || DEBUG)
112 #include <kern/debug.h>
113 #endif
114 
115 #include <mach/vm_param.h>
116 
117 #include <vm/vm_map_xnu.h>
118 #include <vm/vm_kern_xnu.h>
119 
120 #include <sys/reboot.h>
121 #include <dev/busvar.h>                 /* for pseudo_inits */
122 #include <sys/kdebug.h>
123 #include <sys/monotonic.h>
124 
125 #include <mach/mach_types.h>
126 #include <mach/vm_prot.h>
127 #include <mach/semaphore.h>
128 #include <mach/sync_policy.h>
129 #include <kern/clock.h>
130 #include <sys/csr.h>
131 #include <mach/kern_return.h>
132 #include <mach/thread_act.h>            /* for thread_resume() */
133 #include <sys/mcache.h>                 /* for mcache_init() */
134 #include <sys/mbuf.h>                   /* for mbinit() */
135 #include <sys/event.h>                  /* for knote_init() */
136 #include <sys/eventhandler.h>           /* for eventhandler_init() */
137 #include <sys/kern_memorystatus.h>      /* for memorystatus_init() */
138 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
139 #include <sys/aio_kern.h>               /* for aio_init() */
140 #include <sys/semaphore.h>              /* for psem_cache_init() */
141 #include <net/dlil.h>                   /* for dlil_init() */
142 #include <net/iptap.h>                  /* for iptap_init() */
143 #include <sys/socketvar.h>              /* for socketinit() */
144 #include <sys/protosw.h>                /* for domaininit() */
145 #include <kern/sched_prim.h>            /* for thread_wakeup() */
146 #include <net/if_ether.h>               /* for ether_family_init() */
147 #include <net/if_gif.h>                 /* for gif_init() */
148 #include <miscfs/devfs/devfsdefs.h>     /* for devfs_kernel_mount() */
149 #include <vm/vm_kern.h>                 /* for kmem_suballoc() */
150 #include <sys/proc_uuid_policy.h>       /* proc_uuid_policy_init() */
151 #include <netinet/flow_divert.h>        /* flow_divert_init() */
152 #include <net/content_filter.h>         /* for cfil_init() */
153 #include <net/necp.h>                   /* for necp_init() */
154 #include <net/network_agent.h>          /* for netagent_init() */
155 #include <net/packet_mangler.h>         /* for pkt_mnglr_init() */
156 #include <net/if_utun.h>                /* for utun_register_control() */
157 #include <netinet6/ipsec.h>             /* for ipsec_init() */
158 #include <net/if_redirect.h>            /* for if_redirect_init() */
159 #include <net/netsrc.h>                 /* for netsrc_init() */
160 #include <net/ntstat.h>                 /* for nstat_init() */
161 #include <netinet/mptcp_var.h>          /* for mptcp_control_register() */
162 #include <net/nwk_wq.h>                 /* for nwk_wq_init */
163 #include <net/restricted_in_port.h>     /* for restricted_in_port_init() */
164 #include <net/remote_vif.h>             /* for rvi_init() */
165 #include <net/kctl_test.h>              /* for kctl_test_init() */
166 #include <net/aop/kpi_aop.h>            /* for kern_aop_net_init() */
167 #include <netinet/kpi_ipfilter_var.h>   /* for ipfilter_init() */
168 #include <kern/assert.h>                /* for assert() */
169 #include <sys/kern_overrides.h>         /* for init_system_override() */
170 #include <sys/lockf.h>                  /* for lf_init() */
171 #include <sys/fsctl.h>
172 
173 #include <net/init.h>
174 
175 #if CONFIG_MACF
176 #include <security/mac_framework.h>
177 #include <security/mac_internal.h>      /* mac_init_bsd() */
178 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
179 #endif
180 
181 #include <machine/exec.h>
182 
183 #if CONFIG_NETBOOT
184 #include <sys/netboot.h>
185 #endif
186 
187 #if CONFIG_IMAGEBOOT
188 #include <sys/imageboot.h>
189 #endif
190 
191 #if PFLOG
192 #include <net/if_pflog.h>
193 #endif
194 
195 #if SKYWALK
196 #include <skywalk/os_skywalk_private.h>
197 #endif /* SKYWALK */
198 
199 #include <pexpert/pexpert.h>
200 #include <machine/pal_routines.h>
201 #include <console/video_console.h>
202 
203 #if CONFIG_XNUPOST
204 #include <tests/xnupost.h>
205 #endif
206 
207 void * get_user_regs(thread_t);         /* XXX kludge for <machine/thread.h> */
208 void IOKitInitializeTime(void);         /* XXX */
209 void IOSleep(unsigned int);             /* XXX */
210 void IOSetImageBoot(void);              /* XXX */
211 void loopattach(void);                  /* XXX */
212 
213 void ipc_task_enable(task_t task);
214 
215 const char *const copyright =
216     "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
217     "The Regents of the University of California. "
218     "All rights reserved.\n\n";
219 
220 /* Components of the first process -- never freed. */
221 SECURITY_READ_ONLY_LATE(struct vfs_context) vfs_context0;
222 
/*
 * Statically allocated resource limits for the kernel process: bsd_init()
 * fills in limit0.pl_rlimit[] and publishes it as kernproc->p_limit.
 */
223 static struct plimit limit0;
/* Statistics block for the kernel process; bsd_init() stores its address in kernproc->p_stats. */
224 static struct pstats pstats0;
/* The kernel's own proc; bsd_init() attaches it to kernel_task and names it "kernel_task". */
225 SECURITY_READ_ONLY_LATE(proc_t) kernproc;
/* NOTE(review): presumably the first user process; it is not assigned in the part of the file visible here. */
226 proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
227 
228 long tk_cancc;
229 long tk_nin;
230 long tk_nout;
231 long tk_rawcc;
232 
233 int lock_trace = 0;
234 /* Global variables to make pstat happy. We do swapping differently */
235 int nswdev, nswap;
236 int nswapmap;
237 void *swapmap;
238 struct swdevt swdevt[1];
239 
240 static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
241 LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
242 LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
243 
244 dev_t   rootdev;                /* device of the root */
245 dev_t   dumpdev;                /* device to take dumps on */
246 long    dumplo;                 /* offset into dumpdev */
247 long    hostid;
248 char    hostname[MAXHOSTNAMELEN];
249 char    domainname[MAXDOMNAMELEN];
250 char    rootdevice[DEVMAXNAMESIZE];
251 
252 struct  vnode *rootvp;
253 bool rootvp_is_ssd = false;
254 SECURITY_READ_ONLY_LATE(int) boothowto;
255 /*
256  * -minimalboot indicates that we want userspace to be bootstrapped to a
257  * minimal environment.  What constitutes minimal is up to the bootstrap
258  * process.
259  */
260 TUNABLE(int, minimalboot, "-minimalboot", 0);
261 #if CONFIG_DARKBOOT
262 int darkboot = 0;
263 #endif
264 
265 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
266 extern void IOSecureBSDRoot(const char * rootName);
267 extern kern_return_t IOKitBSDInit(void );
268 extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
269 extern void kminit(void);
270 extern void bsd_bufferinit(void);
271 extern void throttle_init(void);
272 
273 vm_map_t        bsd_pageable_map;
274 #if CONFIG_MBUF_MCACHE
275 vm_map_t        mb_map;
276 #endif /* CONFIG_MBUF_MCACHE */
277 
/*
 * Number of slots in the exec-argument cache.  bsd_init() asserts this is
 * non-zero and uses it to size execargs_cache and to seed the two counters
 * below.  NOTE(review): the code that sets it is not visible in this part
 * of the file.
 */
278 static  int bsd_simul_execs;
/* Size passed to kmem_suballoc() when bsd_init() creates bsd_pageable_map. */
279 static int bsd_pageable_map_size;
/* Both counters are set to bsd_simul_execs by bsd_init(). */
280 __private_extern__ int execargs_cache_size = 0;
281 __private_extern__ int execargs_free_count = 0;
/* Array of bsd_simul_execs vm_offset_t entries, allocated by bsd_init() with zalloc_permanent(). */
282 __private_extern__ vm_offset_t * execargs_cache = NULL;
283 
284 void bsd_exec_setup(int);
285 
286 __private_extern__ int bootarg_execfailurereports = 0;
287 
288 #if __x86_64__
289 __private_extern__ TUNABLE(int, bootarg_no32exec, "no32exec", 1);
290 #endif
291 
292 #if DEVELOPMENT || DEBUG
293 /* Prevent kernel-based ASLR from being used. */
294 __private_extern__ TUNABLE(bool, bootarg_disable_aslr, "-disable_aslr", 0);
295 #endif
296 
297 /*
298  * Allow an alternate dyld to be used for testing.
299  */
300 
301 #if DEVELOPMENT || DEBUG
302 char dyld_alt_path[MAXPATHLEN];
303 int use_alt_dyld = 0;
304 
305 char panic_on_proc_crash[NAME_MAX];
306 int use_panic_on_proc_crash = 0;
307 
308 char panic_on_proc_exit[NAME_MAX];
309 int use_panic_on_proc_exit = 0;
310 
311 char panic_on_proc_spawn_fail[NAME_MAX];
312 int use_panic_on_proc_spawn_fail = 0;
313 
314 char dyld_suffix[NAME_MAX];
315 int use_dyld_suffix = 0;
316 #endif
317 
318 #if DEVELOPMENT || DEBUG
319 __private_extern__ bool bootarg_hide_process_traced = 0;
320 #endif
321 
322 int     cmask = CMASK;
323 extern int customnbuf;
324 
325 kern_return_t bsd_autoconf(void);
326 void bsd_utaskbootstrap(void);
327 
328 #if CONFIG_DEV_KMEM
329 extern void dev_kmem_init(void);
330 #endif
331 static void process_name(const char *, proc_t);
332 
333 static void setconf(void);
334 
335 #if CONFIG_BASESYSTEMROOT
336 static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
337 static boolean_t bsdmgroot_bootable(void);
338 #endif // CONFIG_BASESYSTEMROOT
339 
340 bool bsd_rooted_ramdisk(void);
341 
342 #if SYSV_SHM
343 extern void sysv_shm_lock_init(void);
344 #endif
345 #if SYSV_SEM
346 extern void sysv_sem_lock_init(void);
347 #endif
348 #if SYSV_MSG
349 extern void sysv_msg_lock_init(void);
350 #endif
351 
352 #if CONFIG_MACF
353 #if defined (__i386__) || defined (__x86_64__)
354 /* MACF policy_check configuration flags; see policy_check.c for details */
355 extern int check_policy_init(int);
356 #endif
357 #endif  /* CONFIG_MACF */
358 
359 /* If we are using CONFIG_DTRACE */
360 #if CONFIG_DTRACE
361 extern void dtrace_postinit(void);
362 #endif
363 
364 /*
365  * Initialization code.
366  * Called from cold start routine as
367  * soon as a stack and segmentation
368  * have been established.
369  * Functions:
370  *	turn on clock
371  *	hand craft 0th process
372  *	call all initialization routines
373  *  hand craft 1st user process
374  */
375 
376 /*
377  *	Sets the name for the given task.
378  */
379 static void
process_name(const char * s,proc_t p)380 process_name(const char *s, proc_t p)
381 {
382 	strlcpy(p->p_comm, s, sizeof(p->p_comm));
383 	strlcpy(p->p_name, s, sizeof(p->p_name));
384 }
385 
386 /* To allow these values to be patched, they're globals here */
387 #include <machine/vmparam.h>
/*
 * Initial stack, data and core-file limits.  bsd_init() copies these into
 * the RLIMIT_STACK, RLIMIT_DATA and RLIMIT_CORE slots of limit0.
 */
388 struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
389 struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
390 struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
391 
392 extern struct os_refgrp rlimit_refgrp;
393 
394 extern int      (*mountroot)(void);
395 
396 LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
397 LCK_GRP_DECLARE(proc_lck_grp, "proc");
398 LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
399 LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
400 LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
401 LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
402 LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
403 LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
404 LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
405 
406 
407 LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
408 
409 #if XNU_TARGET_OS_OSX
410 /* hook called after root is mounted XXX temporary hack */
411 void (*mountroot_post_hook)(void);
412 void (*unmountroot_pre_hook)(void);
413 #endif
414 void set_rootvnode(vnode_t);
415 
416 extern lck_rw_t rootvnode_rw_lock;
417 
/*
 * Address range used for bsd_pageable_map: bsd_init() passes
 * bsd_pageable_range.min_address to kmem_suballoc() together with
 * bsd_pageable_map_size.
 *
 * The block registered below reports that size and asserts it is non-zero,
 * so bsd_pageable_map_size must already be set when the block is invoked.
 * NOTE(review): where it is set is not visible in this part of the file.
 */
418 SECURITY_READ_ONLY_LATE(struct mach_vm_range) bsd_pageable_range = {};
419 KMEM_RANGE_REGISTER_DYNAMIC(bsd_pageable, &bsd_pageable_range, ^() {
420 	assert(bsd_pageable_map_size != 0);
421 	return (vm_map_size_t) bsd_pageable_map_size;
422 });
423 
424 /* called with an iocount and usecount on new_rootvnode */
425 void
set_rootvnode(vnode_t new_rootvnode)426 set_rootvnode(vnode_t new_rootvnode)
427 {
428 	mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
429 	vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
430 	vnode_t old_rootvnode = rootvnode;
431 
432 	new_rootvnode->v_flag |= VROOT;
433 	rootvp = new_devvp;
434 	rootvnode = new_rootvnode;
435 	kernproc->p_fd.fd_cdir = new_rootvnode;
436 	if (new_devvp != NULL) {
437 		rootdev = vnode_specrdev(new_devvp);
438 	} else if (new_mount != NULL) {
439 		rootdev = vfs_statfs(new_mount)->f_fsid.val[0];  /* like ATTR_CMN_DEVID */
440 	} else {
441 		rootdev = NODEV;
442 	}
443 
444 	if (old_rootvnode) {
445 		vnode_rele(old_rootvnode);
446 	}
447 }
448 
449 #define RAMDEV "md0"
450 
451 bool
bsd_rooted_ramdisk(void)452 bsd_rooted_ramdisk(void)
453 {
454 	bool is_ramdisk = false;
455 	char *dev_path = zalloc(ZV_NAMEI);
456 	if (dev_path == NULL) {
457 		panic("failed to allocate devpath string!");
458 	}
459 
460 	if (PE_parse_boot_argn("rd", dev_path, MAXPATHLEN)) {
461 		if (strncmp(dev_path, RAMDEV, strlen(RAMDEV)) == 0) {
462 			is_ramdisk = true;
463 		}
464 	}
465 
466 	zfree(ZV_NAMEI, dev_path);
467 	return is_ramdisk;
468 }
469 
470 /*
471  * This function is called very early on in the Mach startup, from the
472  * function start_kernel_threads() in osfmk/kern/startup.c.  It's called
473  * in the context of the current (startup) task using a call to the
474  * function kernel_thread_create() to jump into start_kernel_threads().
475  * Internally, kernel_thread_create() calls thread_create_internal(),
476  * which calls uthread_init().  The function of uthread_init() is
477  * normally to init a uthread structure, and fill out the uu_sigmask,
478  * tro_ucred/tro_proc fields.  It skips filling these out in the case of the "task"
479  * being "kernel_task", because the order of operation is inverted.  To
480  * account for that, we need to manually fill in at least the contents
481  * of the tro_ucred field so that the uthread structure can be
482  * used like any other.
483  */
484 void
bsd_init(void)485 bsd_init(void)
486 {
487 	struct uthread *ut;
488 	vnode_t init_rootvnode = NULLVP;
489 	struct proc_ro_data kernproc_ro_data = {
490 		.p_csflags = CS_VALID,
491 	};
492 	struct task_ro_data kerntask_ro_data = { };
493 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
494 	boolean_t       netboot = FALSE;
495 #endif
496 
497 #if HAS_UPSI_FAILURE_INJECTION
498 	check_for_failure_injection(XNU_STAGE_BSD_INIT_START);
499 #endif
500 
501 #define DEBUG_BSDINIT 0
502 
503 #if DEBUG_BSDINIT
504 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
505 #else
506 #define bsd_init_kprintf(x, ...)
507 #endif
508 
509 	throttle_init();
510 
511 	printf(copyright);
512 
513 #if CONFIG_DEV_KMEM
514 	bsd_init_kprintf("calling dev_kmem_init\n");
515 	dev_kmem_init();
516 #endif
517 
518 	/* Initialize kauth subsystem before instancing the first credential */
519 	bsd_init_kprintf("calling kauth_init\n");
520 	kauth_init();
521 
522 	/* kernel_task->proc = kernproc; */
523 	set_bsdtask_info(kernel_task, (void *)kernproc);
524 
525 	/* Set the parent of kernproc to itself */
526 	kernproc->p_pptr = kernproc;
527 
528 	/* Set the state to SRUN */
529 	kernproc->p_stat = SRUN;
530 
531 	/* Set the proc flags */
532 #if defined(__LP64__)
533 	kernproc->p_flag = P_SYSTEM | P_LP64;
534 #else
535 	kernproc->p_flag = P_SYSTEM;
536 #endif
537 
538 	kernproc->p_nice = NZERO;
539 	TAILQ_INIT(&kernproc->p_uthlist);
540 
541 	/* set the cred */
542 	kauth_cred_set(&kernproc_ro_data.p_ucred.__smr_ptr, vfs_context0.vc_ucred);
543 	kernproc->p_proc_ro = proc_ro_alloc(kernproc, &kernproc_ro_data,
544 	    kernel_task, &kerntask_ro_data);
545 
546 	/* give kernproc a name */
547 	bsd_init_kprintf("calling process_name\n");
548 	process_name("kernel_task", kernproc);
549 
550 	/* Allocate proc lock attribute */
551 
552 	lck_mtx_init(&kernproc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
553 	lck_mtx_init(&kernproc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
554 #if CONFIG_AUDIT
555 	lck_mtx_init(&kernproc->p_audit_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
556 #endif /* CONFIG_AUDIT */
557 	lck_spin_init(&kernproc->p_slock, &proc_slock_grp, &proc_lck_attr);
558 
559 	/* Init the file descriptor table. */
560 	fdt_init(kernproc);
561 	kernproc->p_fd.fd_cmask = (mode_t)cmask;
562 
563 	assert(bsd_simul_execs != 0);
564 	execargs_cache_size = bsd_simul_execs;
565 	execargs_free_count = bsd_simul_execs;
566 	execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
567 	    ZALIGN(vm_offset_t));
568 
569 	if (current_task() != kernel_task) {
570 		printf("bsd_init: We have a problem, "
571 		    "current task is not kernel task\n");
572 	}
573 
574 	bsd_init_kprintf("calling get_bsdthread_info\n");
575 	ut = current_uthread();
576 
577 #if CONFIG_MACF
578 	/*
579 	 * Initialize the MAC Framework
580 	 */
581 	mac_policy_initbsd();
582 
583 #if defined (__i386__) || defined (__x86_64__)
584 	/*
585 	 * We currently only support this on i386/x86_64, as that is the
586 	 * only lock code we have instrumented so far.
587 	 */
588 	int policy_check_flags;
589 	PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
590 	check_policy_init(policy_check_flags);
591 #endif
592 #endif /* MAC */
593 
594 	/*
595 	 * Make a session and group
596 	 *
597 	 * No need to hold the pgrp lock,
598 	 * there are no other BSD threads yet.
599 	 */
600 	struct session *session0 = session_alloc(kernproc);
601 	struct pgrp *pgrp0 = pgrp_alloc(0, PGRP_REF_NONE);
602 	session0->s_ttypgrpid = 0;
603 	pgrp0->pg_session = session0;
604 
605 	/*
606 	 * Create process 0.
607 	 */
608 	proc_list_lock();
609 	os_ref_init_mask(&kernproc->p_refcount, P_REF_BITS, &p_refgrp, P_REF_NONE);
610 	os_ref_init_raw(&kernproc->p_waitref, &p_refgrp);
611 	proc_ref_hold_proc_task_struct(kernproc);
612 
613 	/*
614 	 * Make a group and session, then simulate pinsertchild(),
615 	 * adjusted for the kernel.
616 	 */
617 	pghash_insert_locked(pgrp0);
618 
619 	LIST_INSERT_HEAD(&pgrp0->pg_members, kernproc, p_pglist);
620 	smr_init_store(&kernproc->p_pgrp, pgrp0);
621 	LIST_INSERT_HEAD(&allproc, kernproc, p_list);
622 
623 	LIST_INSERT_HEAD(SESSHASH(0), session0, s_hash);
624 	proc_list_unlock();
625 
626 	proc_set_task(kernproc, kernel_task);
627 
628 #if DEVELOPMENT || DEBUG
629 	if (bootarg_disable_aslr) {
630 		kernproc->p_flag |= P_DISABLE_ASLR;
631 	}
632 #endif
633 
634 	TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
635 
636 	/*
637 	 * Officially associate the kernel with vfs_context0.vc_ucred.
638 	 */
639 #if CONFIG_MACF
640 	mac_cred_label_associate_kernel(vfs_context0.vc_ucred);
641 #endif
642 	proc_update_creds_onproc(kernproc, vfs_context0.vc_ucred);
643 
644 	TAILQ_INIT(&kernproc->p_aio_activeq);
645 	TAILQ_INIT(&kernproc->p_aio_doneq);
646 	kernproc->p_aio_total_count = 0;
647 
648 	/* Create the limits structures. */
649 	for (uint32_t i = 0; i < ARRAY_COUNT(limit0.pl_rlimit); i++) {
650 		limit0.pl_rlimit[i].rlim_cur =
651 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
652 	}
653 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
654 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
655 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
656 	limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
657 	limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
658 	limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
659 	os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
660 
661 	smr_init_store(&kernproc->p_limit, &limit0);
662 	kernproc->p_stats = &pstats0;
663 	kernproc->p_subsystem_root_path = NULL;
664 
665 	/*
666 	 * Charge root for one process: launchd.
667 	 */
668 	bsd_init_kprintf("calling chgproccnt\n");
669 	(void)chgproccnt(0, 1);
670 
671 	/*
672 	 *	Allocate a kernel submap for pageable memory
673 	 *	for temporary copying (execve()).
674 	 */
675 	bsd_init_kprintf("calling kmem_suballoc\n");
676 	bsd_pageable_map = kmem_suballoc(kernel_map,
677 	    &bsd_pageable_range.min_address,
678 	    (vm_size_t)bsd_pageable_map_size,
679 	    VM_MAP_CREATE_PAGEABLE,
680 	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
681 	    KMS_PERMANENT | KMS_NOFAIL,
682 	    VM_KERN_MEMORY_BSD).kmr_submap;
683 
684 	/*
685 	 * Initialize buffers and hash links for buffers
686 	 *
687 	 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
688 	 *		happen after a credential has been associated with
689 	 *		the kernel task.
690 	 */
691 	bsd_init_kprintf("calling bsd_bufferinit\n");
692 	bsd_bufferinit();
693 
694 	/*
695 	 * Initialize the calendar.
696 	 */
697 	bsd_init_kprintf("calling IOKitInitializeTime\n");
698 	IOKitInitializeTime();
699 
700 	/* Initialize the file systems. */
701 	bsd_init_kprintf("calling vfsinit\n");
702 	vfsinit();
703 
704 #if CONFIG_PROC_UUID_POLICY
705 	/* Initial proc_uuid_policy subsystem */
706 	bsd_init_kprintf("calling proc_uuid_policy_init()\n");
707 	proc_uuid_policy_init();
708 #endif
709 
710 #if SOCKETS
711 	net_update_uptime();
712 #if CONFIG_MBUF_MCACHE
713 	/* Initialize per-CPU cache allocator */
714 	mcache_init();
715 #endif /* CONFIG_MBUF_MCACHE */
716 
717 	/* Initialize mbuf's. */
718 	bsd_init_kprintf("calling mbinit\n");
719 	mbinit();
720 	restricted_in_port_init();
721 #endif /* SOCKETS */
722 
723 	/*
724 	 * Initializes security event auditing.
725 	 * XXX: Should/could this occur later?
726 	 */
727 #if CONFIG_AUDIT
728 	bsd_init_kprintf("calling audit_init\n");
729 	audit_init();
730 #endif
731 
732 	/* Initialize kqueues */
733 	bsd_init_kprintf("calling knote_init\n");
734 	knote_init();
735 
736 	/* Initialize event handler */
737 	bsd_init_kprintf("calling eventhandler_init\n");
738 	eventhandler_init();
739 
740 	/* Initialize for async IO */
741 	bsd_init_kprintf("calling aio_init\n");
742 	aio_init();
743 
744 	pthread_init();
745 	/* POSIX Shm and Sem */
746 	bsd_init_kprintf("calling pshm_cache_init\n");
747 	pshm_cache_init();
748 	bsd_init_kprintf("calling psem_cache_init\n");
749 	psem_cache_init();
750 
751 	/*
752 	 * Initialize protocols.  Block reception of incoming packets
753 	 * until everything is ready.
754 	 */
755 #if NETWORKING
756 	bsd_init_kprintf("calling nwk_wq_init\n");
757 	nwk_wq_init();
758 	bsd_init_kprintf("calling dlil_init\n");
759 	dlil_init();
760 #endif /* NETWORKING */
761 #if SOCKETS
762 	bsd_init_kprintf("calling socketinit\n");
763 	socketinit();
764 	bsd_init_kprintf("calling domaininit\n");
765 	domaininit();
766 	iptap_init();
767 #if FLOW_DIVERT
768 	flow_divert_init();
769 #endif  /* FLOW_DIVERT */
770 #endif /* SOCKETS */
771 #if SKYWALK
772 	bsd_init_kprintf("calling skywalk_init\n");
773 	(void) skywalk_init();
774 #endif /* SKYWALK */
775 #if NETWORKING
776 #if NECP
777 	/* Initialize Network Extension Control Policies */
778 	necp_init();
779 #endif
780 	netagent_init();
781 	net_aop_init();
782 #endif /* NETWORKING */
783 
784 #if CONFIG_FREEZE
785 #ifndef CONFIG_MEMORYSTATUS
786     #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
787 #endif
788 	/* Initialise background freezing */
789 	bsd_init_kprintf("calling memorystatus_freeze_init\n");
790 	memorystatus_freeze_init();
791 #endif
792 
793 #if CONFIG_MEMORYSTATUS
794 	/* Initialize kernel memory status notifications */
795 	bsd_init_kprintf("calling memorystatus_init\n");
796 	memorystatus_init();
797 
798 	/* Fixup memorystatus fields of the kernel process (only for logging purposes) */
799 	kernproc->p_memstat_state |= P_MEMSTAT_INTERNAL;
800 	kernproc->p_memstat_effectivepriority = JETSAM_PRIORITY_INTERNAL;
801 	kernproc->p_memstat_requestedpriority = JETSAM_PRIORITY_INTERNAL;
802 #endif /* CONFIG_MEMORYSTATUS */
803 
804 	bsd_init_kprintf("calling sysctl_mib_init\n");
805 	sysctl_mib_init();
806 
807 	bsd_init_kprintf("calling bsd_autoconf\n");
808 	bsd_autoconf();
809 
810 #if CONFIG_DTRACE
811 	dtrace_postinit();
812 #endif
813 
814 	/*
815 	 * We attach the loopback interface *way* down here to ensure
816 	 * it happens after autoconf(), otherwise it becomes the
817 	 * "primary" interface.
818 	 */
819 #include <loop.h>
820 #if NLOOP > 0
821 	bsd_init_kprintf("calling loopattach\n");
822 	loopattach();                   /* XXX */
823 #endif
824 #if NGIF
825 	/* Initialize gif interface (after lo0) */
826 	gif_init();
827 #endif
828 
829 #if PFLOG
830 	/* Initialize packet filter log interface */
831 	pfloginit();
832 #endif /* PFLOG */
833 
834 #if NETHER > 0
835 	/* Register the built-in dlil ethernet interface family */
836 	bsd_init_kprintf("calling ether_family_init\n");
837 	ether_family_init();
838 #endif /* ETHER */
839 
840 #if NETWORKING
841 #if CONTENT_FILTER
842 	cfil_init();
843 #endif
844 
845 #if PACKET_MANGLER
846 	pkt_mnglr_init();
847 #endif
848 
849 	/*
850 	 * Register subsystems with kernel control handlers
851 	 */
852 	utun_register_control();
853 #if IPSEC
854 	ipsec_init();
855 #endif /* IPSEC */
856 	netsrc_init();
857 	nstat_init();
858 #if MPTCP
859 	mptcp_control_register();
860 #endif /* MPTCP */
861 
862 #if REMOTE_VIF
863 	rvi_init();
864 #endif /* REMOTE_VIF */
865 
866 #if IF_REDIRECT
867 	if_redirect_init();
868 #endif /* REDIRECT */
869 
870 #if KCTL_TEST
871 	kctl_test_init();
872 #endif /* KCTL_TEST */
873 
874 	/*
875 	 * The networking stack is now initialized so it is a good time to call
876 	 * the clients that are waiting for the networking stack to be usable.
877 	 */
878 	bsd_init_kprintf("calling net_init_run\n");
879 	net_init_run();
880 #endif /* NETWORKING */
881 
882 	bsd_init_kprintf("calling inittodr\n");
883 	inittodr(0);
884 
885 	/* Mount the root file system. */
886 	while (TRUE) {
887 		int err;
888 
889 		bsd_init_kprintf("calling setconf\n");
890 		setconf();
891 #if CONFIG_NETBOOT
892 		netboot = (mountroot == netboot_mountroot);
893 #endif
894 
895 		bsd_init_kprintf("vfs_mountroot\n");
896 		if (0 == (err = vfs_mountroot())) {
897 			break;
898 		}
899 		rootdevice[0] = '\0';
900 #if CONFIG_NETBOOT
901 		if (netboot) {
902 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
903 			vc_progress_set(FALSE, 0);
904 			for (uint32_t i = 1; 1; i *= 2) {
905 				printf("bsd_init: failed to mount network root, error %d, %s\n",
906 				    err, PE_boot_args());
907 				printf("We are hanging here...\n");
908 				IOSleep(i * 60 * 1000);
909 			}
910 			/*NOTREACHED*/
911 		}
912 #endif
913 		printf("cannot mount root, errno = %d\n", err);
914 	}
915 
916 	IOSecureBSDRoot(rootdevice);
917 
918 	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
919 
920 	bsd_init_kprintf("calling VFS_ROOT\n");
921 	/* Get the vnode for '/'; set_rootvnode() below makes kernproc->p_fd.fd_cdir reference it. */
922 	if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, vfs_context_kernel())) {
923 		panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
924 	}
925 	(void)vnode_ref(init_rootvnode);
926 	(void)vnode_put(init_rootvnode);
927 
928 	lck_rw_lock_exclusive(&rootvnode_rw_lock);
929 	set_rootvnode(init_rootvnode);
930 	lck_rw_unlock_exclusive(&rootvnode_rw_lock);
931 	init_rootvnode = NULLVP;  /* use rootvnode after this point */
932 
933 
934 	if (!bsd_rooted_ramdisk()) {
935 		boolean_t require_rootauth = FALSE;
936 
937 #if XNU_TARGET_OS_OSX && defined(__arm64__)
938 #if CONFIG_IMAGEBOOT
939 		/* Apple Silicon MacOS */
940 		require_rootauth = !imageboot_desired();
941 #endif // CONFIG_IMAGEBOOT
942 #elif !XNU_TARGET_OS_OSX
943 		/* Non MacOS */
944 		require_rootauth = TRUE;
945 #endif // XNU_TARGET_OS_OSX && defined(__arm64__)
946 
947 		if (require_rootauth) {
948 			/* enforce sealedness */
949 			int autherr = VNOP_IOCTL(rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
950 			if (autherr) {
951 				panic("rootvp not authenticated after mounting");
952 			}
953 		}
954 	}
955 
956 
957 #if CONFIG_NETBOOT
958 	if (netboot) {
959 		int err;
960 
961 		netboot = TRUE;
962 		/* post mount setup */
963 		if ((err = netboot_setup()) != 0) {
964 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
965 			vc_progress_set(FALSE, 0);
966 			for (uint32_t i = 1; 1; i *= 2) {
967 				printf("bsd_init: NetBoot could not find root, error %d: %s\n",
968 				    err, PE_boot_args());
969 				printf("We are hanging here...\n");
970 				IOSleep(i * 60 * 1000);
971 			}
972 			/*NOTREACHED*/
973 		}
974 	}
975 #endif
976 
977 
978 #if CONFIG_IMAGEBOOT
979 	/*
980 	 * See if a system disk image is present. If so, mount it and
981 	 * switch the root vnode to point to it
982 	 */
983 	imageboot_type_t imageboot_type = imageboot_needed();
984 	if (netboot == FALSE && imageboot_type) {
985 		/*
986 		 * An image was found.  No turning back: we're booted
987 		 * with a kernel from the disk image.
988 		 */
989 		bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
990 		imageboot_setup(imageboot_type);
991 		IOSetImageBoot();
992 	}
993 
994 #endif /* CONFIG_IMAGEBOOT */
995 
996 	/* set initial time; all other resource data is  already zero'ed */
997 	microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);
998 
999 #if DEVFS
1000 	{
1001 		char mounthere[] = "/dev"; /* !const because of internal casting */
1002 
1003 		bsd_init_kprintf("calling devfs_kernel_mount\n");
1004 		devfs_kernel_mount(mounthere);
1005 	}
1006 #endif /* DEVFS */
1007 
1008 #if CONFIG_BASESYSTEMROOT
1009 #if CONFIG_IMAGEBOOT
1010 	if (bsdmgroot_bootable()) {
1011 		int error;
1012 		bool rooted_dmg = false;
1013 		bool skip_signature_check = false;
1014 
1015 		printf("trying to find and mount BaseSystem dmg as root volume\n");
1016 #if DEVELOPMENT || DEBUG
1017 		printf("(set boot-arg -nobsdmgroot to avoid this)\n");
1018 #endif // DEVELOPMENT || DEBUG
1019 
1020 		char *dmgpath = NULL;
1021 		dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK | Z_NOFAIL);
1022 
1023 		error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg, &skip_signature_check);
1024 		if (error) {
1025 			bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
1026 		} else {
1027 			PE_parse_boot_argn("bsdmgpath", dmgpath, sizeof(dmgpath));
1028 
1029 			bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
1030 
1031 			error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg, skip_signature_check);
1032 			if (error) {
1033 				bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
1034 			} else {
1035 				IOSetImageBoot();
1036 			}
1037 		}
1038 		zfree(ZV_NAMEI, dmgpath);
1039 	}
1040 #else /* CONFIG_IMAGEBOOT */
1041 #error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
1042 #endif /* CONFIG_IMAGEBOOT */
1043 #endif /* CONFIG_BASESYSTEMROOT */
1044 
1045 	/* Initialize signal state for process 0. */
1046 	bsd_init_kprintf("calling siginit\n");
1047 	siginit(kernproc);
1048 
1049 	bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1050 	bsd_utaskbootstrap();
1051 
1052 	pal_kernel_announce();
1053 
1054 	bsd_init_kprintf("calling mountroot_post_hook\n");
1055 
1056 #if XNU_TARGET_OS_OSX
1057 	/* invoke post-root-mount hook */
1058 	if (mountroot_post_hook != NULL) {
1059 		mountroot_post_hook();
1060 	}
1061 #endif
1062 
1063 #if 0 /* not yet */
1064 	consider_zone_gc(FALSE);
1065 #endif
1066 
1067 #if DEVELOPMENT || DEBUG
1068 	/*
1069 	 * At this point, we consider the kernel "booted" enough to apply
1070 	 * stricter timeouts. Only used for debug timeouts.
1071 	 */
1072 	machine_timeout_bsd_init();
1073 #endif /* DEVELOPMENT || DEBUG */
1074 
1075 #if HAS_UPSI_FAILURE_INJECTION
1076 	check_for_failure_injection(XNU_STAGE_BSD_INIT_END);
1077 #endif
1078 
1079 	bsd_init_kprintf("done\n");
1080 }
1081 
1082 void
bsdinit_task(void)1083 bsdinit_task(void)
1084 {
1085 	proc_t p = current_proc();
1086 
1087 	process_name("init", p);
1088 
1089 	/* Set up exception-to-signal reflection */
1090 	ux_handler_setup();
1091 
1092 #if CONFIG_MACF
1093 	mac_cred_label_associate_user(proc_ucred_unsafe(p)); /* in init */
1094 #endif
1095 
1096 	vm_init_before_launchd();
1097 
1098 #if CONFIG_XNUPOST
1099 	int result = bsd_list_tests();
1100 	result = bsd_do_post();
1101 	if (result != 0) {
1102 		panic("bsd_do_post: Tests failed with result = 0x%08x", result);
1103 	}
1104 #endif
1105 
1106 	bsd_init_kprintf("bsd_do_post - done");
1107 
1108 	load_init_program(p);
1109 	lock_trace = 1;
1110 }
1111 
1112 kern_return_t
bsd_autoconf(void)1113 bsd_autoconf(void)
1114 {
1115 	kprintf("bsd_autoconf: calling kminit\n");
1116 	kminit();
1117 
1118 	/*
1119 	 * Early startup for bsd pseudodevices.
1120 	 */
1121 	{
1122 		struct pseudo_init *pi;
1123 
1124 		for (pi = pseudo_inits; pi->ps_func; pi++) {
1125 			(*pi->ps_func)(pi->ps_count);
1126 		}
1127 	}
1128 
1129 	return IOKitBSDInit();
1130 }
1131 
1132 
1133 #include <sys/disklabel.h>  /* for MAXPARTITIONS */
1134 
1135 static void
setconf(void)1136 setconf(void)
1137 {
1138 	u_int32_t       flags;
1139 	kern_return_t   err;
1140 
1141 	err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1142 	if (err) {
1143 		printf("setconf: IOFindBSDRoot returned an error (%d);"
1144 		    "setting rootdevice to 'sd0a'.\n", err);     /* XXX DEBUG TEMP */
1145 		rootdev = makedev( 6, 0 );
1146 		strlcpy(rootdevice, "sd0a", sizeof(rootdevice));
1147 		flags = 0;
1148 	}
1149 
1150 #if CONFIG_NETBOOT
1151 	if (flags & 1) {
1152 		/* network device */
1153 		mountroot = netboot_mountroot;
1154 	} else {
1155 #endif
1156 	/* otherwise have vfs determine root filesystem */
1157 	mountroot = NULL;
1158 #if CONFIG_NETBOOT
1159 }
1160 #endif
1161 }
1162 
1163 /*
1164  * Boot into the flavor of Recovery dictated by `mode`.
1165  */
1166 boolean_t
bsd_boot_to_recovery(bsd_bootfail_mode_t mode,uuid_t volume_uuid,boolean_t reboot)1167 bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1168 {
1169 	return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1170 }
1171 
1172 void
bsd_utaskbootstrap(void)1173 bsd_utaskbootstrap(void)
1174 {
1175 	thread_t thread;
1176 	struct uthread *ut;
1177 
1178 	/*
1179 	 * Clone the bootstrap process from the kernel process, without
1180 	 * inheriting either task characteristics or memory from the kernel;
1181 	 */
1182 	thread = cloneproc(TASK_NULL, NULL, kernproc, CLONEPROC_INITPROC);
1183 
1184 	/* Hold the reference as it will be dropped during shutdown */
1185 	initproc = proc_find(1);
1186 #if __PROC_INTERNAL_DEBUG
1187 	if (initproc == PROC_NULL) {
1188 		panic("bsd_utaskbootstrap: initproc not set");
1189 	}
1190 #endif
1191 
1192 	zalloc_first_proc_made();
1193 
1194 	/*
1195 	 * Since we aren't going back out the normal way to our parent,
1196 	 * we have to drop the transition locks explicitly.
1197 	 */
1198 	proc_signalend(initproc, 0);
1199 	proc_transend(initproc, 0);
1200 
1201 	ut = (struct uthread *)get_bsdthread_info(thread);
1202 	ut->uu_sigmask = 0;
1203 	act_set_astbsd(thread);
1204 
1205 	task_t task = get_threadtask(thread);
1206 	vm_map_setup(get_task_map(task), task);
1207 	task_set_ctrl_port_default(task, thread);
1208 	ipc_task_enable(task);
1209 
1210 	task_clear_return_wait(task, TCRW_CLEAR_ALL_WAIT);
1211 }
1212 
1213 static void
parse_bsd_args(void)1214 parse_bsd_args(void)
1215 {
1216 	char namep[48];
1217 
1218 	if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
1219 		boothowto |= RB_SINGLE;
1220 	}
1221 
1222 	if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
1223 		boothowto |= RB_SAFEBOOT;
1224 	}
1225 
1226 	if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
1227 	    sizeof(max_nbuf_headers))) {
1228 		customnbuf = 1;
1229 	}
1230 
1231 #if CONFIG_DARKBOOT
1232 	/*
1233 	 * The darkboot flag is specified by the bootloader and is stored in
1234 	 * boot_args->bootFlags. This flag is available starting revision 2.
1235 	 */
1236 	boot_args *args = (boot_args *) PE_state.bootArgs;
1237 	if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1238 		darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1239 	} else {
1240 		darkboot = 0;
1241 	}
1242 #endif
1243 
1244 #if DEVELOPMENT || DEBUG
1245 	if (PE_parse_boot_argn("dyldsuffix", dyld_suffix, sizeof(dyld_suffix))) {
1246 		if (strlen(dyld_suffix) > 0) {
1247 			use_dyld_suffix = 1;
1248 		}
1249 	}
1250 
1251 	if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1252 		if (strlen(dyld_alt_path) > 0) {
1253 			use_alt_dyld = 1;
1254 		}
1255 	}
1256 
1257 	if (PE_parse_boot_arg_str("panic-on-proc-crash", panic_on_proc_crash, sizeof(panic_on_proc_crash))) {
1258 		if (strlen(panic_on_proc_crash) > 0) {
1259 			use_panic_on_proc_crash = 1;
1260 		}
1261 	}
1262 
1263 	if (PE_parse_boot_arg_str("panic-on-proc-exit", panic_on_proc_exit, sizeof(panic_on_proc_exit))) {
1264 		if (strlen(panic_on_proc_exit) > 0) {
1265 			use_panic_on_proc_exit = 1;
1266 		}
1267 	}
1268 
1269 	if (PE_parse_boot_arg_str("panic-on-proc-spawn-fail", panic_on_proc_spawn_fail, sizeof(panic_on_proc_spawn_fail))) {
1270 		if (strlen(panic_on_proc_spawn_fail) > 0) {
1271 			use_panic_on_proc_spawn_fail = 1;
1272 		}
1273 	}
1274 
1275 	if (PE_i_can_has_debugger(NULL) && PE_parse_boot_argn("-hide_process_traced", namep, sizeof(namep))) {
1276 		bootarg_hide_process_traced = 1;
1277 	}
1278 #endif /* DEVELOPMENT || DEBUG */
1279 }
1280 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, parse_bsd_args);
1281 
1282 #if CONFIG_BASESYSTEMROOT
1283 
1284 extern bool IOGetBootUUID(char *);
1285 extern bool IOGetApfsPrebootUUID(char *);
1286 
1287 
1288 // This function returns the UUID of the Preboot (and Recovery) folder associated with the
1289 // current boot volume, if applicable. The meaning of the UUID can be
1290 // filesystem-dependent and not all kinds of boots will have a UUID.
1291 // On success, the UUID is copied into the past-in parameter and TRUE is returned.
1292 // In case the current boot has no applicable Preboot UUID, FALSE is returned.
1293 static bool
get_preboot_uuid(uuid_string_t maybe_uuid_string)1294 get_preboot_uuid(uuid_string_t maybe_uuid_string)
1295 {
1296 	// try IOGetApfsPrebootUUID
1297 	if (IOGetApfsPrebootUUID(maybe_uuid_string)) {
1298 		uuid_t maybe_uuid;
1299 		int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1300 		if (error == 0) {
1301 			return true;
1302 		}
1303 	}
1304 
1305 	// try IOGetBootUUID
1306 	if (IOGetBootUUID(maybe_uuid_string)) {
1307 		uuid_t maybe_uuid;
1308 		int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1309 		if (error == 0) {
1310 			return true;
1311 		}
1312 	}
1313 
1314 	// didn't find it
1315 	return false;
1316 }
1317 
1318 #if defined(__arm64__)
1319 extern bool IOGetBootObjectsPath(char *);
1320 #endif
1321 
1322 // Find the BaseSystem.dmg to be used as the initial root volume during certain
1323 // kinds of boots.
1324 // This may mount volumes and lookup vnodes.
1325 // The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1326 // If it returns 0 (no error), then it also writes the absolute path to the
1327 // BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1328 static
1329 int
bsd_find_basesystem_dmg(char * bsdmgpath_out,bool * rooted_dmg,bool * skip_signature_check)1330 bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
1331 {
1332 	int error;
1333 	size_t len;
1334 	char *dmgbasepath;
1335 	char *dmgpath;
1336 	bool allow_rooted_dmg = false;
1337 
1338 	dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1339 	dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1340 	vnode_t imagevp = NULLVP;
1341 
1342 #if DEVELOPMENT || DEBUG
1343 	allow_rooted_dmg = true;
1344 #endif
1345 
1346 	//must provide output bool
1347 	if (rooted_dmg && skip_signature_check) {
1348 		*rooted_dmg = false;
1349 		*skip_signature_check = false;
1350 	} else {
1351 		error = EINVAL;
1352 		goto done;
1353 	}
1354 
1355 	error = vfs_mount_recovery();
1356 	if (error) {
1357 		goto done;
1358 	}
1359 
1360 	len = strlcpy(dmgbasepath, "/System/Volumes/Recovery/", MAXPATHLEN);
1361 	if (len > MAXPATHLEN) {
1362 		error = ENAMETOOLONG;
1363 		goto done;
1364 	}
1365 
1366 	if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
1367 		*skip_signature_check = true;
1368 		allow_rooted_dmg = true;
1369 	}
1370 
1371 #if defined(__arm64__)
1372 	char boot_obj_path[MAXPATHLEN] = "";
1373 
1374 	if (IOGetBootObjectsPath(boot_obj_path)) {
1375 		if (boot_obj_path[0] == '/') {
1376 			dmgbasepath[len - 1] = '\0';
1377 		}
1378 
1379 		len = strlcat(dmgbasepath, boot_obj_path, MAXPATHLEN);
1380 		if (len > MAXPATHLEN) {
1381 			error = ENAMETOOLONG;
1382 			goto done;
1383 		}
1384 
1385 		len = strlcat(dmgbasepath, "/usr/standalone/firmware/", MAXPATHLEN);
1386 		if (len > MAXPATHLEN) {
1387 			error = ENAMETOOLONG;
1388 			goto done;
1389 		}
1390 
1391 		if (allow_rooted_dmg) {
1392 			len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1393 			if (len > MAXPATHLEN) {
1394 				error = ENAMETOOLONG;
1395 				goto done;
1396 			}
1397 
1398 			len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1399 			if (len > MAXPATHLEN) {
1400 				error = ENAMETOOLONG;
1401 				goto done;
1402 			}
1403 
1404 			error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1405 			if (error == 0) {
1406 				*rooted_dmg = true;
1407 				*skip_signature_check = true;
1408 				goto done;
1409 			}
1410 			memset(dmgpath, 0, MAXPATHLEN);
1411 		}
1412 
1413 		len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1414 		if (len > MAXPATHLEN) {
1415 			error = ENAMETOOLONG;
1416 			goto done;
1417 		}
1418 
1419 		len = strlcat(dmgpath, "arm64eBaseSystem.dmg", MAXPATHLEN);
1420 		if (len > MAXPATHLEN) {
1421 			error = ENAMETOOLONG;
1422 			goto done;
1423 		}
1424 
1425 		error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1426 		if (error == 0) {
1427 			goto done;
1428 		}
1429 		memset(dmgpath, 0, MAXPATHLEN);
1430 		dmgbasepath[strlen("/System/Volumes/Recovery/")] = '\0';
1431 	}
1432 #endif // __arm64__
1433 
1434 	uuid_string_t preboot_uuid;
1435 	if (!get_preboot_uuid(preboot_uuid)) {
1436 		// no preboot? bail out
1437 		return EINVAL;
1438 	}
1439 
1440 	len = strlcat(dmgbasepath, preboot_uuid, MAXPATHLEN);
1441 	if (len > MAXPATHLEN) {
1442 		error = ENAMETOOLONG;
1443 		goto done;
1444 	}
1445 
1446 	if (allow_rooted_dmg) {
1447 		// Try BaseSystem.rooted.dmg
1448 		len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1449 		if (len > MAXPATHLEN) {
1450 			error = ENAMETOOLONG;
1451 			goto done;
1452 		}
1453 
1454 		len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
1455 		if (len > MAXPATHLEN) {
1456 			error = ENAMETOOLONG;
1457 			goto done;
1458 		}
1459 
1460 		error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1461 		if (error == 0) {
1462 			// we found it! success!
1463 			*rooted_dmg = true;
1464 			*skip_signature_check = true;
1465 			goto done;
1466 		}
1467 	}
1468 
1469 	// Try BaseSystem.dmg
1470 	len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1471 	if (len > MAXPATHLEN) {
1472 		error = ENAMETOOLONG;
1473 		goto done;
1474 	}
1475 
1476 	len = strlcat(dmgpath, "/BaseSystem.dmg", MAXPATHLEN);
1477 	if (len > MAXPATHLEN) {
1478 		error = ENAMETOOLONG;
1479 		goto done;
1480 	}
1481 
1482 	error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1483 	if (error == 0) {
1484 		// success!
1485 		goto done;
1486 	}
1487 
1488 done:
1489 	if (error == 0) {
1490 		strlcpy(bsdmgpath_out, dmgpath, MAXPATHLEN);
1491 	} else {
1492 		bsd_init_kprintf("%s: error %d\n", __func__, error);
1493 	}
1494 	if (imagevp != NULLVP) {
1495 		vnode_put(imagevp);
1496 	}
1497 	zfree(ZV_NAMEI, dmgpath);
1498 	zfree(ZV_NAMEI, dmgbasepath);
1499 	return error;
1500 }
1501 
1502 static boolean_t
bsdmgroot_bootable(void)1503 bsdmgroot_bootable(void)
1504 {
1505 #if defined(__arm64__)
1506 #define BSDMGROOT_DEFAULT true
1507 #else
1508 #define BSDMGROOT_DEFAULT false
1509 #endif
1510 
1511 	boolean_t resolved = BSDMGROOT_DEFAULT;
1512 
1513 	boolean_t boot_arg_bsdmgroot = false;
1514 	boolean_t boot_arg_nobsdmgroot = false;
1515 	int error;
1516 	mount_t mp;
1517 	boolean_t root_part_of_volume_group = false;
1518 	struct vfs_attr vfsattr;
1519 
1520 	mp = rootvnode->v_mount;
1521 	VFSATTR_INIT(&vfsattr);
1522 	VFSATTR_WANTED(&vfsattr, f_capabilities);
1523 
1524 	boot_arg_bsdmgroot = PE_parse_boot_argn("-bsdmgroot", NULL, 0);
1525 	boot_arg_nobsdmgroot = PE_parse_boot_argn("-nobsdmgroot", NULL, 0);
1526 
1527 	error = vfs_getattr(mp, &vfsattr, vfs_context_kernel());
1528 	if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1529 		if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1530 		    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1531 			root_part_of_volume_group = true;
1532 		}
1533 	}
1534 
1535 	boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1536 
1537 	// Start with the #defined default above.
1538 	// If booting to single-user mode, default to false, because single-
1539 	// user mode inside the BaseSystem is probably not what's wanted.
1540 	// If the 'yes' boot-arg is set, we'll allow that even in single-user
1541 	// mode, we'll assume you know what you're doing.
1542 	// The 'no' boot-arg overpowers the 'yes' boot-arg.
1543 	// In any case, we will not attempt to root from BaseSystem if the
1544 	// original (booter-chosen) root volume isn't in a volume group.
1545 	// This is just out of an abundance of caution: if the boot environment
1546 	// seems to be "something other than a standard install",
1547 	// we'll be conservative in messing with the root volume.
1548 
1549 	if (singleuser) {
1550 		resolved = false;
1551 	}
1552 
1553 	if (boot_arg_bsdmgroot) {
1554 		resolved = true;
1555 	}
1556 
1557 	if (boot_arg_nobsdmgroot) {
1558 		resolved = false;
1559 	}
1560 
1561 	if (!root_part_of_volume_group) {
1562 		resolved = false;
1563 	}
1564 
1565 	return resolved;
1566 }
1567 #endif // CONFIG_BASESYSTEMROOT
1568 
1569 void
bsd_exec_setup(int scale)1570 bsd_exec_setup(int scale)
1571 {
1572 	switch (scale) {
1573 	case 0:
1574 	case 1:
1575 		bsd_simul_execs = BSD_SIMUL_EXECS;
1576 		break;
1577 	case 2:
1578 	case 3:
1579 		bsd_simul_execs = 65;
1580 		break;
1581 	case 4:
1582 	case 5:
1583 		bsd_simul_execs = 129;
1584 		break;
1585 	case 6:
1586 	case 7:
1587 		bsd_simul_execs = 257;
1588 		break;
1589 	default:
1590 		bsd_simul_execs = 513;
1591 		break;
1592 	}
1593 	bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1594 }
1595 
#if !CONFIG_NETBOOT
/*
 * Stub compiled only when CONFIG_NETBOOT is disabled: it takes no
 * arguments and always returns 0.
 */
int netboot_root(void);

int
netboot_root(void)
{
	return 0;
}
#endif
1606