1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 *
29 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
66 */
67
68 /*
69 *
70 * Mach Operating System
71 * Copyright (c) 1987 Carnegie-Mellon University
72 * All rights reserved. The CMU software License Agreement specifies
73 * the terms and conditions for use and redistribution.
74 */
75 /*
76 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/conf.h>
91 #include <sys/buf_internal.h>
92 #include <sys/user.h>
93 #include <sys/time.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96
97 #include <security/audit/audit.h>
98
99 #include <sys/malloc.h>
100 #include <sys/dkstat.h>
101 #include <sys/codesign.h>
102
103 #include <kern/startup.h>
104 #include <kern/thread.h>
105 #include <kern/task.h>
106 #include <kern/ast.h>
107 #include <kern/zalloc.h>
108 #include <kern/ux_handler.h> /* for ux_handler_setup() */
109 #include <kern/sched_hygiene.h>
110
111 #if (DEVELOPMENT || DEBUG)
112 #include <kern/debug.h>
113 #endif
114
115 #include <mach/vm_param.h>
116
117 #include <vm/vm_map.h>
118 #include <vm/vm_kern.h>
119
120 #include <sys/reboot.h>
121 #include <dev/busvar.h> /* for pseudo_inits */
122 #include <sys/kdebug.h>
123 #include <sys/monotonic.h>
124
125 #include <mach/mach_types.h>
126 #include <mach/vm_prot.h>
127 #include <mach/semaphore.h>
128 #include <mach/sync_policy.h>
129 #include <kern/clock.h>
130 #include <sys/csr.h>
131 #include <mach/kern_return.h>
132 #include <mach/thread_act.h> /* for thread_resume() */
133 #include <sys/mcache.h> /* for mcache_init() */
134 #include <sys/mbuf.h> /* for mbinit() */
135 #include <sys/event.h> /* for knote_init() */
136 #include <sys/eventhandler.h> /* for eventhandler_init() */
137 #include <sys/kern_memorystatus.h> /* for memorystatus_init() */
138 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
139 #include <sys/aio_kern.h> /* for aio_init() */
140 #include <sys/semaphore.h> /* for psem_cache_init() */
141 #include <net/dlil.h> /* for dlil_init() */
142 #include <net/iptap.h> /* for iptap_init() */
143 #include <sys/socketvar.h> /* for socketinit() */
144 #include <sys/protosw.h> /* for domaininit() */
145 #include <kern/sched_prim.h> /* for thread_wakeup() */
146 #include <net/if_ether.h> /* for ether_family_init() */
147 #include <net/if_gif.h> /* for gif_init() */
148 #include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
149 #include <vm/vm_kern.h> /* for kmem_suballoc() */
150 #include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
151 #include <netinet/flow_divert.h> /* flow_divert_init() */
152 #include <net/content_filter.h> /* for cfil_init() */
153 #include <net/necp.h> /* for necp_init() */
154 #include <net/network_agent.h> /* for netagent_init() */
155 #include <net/packet_mangler.h> /* for pkt_mnglr_init() */
156 #include <net/if_utun.h> /* for utun_register_control() */
157 #include <netinet6/ipsec.h> /* for ipsec_init() */
158 #include <net/if_redirect.h> /* for if_redirect_init() */
159 #include <net/netsrc.h> /* for netsrc_init() */
160 #include <net/ntstat.h> /* for nstat_init() */
161 #include <netinet/mptcp_var.h> /* for mptcp_control_register() */
162 #include <net/nwk_wq.h> /* for nwk_wq_init */
163 #include <net/restricted_in_port.h> /* for restricted_in_port_init() */
164 #include <net/remote_vif.h> /* for rvi_init() */
165 #include <net/kctl_test.h> /* for kctl_test_init() */
166 #include <netinet/kpi_ipfilter_var.h> /* for ipfilter_init() */
167 #include <kern/assert.h> /* for assert() */
168 #include <sys/kern_overrides.h> /* for init_system_override() */
169 #include <sys/lockf.h> /* for lf_init() */
170 #include <sys/fsctl.h>
171
172 #include <net/init.h>
173
174 #if CONFIG_MACF
175 #include <security/mac_framework.h>
176 #include <security/mac_internal.h> /* mac_init_bsd() */
177 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
178 #endif
179
180 #include <machine/exec.h>
181
182 #if CONFIG_NETBOOT
183 #include <sys/netboot.h>
184 #endif
185
186 #if CONFIG_IMAGEBOOT
187 #include <sys/imageboot.h>
188 #endif
189
190 #if PFLOG
191 #include <net/if_pflog.h>
192 #endif
193
194 #if SKYWALK
195 #include <skywalk/os_skywalk_private.h>
196 #endif /* SKYWALK */
197
198 #include <pexpert/pexpert.h>
199 #include <machine/pal_routines.h>
200 #include <console/video_console.h>
201
202 #if CONFIG_XNUPOST
203 #include <tests/xnupost.h>
204 #endif
205
206 void * get_user_regs(thread_t); /* XXX kludge for <machine/thread.h> */
207 void IOKitInitializeTime(void); /* XXX */
208 void IOSleep(unsigned int); /* XXX */
209 void IOSetImageBoot(void); /* XXX */
210 void loopattach(void); /* XXX */
211
212 void ipc_task_enable(task_t task);
213
214 const char *const copyright =
215 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
216 "The Regents of the University of California. "
217 "All rights reserved.\n\n";
218
219 /* Components of the first process -- never freed. */
220 SECURITY_READ_ONLY_LATE(struct vfs_context) vfs_context0;
221
222 static struct plimit limit0;
223 static struct pstats pstats0;
224 SECURITY_READ_ONLY_LATE(proc_t) kernproc;
225 proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
226
227 long tk_cancc;
228 long tk_nin;
229 long tk_nout;
230 long tk_rawcc;
231
232 int lock_trace = 0;
233 /* Global variables to make pstat happy. We do swapping differently */
234 int nswdev, nswap;
235 int nswapmap;
236 void *swapmap;
237 struct swdevt swdevt[1];
238
239 static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
240 LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
241 LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
242
243 dev_t rootdev; /* device of the root */
244 dev_t dumpdev; /* device to take dumps on */
245 long dumplo; /* offset into dumpdev */
246 long hostid;
247 char hostname[MAXHOSTNAMELEN];
248 char domainname[MAXDOMNAMELEN];
249 char rootdevice[DEVMAXNAMESIZE];
250
251 struct vnode *rootvp;
252 bool rootvp_is_ssd = false;
253 SECURITY_READ_ONLY_LATE(int) boothowto;
254 /*
255 * -minimalboot indicates that we want userspace to be bootstrapped to a
256 * minimal environment. What constitutes minimal is up to the bootstrap
257 * process.
258 */
259 TUNABLE(int, minimalboot, "-minimalboot", 0);
260 #if CONFIG_DARKBOOT
261 int darkboot = 0;
262 #endif
263
264 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
265 extern void IOSecureBSDRoot(const char * rootName);
266 extern kern_return_t IOKitBSDInit(void );
267 extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
268 extern void kminit(void);
269 extern void bsd_bufferinit(void);
270 extern void throttle_init(void);
271
272 vm_map_t bsd_pageable_map;
273 vm_map_t mb_map;
274
275 static int bsd_simul_execs;
276 static int bsd_pageable_map_size;
277 __private_extern__ int execargs_cache_size = 0;
278 __private_extern__ int execargs_free_count = 0;
279 __private_extern__ vm_offset_t * execargs_cache = NULL;
280
281 void bsd_exec_setup(int);
282
283 __private_extern__ int bootarg_execfailurereports = 0;
284
285 #if __x86_64__
286 __private_extern__ TUNABLE(int, bootarg_no32exec, "no32exec", 1);
287 #endif
288
289 #if DEVELOPMENT || DEBUG
290 /* Prevent kernel-based ASLR from being used. */
291 __private_extern__ TUNABLE(bool, bootarg_disable_aslr, "-disable_aslr", 0);
292 #endif
293
294 /*
295 * Allow an alternate dyld to be used for testing.
296 */
297
298 #if DEVELOPMENT || DEBUG
299 char dyld_alt_path[MAXPATHLEN];
300 int use_alt_dyld = 0;
301
302 char panic_on_proc_crash[NAME_MAX];
303 int use_panic_on_proc_crash = 0;
304
305 char panic_on_proc_exit[NAME_MAX];
306 int use_panic_on_proc_exit = 0;
307
308 char panic_on_proc_spawn_fail[NAME_MAX];
309 int use_panic_on_proc_spawn_fail = 0;
310
311 char dyld_suffix[NAME_MAX];
312 int use_dyld_suffix = 0;
313 #endif
314
315 #if DEVELOPMENT || DEBUG
316 __private_extern__ bool bootarg_hide_process_traced = 0;
317 #endif
318
319 int cmask = CMASK;
320 extern int customnbuf;
321
322 kern_return_t bsd_autoconf(void);
323 void bsd_utaskbootstrap(void);
324
325 #if CONFIG_DEV_KMEM
326 extern void dev_kmem_init(void);
327 #endif
328 static void process_name(const char *, proc_t);
329
330 static void setconf(void);
331
332 #if CONFIG_BASESYSTEMROOT
333 static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
334 static boolean_t bsdmgroot_bootable(void);
335 #endif // CONFIG_BASESYSTEMROOT
336
337 static bool bsd_rooted_ramdisk(void);
338
339 #if SYSV_SHM
340 extern void sysv_shm_lock_init(void);
341 #endif
342 #if SYSV_SEM
343 extern void sysv_sem_lock_init(void);
344 #endif
345 #if SYSV_MSG
346 extern void sysv_msg_lock_init(void);
347 #endif
348
349 #if CONFIG_MACF
350 #if defined (__i386__) || defined (__x86_64__)
351 /* MACF policy_check configuration flags; see policy_check.c for details */
352 extern int check_policy_init(int);
353 #endif
354 #endif /* CONFIG_MACF */
355
356 /* If we are using CONFIG_DTRACE */
357 #if CONFIG_DTRACE
358 extern void dtrace_postinit(void);
359 #endif
360
361 /*
362 * Initialization code.
363 * Called from cold start routine as
364 * soon as a stack and segmentation
365 * have been established.
366 * Functions:
367 * turn on clock
368 * hand craft 0th process
369 * call all initialization routines
370 * hand craft 1st user process
371 */
372
373 /*
374 * Sets the name for the given task.
375 */
376 static void
process_name(const char * s,proc_t p)377 process_name(const char *s, proc_t p)
378 {
379 strlcpy(p->p_comm, s, sizeof(p->p_comm));
380 strlcpy(p->p_name, s, sizeof(p->p_name));
381 }
382
383 /* To allow these values to be patched, they're globals here */
384 #include <machine/vmparam.h>
385 struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
386 struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
387 struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
388
389 extern struct os_refgrp rlimit_refgrp;
390
391 extern int (*mountroot)(void);
392
393 LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
394 LCK_GRP_DECLARE(proc_lck_grp, "proc");
395 LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
396 LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
397 LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
398 LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
399 LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
400 LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
401 LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
402
403
404 LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
405
406 #if XNU_TARGET_OS_OSX
407 /* hook called after root is mounted XXX temporary hack */
408 void (*mountroot_post_hook)(void);
409 void (*unmountroot_pre_hook)(void);
410 #endif
411 void set_rootvnode(vnode_t);
412
413 extern lck_rw_t rootvnode_rw_lock;
414
415 SECURITY_READ_ONLY_LATE(struct mach_vm_range) bsd_pageable_range = {};
416 KMEM_RANGE_REGISTER_DYNAMIC(bsd_pageable, &bsd_pageable_range, ^() {
417 assert(bsd_pageable_map_size != 0);
418 return (vm_map_size_t) bsd_pageable_map_size;
419 });
420
421 /* called with an iocount and usecount on new_rootvnode */
422 void
set_rootvnode(vnode_t new_rootvnode)423 set_rootvnode(vnode_t new_rootvnode)
424 {
425 mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
426 vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
427 vnode_t old_rootvnode = rootvnode;
428
429 new_rootvnode->v_flag |= VROOT;
430 rootvp = new_devvp;
431 rootvnode = new_rootvnode;
432 kernproc->p_fd.fd_cdir = new_rootvnode;
433 if (new_devvp != NULL) {
434 rootdev = vnode_specrdev(new_devvp);
435 } else if (new_mount != NULL) {
436 rootdev = vfs_statfs(new_mount)->f_fsid.val[0]; /* like ATTR_CMN_DEVID */
437 } else {
438 rootdev = NODEV;
439 }
440
441 if (old_rootvnode) {
442 vnode_rele(old_rootvnode);
443 }
444 }
445
446 #define RAMDEV "md0"
447
448 bool
bsd_rooted_ramdisk(void)449 bsd_rooted_ramdisk(void)
450 {
451 bool is_ramdisk = false;
452 char *dev_path = zalloc(ZV_NAMEI);
453 if (dev_path == NULL) {
454 panic("failed to allocate devpath string!");
455 }
456
457 if (PE_parse_boot_argn("rd", dev_path, MAXPATHLEN)) {
458 if (strncmp(dev_path, RAMDEV, strlen(RAMDEV)) == 0) {
459 is_ramdisk = true;
460 }
461 }
462
463 zfree(ZV_NAMEI, dev_path);
464 return is_ramdisk;
465 }
466
467 /*
468 * This function is called very early on in the Mach startup, from the
469 * function start_kernel_threads() in osfmk/kern/startup.c. It's called
470 * in the context of the current (startup) task using a call to the
471 * function kernel_thread_create() to jump into start_kernel_threads().
472 * Internally, kernel_thread_create() calls thread_create_internal(),
473 * which calls uthread_init(). The function of uthread_init() is
474 * normally to init a uthread structure, and fill out the uu_sigmask,
475 * tro_ucred/tro_proc fields. It skips filling these out in the case of the "task"
476 * being "kernel_task", because the order of operation is inverted. To
477 * account for that, we need to manually fill in at least the contents
478 * of the tro_ucred field so that the uthread structure can be
479 * used like any other.
480 */
481 void
bsd_init(void)482 bsd_init(void)
483 {
484 struct uthread *ut;
485 vnode_t init_rootvnode = NULLVP;
486 struct proc_ro_data kernproc_ro_data = {
487 .p_csflags = CS_VALID,
488 };
489 struct task_ro_data kerntask_ro_data = { };
490 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
491 boolean_t netboot = FALSE;
492 #endif
493
494 #if (DEVELOPMENT || DEBUG)
495 platform_stall_panic_or_spin(PLATFORM_STALL_XNU_LOCATION_BSD_INIT);
496 #endif
497
498 #define DEBUG_BSDINIT 0
499
500 #if DEBUG_BSDINIT
501 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
502 #else
503 #define bsd_init_kprintf(x, ...)
504 #endif
505
506 throttle_init();
507
508 printf(copyright);
509
510 #if CONFIG_DEV_KMEM
511 bsd_init_kprintf("calling dev_kmem_init\n");
512 dev_kmem_init();
513 #endif
514
515 /* Initialize kauth subsystem before instancing the first credential */
516 bsd_init_kprintf("calling kauth_init\n");
517 kauth_init();
518
519 /* kernel_task->proc = kernproc; */
520 set_bsdtask_info(kernel_task, (void *)kernproc);
521
522 /* Set the parent of kernproc to itself */
523 kernproc->p_pptr = kernproc;
524
525 /* Set the state to SRUN */
526 kernproc->p_stat = SRUN;
527
528 /* Set the proc flags */
529 #if defined(__LP64__)
530 kernproc->p_flag = P_SYSTEM | P_LP64;
531 #else
532 kernproc->p_flag = P_SYSTEM;
533 #endif
534
535 kernproc->p_nice = NZERO;
536 TAILQ_INIT(&kernproc->p_uthlist);
537
538 /* set the cred */
539 kauth_cred_set(&kernproc_ro_data.p_ucred.__smr_ptr, vfs_context0.vc_ucred);
540 kernproc->p_proc_ro = proc_ro_alloc(kernproc, &kernproc_ro_data,
541 kernel_task, &kerntask_ro_data);
542
543 /* give kernproc a name */
544 bsd_init_kprintf("calling process_name\n");
545 process_name("kernel_task", kernproc);
546
547 /* Allocate proc lock attribute */
548
549 lck_mtx_init(&kernproc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
550 lck_mtx_init(&kernproc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
551 #if CONFIG_AUDIT
552 lck_mtx_init(&kernproc->p_audit_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
553 #endif /* CONFIG_AUDIT */
554 lck_spin_init(&kernproc->p_slock, &proc_slock_grp, &proc_lck_attr);
555
556 /* Init the file descriptor table. */
557 fdt_init(kernproc);
558 kernproc->p_fd.fd_cmask = (mode_t)cmask;
559
560 assert(bsd_simul_execs != 0);
561 execargs_cache_size = bsd_simul_execs;
562 execargs_free_count = bsd_simul_execs;
563 execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
564 ZALIGN(vm_offset_t));
565
566 if (current_task() != kernel_task) {
567 printf("bsd_init: We have a problem, "
568 "current task is not kernel task\n");
569 }
570
571 bsd_init_kprintf("calling get_bsdthread_info\n");
572 ut = current_uthread();
573
574 #if CONFIG_MACF
575 /*
576 * Initialize the MAC Framework
577 */
578 mac_policy_initbsd();
579
580 #if defined (__i386__) || defined (__x86_64__)
581 /*
582 * We currently only support this on i386/x86_64, as that is the
583 * only lock code we have instrumented so far.
584 */
585 int policy_check_flags;
586 PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
587 check_policy_init(policy_check_flags);
588 #endif
589 #endif /* MAC */
590
591 /*
592 * Make a session and group
593 *
594 * No need to hold the pgrp lock,
595 * there are no other BSD threads yet.
596 */
597 struct session *session0 = session_alloc(kernproc);
598 struct pgrp *pgrp0 = pgrp_alloc(0, PGRP_REF_NONE);
599 session0->s_ttypgrpid = 0;
600 pgrp0->pg_session = session0;
601
602 /*
603 * Create process 0.
604 */
605 proc_list_lock();
606 os_ref_init_mask(&kernproc->p_refcount, P_REF_BITS, &p_refgrp, P_REF_NONE);
607 os_ref_init_raw(&kernproc->p_waitref, &p_refgrp);
608 proc_ref_hold_proc_task_struct(kernproc);
609
610 /*
611 * Make a group and session, then simulate pinsertchild(),
612 * adjusted for the kernel.
613 */
614 pghash_insert_locked(pgrp0);
615
616 LIST_INSERT_HEAD(&pgrp0->pg_members, kernproc, p_pglist);
617 smr_init_store(&kernproc->p_pgrp, pgrp0);
618 LIST_INSERT_HEAD(&allproc, kernproc, p_list);
619
620 LIST_INSERT_HEAD(SESSHASH(0), session0, s_hash);
621 proc_list_unlock();
622
623 proc_set_task(kernproc, kernel_task);
624
625 #if DEVELOPMENT || DEBUG
626 if (bootarg_disable_aslr) {
627 kernproc->p_flag |= P_DISABLE_ASLR;
628 }
629 #endif
630
631 TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
632
633 /*
634 * Officially associate the kernel with vfs_context0.vc_ucred.
635 */
636 #if CONFIG_MACF
637 mac_cred_label_associate_kernel(vfs_context0.vc_ucred);
638 #endif
639 proc_update_creds_onproc(kernproc, vfs_context0.vc_ucred);
640
641 TAILQ_INIT(&kernproc->p_aio_activeq);
642 TAILQ_INIT(&kernproc->p_aio_doneq);
643 kernproc->p_aio_total_count = 0;
644
645 /* Create the limits structures. */
646 for (uint32_t i = 0; i < ARRAY_COUNT(limit0.pl_rlimit); i++) {
647 limit0.pl_rlimit[i].rlim_cur =
648 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
649 }
650 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
651 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
652 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
653 limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
654 limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
655 limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
656 os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
657
658 smr_init_store(&kernproc->p_limit, &limit0);
659 kernproc->p_stats = &pstats0;
660 kernproc->p_subsystem_root_path = NULL;
661
662 /*
663 * Charge root for one process: launchd.
664 */
665 bsd_init_kprintf("calling chgproccnt\n");
666 (void)chgproccnt(0, 1);
667
668 /*
669 * Allocate a kernel submap for pageable memory
670 * for temporary copying (execve()).
671 */
672 bsd_init_kprintf("calling kmem_suballoc\n");
673 bsd_pageable_map = kmem_suballoc(kernel_map,
674 &bsd_pageable_range.min_address,
675 (vm_size_t)bsd_pageable_map_size,
676 VM_MAP_CREATE_PAGEABLE,
677 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
678 KMS_PERMANENT | KMS_NOFAIL,
679 VM_KERN_MEMORY_BSD).kmr_submap;
680
681 /*
682 * Initialize buffers and hash links for buffers
683 *
684 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
685 * happen after a credential has been associated with
686 * the kernel task.
687 */
688 bsd_init_kprintf("calling bsd_bufferinit\n");
689 bsd_bufferinit();
690
691 /*
692 * Initialize the calendar.
693 */
694 bsd_init_kprintf("calling IOKitInitializeTime\n");
695 IOKitInitializeTime();
696
697 /* Initialize the file systems. */
698 bsd_init_kprintf("calling vfsinit\n");
699 vfsinit();
700
701 #if CONFIG_PROC_UUID_POLICY
702 /* Initial proc_uuid_policy subsystem */
703 bsd_init_kprintf("calling proc_uuid_policy_init()\n");
704 proc_uuid_policy_init();
705 #endif
706
707 #if SOCKETS
708 #if CONFIG_MBUF_MCACHE
709 /* Initialize per-CPU cache allocator */
710 mcache_init();
711 #endif /* CONFIG_MBUF_MCACHE */
712
713 /* Initialize mbuf's. */
714 bsd_init_kprintf("calling mbinit\n");
715 mbinit();
716 restricted_in_port_init();
717 #endif /* SOCKETS */
718
719 /*
720 * Initializes security event auditing.
721 * XXX: Should/could this occur later?
722 */
723 #if CONFIG_AUDIT
724 bsd_init_kprintf("calling audit_init\n");
725 audit_init();
726 #endif
727
728 /* Initialize kqueues */
729 bsd_init_kprintf("calling knote_init\n");
730 knote_init();
731
732 /* Initialize event handler */
733 bsd_init_kprintf("calling eventhandler_init\n");
734 eventhandler_init();
735
736 /* Initialize for async IO */
737 bsd_init_kprintf("calling aio_init\n");
738 aio_init();
739
740 pthread_init();
741 /* POSIX Shm and Sem */
742 bsd_init_kprintf("calling pshm_cache_init\n");
743 pshm_cache_init();
744 bsd_init_kprintf("calling psem_cache_init\n");
745 psem_cache_init();
746
747 /*
748 * Initialize protocols. Block reception of incoming packets
749 * until everything is ready.
750 */
751 #if NETWORKING
752 bsd_init_kprintf("calling nwk_wq_init\n");
753 nwk_wq_init();
754 bsd_init_kprintf("calling dlil_init\n");
755 dlil_init();
756 #endif /* NETWORKING */
757 #if SOCKETS
758 bsd_init_kprintf("calling socketinit\n");
759 socketinit();
760 bsd_init_kprintf("calling domaininit\n");
761 domaininit();
762 iptap_init();
763 #if FLOW_DIVERT
764 flow_divert_init();
765 #endif /* FLOW_DIVERT */
766 #endif /* SOCKETS */
767 #if SKYWALK
768 bsd_init_kprintf("calling skywalk_init\n");
769 (void) skywalk_init();
770 #endif /* SKYWALK */
771 #if NETWORKING
772 #if NECP
773 /* Initialize Network Extension Control Policies */
774 necp_init();
775 #endif
776 netagent_init();
777 #endif /* NETWORKING */
778
779 #if CONFIG_FREEZE
780 #ifndef CONFIG_MEMORYSTATUS
781 #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
782 #endif
783 /* Initialise background freezing */
784 bsd_init_kprintf("calling memorystatus_freeze_init\n");
785 memorystatus_freeze_init();
786 #endif
787
788 #if CONFIG_MEMORYSTATUS
789 /* Initialize kernel memory status notifications */
790 bsd_init_kprintf("calling memorystatus_init\n");
791 memorystatus_init();
792 #endif /* CONFIG_MEMORYSTATUS */
793
794 bsd_init_kprintf("calling sysctl_mib_init\n");
795 sysctl_mib_init();
796
797 bsd_init_kprintf("calling bsd_autoconf\n");
798 bsd_autoconf();
799
800 #if CONFIG_DTRACE
801 dtrace_postinit();
802 #endif
803
804 /*
805 * We attach the loopback interface *way* down here to ensure
806 * it happens after autoconf(), otherwise it becomes the
807 * "primary" interface.
808 */
809 #include <loop.h>
810 #if NLOOP > 0
811 bsd_init_kprintf("calling loopattach\n");
812 loopattach(); /* XXX */
813 #endif
814 #if NGIF
815 /* Initialize gif interface (after lo0) */
816 gif_init();
817 #endif
818
819 #if PFLOG
820 /* Initialize packet filter log interface */
821 pfloginit();
822 #endif /* PFLOG */
823
824 #if NETHER > 0
825 /* Register the built-in dlil ethernet interface family */
826 bsd_init_kprintf("calling ether_family_init\n");
827 ether_family_init();
828 #endif /* ETHER */
829
830 #if NETWORKING
831 #if CONTENT_FILTER
832 cfil_init();
833 #endif
834
835 #if PACKET_MANGLER
836 pkt_mnglr_init();
837 #endif
838
839 /*
840 * Register subsystems with kernel control handlers
841 */
842 utun_register_control();
843 #if IPSEC
844 ipsec_init();
845 #endif /* IPSEC */
846 netsrc_init();
847 nstat_init();
848 #if MPTCP
849 mptcp_control_register();
850 #endif /* MPTCP */
851
852 #if REMOTE_VIF
853 rvi_init();
854 #endif /* REMOTE_VIF */
855
856 #if IF_REDIRECT
857 if_redirect_init();
858 #endif /* REDIRECT */
859
860 #if KCTL_TEST
861 kctl_test_init();
862 #endif /* KCTL_TEST */
863
864 /*
865 * The the networking stack is now initialized so it is a good time to call
866 * the clients that are waiting for the networking stack to be usable.
867 */
868 bsd_init_kprintf("calling net_init_run\n");
869 net_init_run();
870 #endif /* NETWORKING */
871
872 bsd_init_kprintf("calling inittodr\n");
873 inittodr(0);
874
875 /* Mount the root file system. */
876 while (TRUE) {
877 int err;
878
879 bsd_init_kprintf("calling setconf\n");
880 setconf();
881 #if CONFIG_NETBOOT
882 netboot = (mountroot == netboot_mountroot);
883 #endif
884
885 bsd_init_kprintf("vfs_mountroot\n");
886 if (0 == (err = vfs_mountroot())) {
887 break;
888 }
889 rootdevice[0] = '\0';
890 #if CONFIG_NETBOOT
891 if (netboot) {
892 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
893 vc_progress_set(FALSE, 0);
894 for (uint32_t i = 1; 1; i *= 2) {
895 printf("bsd_init: failed to mount network root, error %d, %s\n",
896 err, PE_boot_args());
897 printf("We are hanging here...\n");
898 IOSleep(i * 60 * 1000);
899 }
900 /*NOTREACHED*/
901 }
902 #endif
903 printf("cannot mount root, errno = %d\n", err);
904 }
905
906 IOSecureBSDRoot(rootdevice);
907
908 mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
909
910 bsd_init_kprintf("calling VFS_ROOT\n");
911 /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */
912 if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, vfs_context_kernel())) {
913 panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
914 }
915 (void)vnode_ref(init_rootvnode);
916 (void)vnode_put(init_rootvnode);
917
918 lck_rw_lock_exclusive(&rootvnode_rw_lock);
919 set_rootvnode(init_rootvnode);
920 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
921 init_rootvnode = NULLVP; /* use rootvnode after this point */
922
923
924 if (!bsd_rooted_ramdisk()) {
925 boolean_t require_rootauth = FALSE;
926
927 #if XNU_TARGET_OS_OSX && defined(__arm64__)
928 #if CONFIG_IMAGEBOOT
929 /* Apple Silicon MacOS */
930 require_rootauth = !imageboot_desired();
931 #endif // CONFIG_IMAGEBOOT
932 #elif !XNU_TARGET_OS_OSX
933 /* Non MacOS */
934 require_rootauth = TRUE;
935 #endif // XNU_TARGET_OS_OSX && defined(__arm64__)
936
937 if (require_rootauth) {
938 /* enforce sealedness */
939 int autherr = VNOP_IOCTL(rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
940 if (autherr) {
941 panic("rootvp not authenticated after mounting");
942 }
943 }
944 }
945
946
947 #if CONFIG_NETBOOT
948 if (netboot) {
949 int err;
950
951 netboot = TRUE;
952 /* post mount setup */
953 if ((err = netboot_setup()) != 0) {
954 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
955 vc_progress_set(FALSE, 0);
956 for (uint32_t i = 1; 1; i *= 2) {
957 printf("bsd_init: NetBoot could not find root, error %d: %s\n",
958 err, PE_boot_args());
959 printf("We are hanging here...\n");
960 IOSleep(i * 60 * 1000);
961 }
962 /*NOTREACHED*/
963 }
964 }
965 #endif
966
967
968 #if CONFIG_IMAGEBOOT
969 /*
970 * See if a system disk image is present. If so, mount it and
971 * switch the root vnode to point to it
972 */
973 imageboot_type_t imageboot_type = imageboot_needed();
974 if (netboot == FALSE && imageboot_type) {
975 /*
976 * An image was found. No turning back: we're booted
977 * with a kernel from the disk image.
978 */
979 bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
980 imageboot_setup(imageboot_type);
981 IOSetImageBoot();
982 }
983
984 #endif /* CONFIG_IMAGEBOOT */
985
986 /* set initial time; all other resource data is already zero'ed */
987 microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);
988
989 #if DEVFS
990 {
991 char mounthere[] = "/dev"; /* !const because of internal casting */
992
993 bsd_init_kprintf("calling devfs_kernel_mount\n");
994 devfs_kernel_mount(mounthere);
995 }
996 #endif /* DEVFS */
997
998 #if CONFIG_BASESYSTEMROOT
999 #if CONFIG_IMAGEBOOT
1000 if (bsdmgroot_bootable()) {
1001 int error;
1002 bool rooted_dmg = false;
1003 bool skip_signature_check = false;
1004
1005 printf("trying to find and mount BaseSystem dmg as root volume\n");
1006 #if DEVELOPMENT || DEBUG
1007 printf("(set boot-arg -nobsdmgroot to avoid this)\n");
1008 #endif // DEVELOPMENT || DEBUG
1009
1010 char *dmgpath = NULL;
1011 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK | Z_NOFAIL);
1012
1013 error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg, &skip_signature_check);
1014 if (error) {
1015 bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
1016 } else {
1017 PE_parse_boot_argn("bsdmgpath", dmgpath, sizeof(dmgpath));
1018
1019 bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
1020
1021 error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg, skip_signature_check);
1022 if (error) {
1023 bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
1024 } else {
1025 IOSetImageBoot();
1026 }
1027 }
1028 zfree(ZV_NAMEI, dmgpath);
1029 }
1030 #else /* CONFIG_IMAGEBOOT */
1031 #error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
1032 #endif /* CONFIG_IMAGEBOOT */
1033 #endif /* CONFIG_BASESYSTEMROOT */
1034
1035 /* Initialize signal state for process 0. */
1036 bsd_init_kprintf("calling siginit\n");
1037 siginit(kernproc);
1038
1039 bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1040 bsd_utaskbootstrap();
1041
1042 pal_kernel_announce();
1043
1044 bsd_init_kprintf("calling mountroot_post_hook\n");
1045
1046 #if XNU_TARGET_OS_OSX
1047 /* invoke post-root-mount hook */
1048 if (mountroot_post_hook != NULL) {
1049 mountroot_post_hook();
1050 }
1051 #endif
1052
1053 #if 0 /* not yet */
1054 consider_zone_gc(FALSE);
1055 #endif
1056
1057 #if DEVELOPMENT || DEBUG
1058 /*
1059 * At this point, we consider the kernel "booted" enough to apply
1060 * stricter timeouts. Only used for debug timeouts.
1061 */
1062 machine_timeout_bsd_init();
1063 #endif /* DEVELOPMENT || DEBUG */
1064
1065 bsd_init_kprintf("done\n");
1066 }
1067
1068 void
bsdinit_task(void)1069 bsdinit_task(void)
1070 {
1071 proc_t p = current_proc();
1072
1073 process_name("init", p);
1074
1075 /* Set up exception-to-signal reflection */
1076 ux_handler_setup();
1077
1078 #if CONFIG_MACF
1079 mac_cred_label_associate_user(proc_ucred_unsafe(p)); /* in init */
1080 #endif
1081
1082 vm_init_before_launchd();
1083
1084 #if CONFIG_XNUPOST
1085 int result = bsd_list_tests();
1086 result = bsd_do_post();
1087 if (result != 0) {
1088 panic("bsd_do_post: Tests failed with result = 0x%08x", result);
1089 }
1090 #endif
1091
1092 bsd_init_kprintf("bsd_do_post - done");
1093
1094 load_init_program(p);
1095 lock_trace = 1;
1096 }
1097
1098 kern_return_t
bsd_autoconf(void)1099 bsd_autoconf(void)
1100 {
1101 kprintf("bsd_autoconf: calling kminit\n");
1102 kminit();
1103
1104 /*
1105 * Early startup for bsd pseudodevices.
1106 */
1107 {
1108 struct pseudo_init *pi;
1109
1110 for (pi = pseudo_inits; pi->ps_func; pi++) {
1111 (*pi->ps_func)(pi->ps_count);
1112 }
1113 }
1114
1115 return IOKitBSDInit();
1116 }
1117
1118
1119 #include <sys/disklabel.h> /* for MAXPARTITIONS */
1120
1121 static void
setconf(void)1122 setconf(void)
1123 {
1124 u_int32_t flags;
1125 kern_return_t err;
1126
1127 err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1128 if (err) {
1129 printf("setconf: IOFindBSDRoot returned an error (%d);"
1130 "setting rootdevice to 'sd0a'.\n", err); /* XXX DEBUG TEMP */
1131 rootdev = makedev( 6, 0 );
1132 strlcpy(rootdevice, "sd0a", sizeof(rootdevice));
1133 flags = 0;
1134 }
1135
1136 #if CONFIG_NETBOOT
1137 if (flags & 1) {
1138 /* network device */
1139 mountroot = netboot_mountroot;
1140 } else {
1141 #endif
1142 /* otherwise have vfs determine root filesystem */
1143 mountroot = NULL;
1144 #if CONFIG_NETBOOT
1145 }
1146 #endif
1147 }
1148
1149 /*
1150 * Boot into the flavor of Recovery dictated by `mode`.
1151 */
1152 boolean_t
bsd_boot_to_recovery(bsd_bootfail_mode_t mode,uuid_t volume_uuid,boolean_t reboot)1153 bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1154 {
1155 return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1156 }
1157
/*
 * Create the first user process (pid 1) by cloning kernproc and release
 * its initial thread.
 *
 * Side effects visible here: sets the global initproc to the result of
 * proc_find(1), calls zalloc_first_proc_made(), and ends the signal and
 * transition states on initproc.  The statement order below is
 * significant; do not reorder.
 */
void
bsd_utaskbootstrap(void)
{
	thread_t thread;
	struct uthread *ut;

	/*
	 * Clone the bootstrap process from the kernel process, without
	 * inheriting either task characteristics or memory from the kernel.
	 * NOTE(review): the returned thread is used below without a NULL
	 * check -- presumably cloneproc() cannot fail this early; confirm.
	 */
	thread = cloneproc(TASK_NULL, NULL, kernproc, CLONEPROC_FLAGS_MEMSTAT_INTERNAL);

	/* Hold the reference as it will be dropped during shutdown */
	initproc = proc_find(1);
#if __PROC_INTERNAL_DEBUG
	/* Only debug builds verify that pid 1 was actually found. */
	if (initproc == PROC_NULL) {
		panic("bsd_utaskbootstrap: initproc not set");
	}
#endif

	zalloc_first_proc_made();

	/*
	 * Since we aren't going back out the normal way to our parent,
	 * we have to drop the transition locks explicitly.
	 */
	proc_signalend(initproc, 0);
	proc_transend(initproc, 0);

	/* Start the new thread with an empty signal mask. */
	ut = (struct uthread *)get_bsdthread_info(thread);
	ut->uu_sigmask = 0;
	/* presumably posts a BSD AST on the new thread -- TODO confirm */
	act_set_astbsd(thread);

	ipc_task_enable(get_threadtask(thread));

	/* presumably lets the new task leave its initial return-wait state -- TODO confirm */
	task_clear_return_wait(get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
}
1195
1196 static void
parse_bsd_args(void)1197 parse_bsd_args(void)
1198 {
1199 char namep[48];
1200
1201 if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
1202 boothowto |= RB_SINGLE;
1203 }
1204
1205 if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
1206 boothowto |= RB_SAFEBOOT;
1207 }
1208
1209 if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
1210 sizeof(max_nbuf_headers))) {
1211 customnbuf = 1;
1212 }
1213
1214 #if CONFIG_DARKBOOT
1215 /*
1216 * The darkboot flag is specified by the bootloader and is stored in
1217 * boot_args->bootFlags. This flag is available starting revision 2.
1218 */
1219 boot_args *args = (boot_args *) PE_state.bootArgs;
1220 if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1221 darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1222 } else {
1223 darkboot = 0;
1224 }
1225 #endif
1226
1227 #if DEVELOPMENT || DEBUG
1228 if (PE_parse_boot_argn("dyldsuffix", dyld_suffix, sizeof(dyld_suffix))) {
1229 if (strlen(dyld_suffix) > 0) {
1230 use_dyld_suffix = 1;
1231 }
1232 }
1233
1234 if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1235 if (strlen(dyld_alt_path) > 0) {
1236 use_alt_dyld = 1;
1237 }
1238 }
1239
1240 if (PE_parse_boot_arg_str("panic-on-proc-crash", panic_on_proc_crash, sizeof(panic_on_proc_crash))) {
1241 if (strlen(panic_on_proc_crash) > 0) {
1242 use_panic_on_proc_crash = 1;
1243 }
1244 }
1245
1246 if (PE_parse_boot_arg_str("panic-on-proc-exit", panic_on_proc_exit, sizeof(panic_on_proc_exit))) {
1247 if (strlen(panic_on_proc_exit) > 0) {
1248 use_panic_on_proc_exit = 1;
1249 }
1250 }
1251
1252 if (PE_parse_boot_arg_str("panic-on-proc-spawn-fail", panic_on_proc_spawn_fail, sizeof(panic_on_proc_spawn_fail))) {
1253 if (strlen(panic_on_proc_spawn_fail) > 0) {
1254 use_panic_on_proc_spawn_fail = 1;
1255 }
1256 }
1257
1258 if (PE_i_can_has_debugger(NULL) && PE_parse_boot_argn("-hide_process_traced", namep, sizeof(namep))) {
1259 bootarg_hide_process_traced = 1;
1260 }
1261 #endif /* DEVELOPMENT || DEBUG */
1262 }
1263 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, parse_bsd_args);
1264
1265 #if CONFIG_BASESYSTEMROOT
1266
1267 extern bool IOGetBootUUID(char *);
1268 extern bool IOGetApfsPrebootUUID(char *);
1269
1270
1271 // This function returns the UUID of the Preboot (and Recovery) folder associated with the
1272 // current boot volume, if applicable. The meaning of the UUID can be
1273 // filesystem-dependent and not all kinds of boots will have a UUID.
1274 // On success, the UUID is copied into the past-in parameter and TRUE is returned.
1275 // In case the current boot has no applicable Preboot UUID, FALSE is returned.
1276 static bool
get_preboot_uuid(uuid_string_t maybe_uuid_string)1277 get_preboot_uuid(uuid_string_t maybe_uuid_string)
1278 {
1279 // try IOGetApfsPrebootUUID
1280 if (IOGetApfsPrebootUUID(maybe_uuid_string)) {
1281 uuid_t maybe_uuid;
1282 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1283 if (error == 0) {
1284 return true;
1285 }
1286 }
1287
1288 // try IOGetBootUUID
1289 if (IOGetBootUUID(maybe_uuid_string)) {
1290 uuid_t maybe_uuid;
1291 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1292 if (error == 0) {
1293 return true;
1294 }
1295 }
1296
1297 // didn't find it
1298 return false;
1299 }
1300
1301 #if defined(__arm64__)
1302 extern bool IOGetBootObjectsPath(char *);
1303 #endif
1304
1305 // Find the BaseSystem.dmg to be used as the initial root volume during certain
1306 // kinds of boots.
1307 // This may mount volumes and lookup vnodes.
1308 // The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1309 // If it returns 0 (no error), then it also writes the absolute path to the
1310 // BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1311 static
1312 int
bsd_find_basesystem_dmg(char * bsdmgpath_out,bool * rooted_dmg,bool * skip_signature_check)1313 bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
1314 {
1315 int error;
1316 size_t len;
1317 char *dmgbasepath;
1318 char *dmgpath;
1319 bool allow_rooted_dmg = false;
1320
1321 dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1322 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1323 vnode_t imagevp = NULLVP;
1324
1325 #if DEVELOPMENT || DEBUG
1326 allow_rooted_dmg = true;
1327 #endif
1328
1329 //must provide output bool
1330 if (rooted_dmg && skip_signature_check) {
1331 *rooted_dmg = false;
1332 *skip_signature_check = false;
1333 } else {
1334 error = EINVAL;
1335 goto done;
1336 }
1337
1338 error = vfs_mount_recovery();
1339 if (error) {
1340 goto done;
1341 }
1342
1343 len = strlcpy(dmgbasepath, "/System/Volumes/Recovery/", MAXPATHLEN);
1344 if (len > MAXPATHLEN) {
1345 error = ENAMETOOLONG;
1346 goto done;
1347 }
1348
1349 if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
1350 *skip_signature_check = true;
1351 allow_rooted_dmg = true;
1352 }
1353
1354 #if defined(__arm64__)
1355 char boot_obj_path[MAXPATHLEN] = "";
1356
1357 if (IOGetBootObjectsPath(boot_obj_path)) {
1358 if (boot_obj_path[0] == '/') {
1359 dmgbasepath[len - 1] = '\0';
1360 }
1361
1362 len = strlcat(dmgbasepath, boot_obj_path, MAXPATHLEN);
1363 if (len > MAXPATHLEN) {
1364 error = ENAMETOOLONG;
1365 goto done;
1366 }
1367
1368 len = strlcat(dmgbasepath, "/usr/standalone/firmware/", MAXPATHLEN);
1369 if (len > MAXPATHLEN) {
1370 error = ENAMETOOLONG;
1371 goto done;
1372 }
1373
1374 if (allow_rooted_dmg) {
1375 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1376 if (len > MAXPATHLEN) {
1377 error = ENAMETOOLONG;
1378 goto done;
1379 }
1380
1381 len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1382 if (len > MAXPATHLEN) {
1383 error = ENAMETOOLONG;
1384 goto done;
1385 }
1386
1387 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1388 if (error == 0) {
1389 *rooted_dmg = true;
1390 *skip_signature_check = true;
1391 goto done;
1392 }
1393 memset(dmgpath, 0, MAXPATHLEN);
1394 }
1395
1396 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1397 if (len > MAXPATHLEN) {
1398 error = ENAMETOOLONG;
1399 goto done;
1400 }
1401
1402 len = strlcat(dmgpath, "arm64eBaseSystem.dmg", MAXPATHLEN);
1403 if (len > MAXPATHLEN) {
1404 error = ENAMETOOLONG;
1405 goto done;
1406 }
1407
1408 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1409 if (error == 0) {
1410 goto done;
1411 }
1412 memset(dmgpath, 0, MAXPATHLEN);
1413 dmgbasepath[strlen("/System/Volumes/Recovery/")] = '\0';
1414 }
1415 #endif // __arm64__
1416
1417 uuid_string_t preboot_uuid;
1418 if (!get_preboot_uuid(preboot_uuid)) {
1419 // no preboot? bail out
1420 return EINVAL;
1421 }
1422
1423 len = strlcat(dmgbasepath, preboot_uuid, MAXPATHLEN);
1424 if (len > MAXPATHLEN) {
1425 error = ENAMETOOLONG;
1426 goto done;
1427 }
1428
1429 if (allow_rooted_dmg) {
1430 // Try BaseSystem.rooted.dmg
1431 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1432 if (len > MAXPATHLEN) {
1433 error = ENAMETOOLONG;
1434 goto done;
1435 }
1436
1437 len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
1438 if (len > MAXPATHLEN) {
1439 error = ENAMETOOLONG;
1440 goto done;
1441 }
1442
1443 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1444 if (error == 0) {
1445 // we found it! success!
1446 *rooted_dmg = true;
1447 *skip_signature_check = true;
1448 goto done;
1449 }
1450 }
1451
1452 // Try BaseSystem.dmg
1453 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1454 if (len > MAXPATHLEN) {
1455 error = ENAMETOOLONG;
1456 goto done;
1457 }
1458
1459 len = strlcat(dmgpath, "/BaseSystem.dmg", MAXPATHLEN);
1460 if (len > MAXPATHLEN) {
1461 error = ENAMETOOLONG;
1462 goto done;
1463 }
1464
1465 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1466 if (error == 0) {
1467 // success!
1468 goto done;
1469 }
1470
1471 done:
1472 if (error == 0) {
1473 strlcpy(bsdmgpath_out, dmgpath, MAXPATHLEN);
1474 } else {
1475 bsd_init_kprintf("%s: error %d\n", __func__, error);
1476 }
1477 if (imagevp != NULLVP) {
1478 vnode_put(imagevp);
1479 }
1480 zfree(ZV_NAMEI, dmgpath);
1481 zfree(ZV_NAMEI, dmgbasepath);
1482 return error;
1483 }
1484
1485 static boolean_t
bsdmgroot_bootable(void)1486 bsdmgroot_bootable(void)
1487 {
1488 #if defined(__arm64__)
1489 #define BSDMGROOT_DEFAULT true
1490 #else
1491 #define BSDMGROOT_DEFAULT false
1492 #endif
1493
1494 boolean_t resolved = BSDMGROOT_DEFAULT;
1495
1496 boolean_t boot_arg_bsdmgroot = false;
1497 boolean_t boot_arg_nobsdmgroot = false;
1498 int error;
1499 mount_t mp;
1500 boolean_t root_part_of_volume_group = false;
1501 struct vfs_attr vfsattr;
1502
1503 mp = rootvnode->v_mount;
1504 VFSATTR_INIT(&vfsattr);
1505 VFSATTR_WANTED(&vfsattr, f_capabilities);
1506
1507 boot_arg_bsdmgroot = PE_parse_boot_argn("-bsdmgroot", NULL, 0);
1508 boot_arg_nobsdmgroot = PE_parse_boot_argn("-nobsdmgroot", NULL, 0);
1509
1510 error = vfs_getattr(mp, &vfsattr, vfs_context_kernel());
1511 if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1512 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1513 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1514 root_part_of_volume_group = true;
1515 }
1516 }
1517
1518 boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1519
1520 // Start with the #defined default above.
1521 // If booting to single-user mode, default to false, because single-
1522 // user mode inside the BaseSystem is probably not what's wanted.
1523 // If the 'yes' boot-arg is set, we'll allow that even in single-user
1524 // mode, we'll assume you know what you're doing.
1525 // The 'no' boot-arg overpowers the 'yes' boot-arg.
1526 // In any case, we will not attempt to root from BaseSystem if the
1527 // original (booter-chosen) root volume isn't in a volume group.
1528 // This is just out of an abundance of caution: if the boot environment
1529 // seems to be "something other than a standard install",
1530 // we'll be conservative in messing with the root volume.
1531
1532 if (singleuser) {
1533 resolved = false;
1534 }
1535
1536 if (boot_arg_bsdmgroot) {
1537 resolved = true;
1538 }
1539
1540 if (boot_arg_nobsdmgroot) {
1541 resolved = false;
1542 }
1543
1544 if (!root_part_of_volume_group) {
1545 resolved = false;
1546 }
1547
1548 return resolved;
1549 }
1550 #endif // CONFIG_BASESYSTEMROOT
1551
1552 void
bsd_exec_setup(int scale)1553 bsd_exec_setup(int scale)
1554 {
1555 switch (scale) {
1556 case 0:
1557 case 1:
1558 bsd_simul_execs = BSD_SIMUL_EXECS;
1559 break;
1560 case 2:
1561 case 3:
1562 bsd_simul_execs = 65;
1563 break;
1564 case 4:
1565 case 5:
1566 bsd_simul_execs = 129;
1567 break;
1568 case 6:
1569 case 7:
1570 bsd_simul_execs = 257;
1571 break;
1572 default:
1573 bsd_simul_execs = 513;
1574 break;
1575 }
1576 bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1577 }
1578
1579 #if !CONFIG_NETBOOT
/*
 * Stub compiled only when CONFIG_NETBOOT is disabled: always returns 0.
 * Presumably 0 means "root is not network-booted" -- verify against callers.
 */
int netboot_root(void);

int
netboot_root(void)
{
	const int stub_result = 0;

	return stub_result;
}
1588 #endif
1589