1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 *
29 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
66 */
67
68 /*
69 *
70 * Mach Operating System
71 * Copyright (c) 1987 Carnegie-Mellon University
72 * All rights reserved. The CMU software License Agreement specifies
73 * the terms and conditions for use and redistribution.
74 */
75 /*
76 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/conf.h>
91 #include <sys/buf_internal.h>
92 #include <sys/user.h>
93 #include <sys/time.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96 #include <sys/kasl.h>
97
98 #include <security/audit/audit.h>
99
100 #include <sys/malloc.h>
101 #include <sys/dkstat.h>
102 #include <sys/codesign.h>
103
104 #include <kern/startup.h>
105 #include <kern/thread.h>
106 #include <kern/task.h>
107 #include <kern/ast.h>
108 #include <kern/zalloc.h>
109 #include <kern/ux_handler.h> /* for ux_handler_setup() */
110 #include <kern/sched_hygiene.h>
111
112 #include <mach/vm_param.h>
113
114 #include <vm/vm_map.h>
115 #include <vm/vm_kern.h>
116
117 #include <sys/reboot.h>
118 #include <dev/busvar.h> /* for pseudo_inits */
119 #include <sys/kdebug.h>
120 #include <sys/monotonic.h>
121
122 #include <mach/mach_types.h>
123 #include <mach/vm_prot.h>
124 #include <mach/semaphore.h>
125 #include <mach/sync_policy.h>
126 #include <kern/clock.h>
127 #include <sys/csr.h>
128 #include <mach/kern_return.h>
129 #include <mach/thread_act.h> /* for thread_resume() */
130 #include <sys/mcache.h> /* for mcache_init() */
131 #include <sys/mbuf.h> /* for mbinit() */
132 #include <sys/event.h> /* for knote_init() */
133 #include <sys/eventhandler.h> /* for eventhandler_init() */
134 #include <sys/kern_memorystatus.h> /* for memorystatus_init() */
135 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
136 #include <sys/aio_kern.h> /* for aio_init() */
137 #include <sys/semaphore.h> /* for psem_cache_init() */
138 #include <net/dlil.h> /* for dlil_init() */
139 #include <net/iptap.h> /* for iptap_init() */
140 #include <sys/socketvar.h> /* for socketinit() */
141 #include <sys/protosw.h> /* for domaininit() */
142 #include <kern/sched_prim.h> /* for thread_wakeup() */
143 #include <net/if_ether.h> /* for ether_family_init() */
144 #include <net/if_gif.h> /* for gif_init() */
145 #include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
146 #include <vm/vm_kern.h> /* for kmem_suballoc() */
147 #include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
148 #include <netinet/flow_divert.h> /* flow_divert_init() */
149 #include <net/content_filter.h> /* for cfil_init() */
150 #include <net/necp.h> /* for necp_init() */
151 #include <net/network_agent.h> /* for netagent_init() */
152 #include <net/packet_mangler.h> /* for pkt_mnglr_init() */
153 #include <net/if_utun.h> /* for utun_register_control() */
154 #include <net/if_ipsec.h> /* for ipsec_register_control() */
155 #include <net/netsrc.h> /* for netsrc_init() */
156 #include <net/ntstat.h> /* for nstat_init() */
157 #include <netinet/tcp_cc.h> /* for tcp_cc_init() */
158 #include <netinet/mptcp_var.h> /* for mptcp_control_register() */
159 #include <net/nwk_wq.h> /* for nwk_wq_init */
160 #include <net/restricted_in_port.h> /* for restricted_in_port_init() */
161 #include <net/remote_vif.h> /* for rvi_init() */
162 #include <kern/assert.h> /* for assert() */
163 #include <sys/kern_overrides.h> /* for init_system_override() */
164 #include <sys/lockf.h> /* for lf_init() */
165 #include <sys/fsctl.h>
166
167 #include <net/init.h>
168
169 #if CONFIG_MACF
170 #include <security/mac_framework.h>
171 #include <security/mac_internal.h> /* mac_init_bsd() */
172 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
173 #endif
174
175 #include <machine/exec.h>
176
177 #if CONFIG_NETBOOT
178 #include <sys/netboot.h>
179 #endif
180
181 #if CONFIG_IMAGEBOOT
182 #include <sys/imageboot.h>
183 #endif
184
185 #if PFLOG
186 #include <net/if_pflog.h>
187 #endif
188
189 #if SKYWALK
190 #include <skywalk/os_skywalk_private.h>
191 #endif /* SKYWALK */
192
193 #include <pexpert/pexpert.h>
194 #include <machine/pal_routines.h>
195 #include <console/video_console.h>
196
197 #if CONFIG_XNUPOST
198 #include <tests/xnupost.h>
199 #endif
200
201 void * get_user_regs(thread_t); /* XXX kludge for <machine/thread.h> */
202 void IOKitInitializeTime(void); /* XXX */
203 void IOSleep(unsigned int); /* XXX */
204 void IOSetImageBoot(void); /* XXX */
205 void loopattach(void); /* XXX */
206
207 const char *const copyright =
208 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
209 "The Regents of the University of California. "
210 "All rights reserved.\n\n";
211
212 /* Components of the first process -- never freed. */
213 SECURITY_READ_ONLY_LATE(struct vfs_context) vfs_context0;
214
/*
 * Statically allocated proc structure for process 0 (the kernel itself).
 * `kernproc` is initialized to point at this object; the remaining fields
 * are filled in at runtime by bsd_init().
 */
struct proc proc0 = {
	.p_comm = "kernel_task",
	.p_name = "kernel_task",
	.p_pptr = &proc0,       /* process 0 is its own parent */
	.p_stat = SRUN,
#if defined(__LP64__)
	.p_flag = P_SYSTEM | P_LP64,    /* 64-bit kernels additionally set P_LP64 */
#else
	.p_flag = P_SYSTEM,
#endif
	.p_nice = NZERO,
	/* starts empty; bsd_init() appends the startup thread's uthread */
	.p_uthlist = TAILQ_HEAD_INITIALIZER(proc0.p_uthlist),
};
228 static struct plimit limit0;
229 static struct pstats pstats0;
230 SECURITY_READ_ONLY_LATE(proc_t) kernproc = &proc0;
231 proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
232
233 long tk_cancc;
234 long tk_nin;
235 long tk_nout;
236 long tk_rawcc;
237
238 int lock_trace = 0;
239 /* Global variables to make pstat happy. We do swapping differently */
240 int nswdev, nswap;
241 int nswapmap;
242 void *swapmap;
243 struct swdevt swdevt[1];
244
245 static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
246 LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
247 LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
248
249 dev_t rootdev; /* device of the root */
250 dev_t dumpdev; /* device to take dumps on */
251 long dumplo; /* offset into dumpdev */
252 long hostid;
253 char hostname[MAXHOSTNAMELEN];
254 char domainname[MAXDOMNAMELEN];
255 char rootdevice[DEVMAXNAMESIZE];
256
257 struct vnode *rootvp;
258 bool rootvp_is_ssd = false;
259 SECURITY_READ_ONLY_LATE(int) boothowto;
260 /*
261 * -minimalboot indicates that we want userspace to be bootstrapped to a
262 * minimal environment. What constitutes minimal is up to the bootstrap
263 * process.
264 */
265 TUNABLE(int, minimalboot, "-minimalboot", 0);
266 #if CONFIG_DARKBOOT
267 int darkboot = 0;
268 #endif
269
270 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
271 extern void IOSecureBSDRoot(const char * rootName);
272 extern kern_return_t IOKitBSDInit(void );
273 extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
274 extern void kminit(void);
275 extern void bsd_bufferinit(void);
276 extern void throttle_init(void);
277
278 vm_map_t bsd_pageable_map;
279 vm_map_t mb_map;
280
281 static int bsd_simul_execs;
282 static int bsd_pageable_map_size;
283 __private_extern__ int execargs_cache_size = 0;
284 __private_extern__ int execargs_free_count = 0;
285 __private_extern__ vm_offset_t * execargs_cache = NULL;
286
287 void bsd_exec_setup(int);
288
289 __private_extern__ int bootarg_execfailurereports = 0;
290
291 #if __x86_64__
292 __private_extern__ TUNABLE(int, bootarg_no32exec, "no32exec", 1);
293 #endif
294
295 #if DEVELOPMENT || DEBUG
296 /* Prevent kernel-based ASLR from being used. */
297 __private_extern__ TUNABLE(bool, bootarg_disable_aslr, "-disable_aslr", 0);
298 #endif
299
300 /*
301 * Allow an alternate dyld to be used for testing.
302 */
303
304 #if DEVELOPMENT || DEBUG
305 char dyld_alt_path[MAXPATHLEN];
306 int use_alt_dyld = 0;
307 #endif
308
309 int cmask = CMASK;
310 extern int customnbuf;
311
312 kern_return_t bsd_autoconf(void);
313 void bsd_utaskbootstrap(void);
314
315 #if CONFIG_DEV_KMEM
316 extern void dev_kmem_init(void);
317 #endif
318 static void process_name(const char *, proc_t);
319
320 static void setconf(void);
321
322 #if CONFIG_BASESYSTEMROOT
323 static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
324 static boolean_t bsdmgroot_bootable(void);
325 #endif // CONFIG_BASESYSTEMROOT
326
327 static bool bsd_rooted_ramdisk(void);
328
329 #if SYSV_SHM
330 extern void sysv_shm_lock_init(void);
331 #endif
332 #if SYSV_SEM
333 extern void sysv_sem_lock_init(void);
334 #endif
335 #if SYSV_MSG
336 extern void sysv_msg_lock_init(void);
337 #endif
338
339 #if CONFIG_MACF
340 #if defined (__i386__) || defined (__x86_64__)
341 /* MACF policy_check configuration flags; see policy_check.c for details */
342 extern int check_policy_init(int);
343 #endif
344 #endif /* CONFIG_MACF */
345
346 /* If we are using CONFIG_DTRACE */
347 #if CONFIG_DTRACE
348 extern void dtrace_postinit(void);
349 #endif
350
351 /*
352 * Initialization code.
353 * Called from cold start routine as
354 * soon as a stack and segmentation
355 * have been established.
356 * Functions:
357 * turn on clock
358 * hand craft 0th process
359 * call all initialization routines
360 * hand craft 1st user process
361 */
362
363 /*
364 * Sets the name for the given task.
365 */
366 static void
process_name(const char * s,proc_t p)367 process_name(const char *s, proc_t p)
368 {
369 strlcpy(p->p_comm, s, sizeof(p->p_comm));
370 strlcpy(p->p_name, s, sizeof(p->p_name));
371 }
372
373 /* To allow these values to be patched, they're globals here */
374 #include <machine/vmparam.h>
/*
 * Initial stack, data and core-size resource limits.  bsd_init() copies
 * these into limit0 (RLIMIT_STACK, RLIMIT_DATA and RLIMIT_CORE), the limit
 * set attached to kernproc.
 */
struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
378
379 extern struct os_refgrp rlimit_refgrp;
380
381 extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
382 extern int (*mountroot)(void);
383
384 LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
385 LCK_GRP_DECLARE(proc_lck_grp, "proc");
386 LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
387 LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
388 LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
389 LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
390 LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
391 LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
392 LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
393
394
395 LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
396
397 #if XNU_TARGET_OS_OSX
398 /* hook called after root is mounted XXX temporary hack */
399 void (*mountroot_post_hook)(void);
400 void (*unmountroot_pre_hook)(void);
401 #endif
402 void set_rootvnode(vnode_t);
403
404 extern lck_rw_t rootvnode_rw_lock;
405
/*
 * Address range backing bsd_pageable_map.  bsd_init() hands
 * bsd_pageable_range.min_address to kmem_suballoc() when it creates that
 * submap.
 */
SECURITY_READ_ONLY_LATE(struct kmem_range) bsd_pageable_range = {};
/*
 * Register the range with a size callback that reports
 * bsd_pageable_map_size.  The assert requires that the size has already
 * been set to a non-zero value by the time the callback runs.
 * NOTE(review): the point during startup at which the callback is invoked
 * is decided by the kmem range machinery and is not visible here.
 */
KMEM_RANGE_REGISTER_DYNAMIC(bsd_pageable, &bsd_pageable_range, ^() {
	assert(bsd_pageable_map_size != 0);
	return (vm_map_size_t) bsd_pageable_map_size;
});
411
412 /* called with an iocount and usecount on new_rootvnode */
413 void
set_rootvnode(vnode_t new_rootvnode)414 set_rootvnode(vnode_t new_rootvnode)
415 {
416 mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
417 vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
418 vnode_t old_rootvnode = rootvnode;
419
420 new_rootvnode->v_flag |= VROOT;
421 rootvp = new_devvp;
422 rootvnode = new_rootvnode;
423 kernproc->p_fd.fd_cdir = new_rootvnode;
424 if (new_devvp != NULL) {
425 rootdev = vnode_specrdev(new_devvp);
426 } else if (new_mount != NULL) {
427 rootdev = vfs_statfs(new_mount)->f_fsid.val[0]; /* like ATTR_CMN_DEVID */
428 } else {
429 rootdev = NODEV;
430 }
431
432 if (old_rootvnode) {
433 vnode_rele(old_rootvnode);
434 }
435 }
436
437 #define RAMDEV "md0"
438
439 bool
bsd_rooted_ramdisk(void)440 bsd_rooted_ramdisk(void)
441 {
442 bool is_ramdisk = false;
443 char *dev_path = zalloc(ZV_NAMEI);
444 if (dev_path == NULL) {
445 panic("failed to allocate devpath string!");
446 }
447
448 if (PE_parse_boot_argn("rd", dev_path, MAXPATHLEN)) {
449 if (strncmp(dev_path, RAMDEV, strlen(RAMDEV)) == 0) {
450 is_ramdisk = true;
451 }
452 }
453
454 zfree(ZV_NAMEI, dev_path);
455 return is_ramdisk;
456 }
457
458 /*
459 * This function is called very early on in the Mach startup, from the
460 * function start_kernel_threads() in osfmk/kern/startup.c. It's called
461 * in the context of the current (startup) task using a call to the
462 * function kernel_thread_create() to jump into start_kernel_threads().
463 * Internally, kernel_thread_create() calls thread_create_internal(),
464 * which calls uthread_init(). The function of uthread_init() is
465 * normally to init a uthread structure, and fill out the uu_sigmask,
466 * tro_ucred/tro_proc fields. It skips filling these out in the case of the "task"
467 * being "kernel_task", because the order of operation is inverted. To
468 * account for that, we need to manually fill in at least the contents
469 * of the tro_ucred field so that the uthread structure can be
470 * used like any other.
471 */
472 void
bsd_init(void)473 bsd_init(void)
474 {
475 struct uthread *ut;
476 vnode_t init_rootvnode = NULLVP;
477 struct proc_ro_data kernproc_ro_data = {
478 .p_csflags = CS_VALID,
479 };
480 struct task_ro_data kerntask_ro_data = { };
481 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
482 boolean_t netboot = FALSE;
483 #endif
484
485 #define DEBUG_BSDINIT 0
486
487 #if DEBUG_BSDINIT
488 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
489 #else
490 #define bsd_init_kprintf(x, ...)
491 #endif
492
493 throttle_init();
494
495 printf(copyright);
496
497 #if CONFIG_DEV_KMEM
498 bsd_init_kprintf("calling dev_kmem_init\n");
499 dev_kmem_init();
500 #endif
501
502 /* Initialize kauth subsystem before instancing the first credential */
503 bsd_init_kprintf("calling kauth_init\n");
504 kauth_init();
505
506 /* kernel_task->proc = kernproc; */
507 set_bsdtask_info(kernel_task, (void *)kernproc);
508
509 /* set the cred */
510 kauth_cred_set(&kernproc_ro_data.p_ucred, vfs_context0.vc_ucred);
511 kernproc->p_proc_ro = proc_ro_alloc(kernproc, &kernproc_ro_data,
512 kernel_task, &kerntask_ro_data);
513
514 /* give kernproc a name */
515 bsd_init_kprintf("calling process_name\n");
516 process_name("kernel_task", kernproc);
517
518 /* Allocate proc lock attribute */
519
520 lck_mtx_init(&kernproc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
521 lck_mtx_init(&kernproc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
522 lck_spin_init(&kernproc->p_slock, &proc_slock_grp, &proc_lck_attr);
523
524 /* Init the file descriptor table. */
525 fdt_init(kernproc);
526 kernproc->p_fd.fd_cmask = (mode_t)cmask;
527
528 assert(bsd_simul_execs != 0);
529 execargs_cache_size = bsd_simul_execs;
530 execargs_free_count = bsd_simul_execs;
531 execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
532 ZALIGN(vm_offset_t));
533
534 if (current_task() != kernel_task) {
535 printf("bsd_init: We have a problem, "
536 "current task is not kernel task\n");
537 }
538
539 bsd_init_kprintf("calling get_bsdthread_info\n");
540 ut = current_uthread();
541
542 #if CONFIG_MACF
543 /*
544 * Initialize the MAC Framework
545 */
546 mac_policy_initbsd();
547
548 #if defined (__i386__) || defined (__x86_64__)
549 /*
550 * We currently only support this on i386/x86_64, as that is the
551 * only lock code we have instrumented so far.
552 */
553 int policy_check_flags;
554 PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
555 check_policy_init(policy_check_flags);
556 #endif
557 #endif /* MAC */
558
559 /*
560 * Make a session and group
561 *
562 * No need to hold the pgrp lock,
563 * there are no other BSD threads yet.
564 */
565 struct session *session0 = session_alloc(kernproc);
566 struct pgrp *pgrp0 = pgrp_alloc(0, PGRP_REF_NONE);
567 session0->s_ttypgrpid = 0;
568 pgrp0->pg_session = session0;
569
570 /*
571 * Create process 0.
572 */
573 proc_list_lock();
574 os_ref_init_mask(&kernproc->p_refcount, P_REF_BITS, &p_refgrp, P_REF_NONE);
575 os_ref_init_raw(&kernproc->p_waitref, &p_refgrp);
576
577 /*
578 * Make a group and session, then simulate pinsertchild(),
579 * adjusted for the kernel.
580 */
581 pghash_insert_locked(0, pgrp0);
582
583 LIST_INSERT_HEAD(&pgrp0->pg_members, kernproc, p_pglist);
584 hazard_ptr_init(&kernproc->p_pgrp, pgrp0);
585 LIST_INSERT_HEAD(&allproc, kernproc, p_list);
586
587 LIST_INSERT_HEAD(SESSHASH(0), session0, s_hash);
588 proc_list_unlock();
589
590 kernproc->task = kernel_task;
591
592 #if DEVELOPMENT || DEBUG
593 if (bootarg_disable_aslr) {
594 kernproc->p_flag |= P_DISABLE_ASLR;
595 }
596 #endif
597
598 TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
599
600 bsd_init_kprintf("calling kauth_cred_create\n");
601 /*
602 * Officially associate the kernel with vfs_context0.vc_ucred.
603 */
604 #if CONFIG_MACF
605 mac_cred_label_associate_kernel(vfs_context0.vc_ucred);
606 #endif
607 proc_update_creds_onproc(kernproc);
608
609 TAILQ_INIT(&kernproc->p_aio_activeq);
610 TAILQ_INIT(&kernproc->p_aio_doneq);
611 kernproc->p_aio_total_count = 0;
612
613 /* Create the limits structures. */
614 for (uint32_t i = 0; i < ARRAY_COUNT(limit0.pl_rlimit); i++) {
615 limit0.pl_rlimit[i].rlim_cur =
616 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
617 }
618 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
619 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
620 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
621 limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
622 limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
623 limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
624 os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
625
626 hazard_ptr_init(&kernproc->p_limit, &limit0);
627 kernproc->p_stats = &pstats0;
628 proc_sigacts_copy(kernproc, NULL);
629 kernproc->p_subsystem_root_path = NULL;
630
631 /*
632 * Charge root for one process: launchd.
633 */
634 bsd_init_kprintf("calling chgproccnt\n");
635 (void)chgproccnt(0, 1);
636
637 /*
638 * Allocate a kernel submap for pageable memory
639 * for temporary copying (execve()).
640 */
641 bsd_init_kprintf("calling kmem_suballoc\n");
642 bsd_pageable_map = kmem_suballoc(kernel_map,
643 &bsd_pageable_range.min_address,
644 (vm_size_t)bsd_pageable_map_size,
645 VM_MAP_CREATE_PAGEABLE,
646 VM_FLAGS_FIXED_RANGE_SUBALLOC,
647 KMS_PERMANENT | KMS_NOFAIL,
648 VM_KERN_MEMORY_BSD).kmr_submap;
649
650 /*
651 * Initialize buffers and hash links for buffers
652 *
653 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
654 * happen after a credential has been associated with
655 * the kernel task.
656 */
657 bsd_init_kprintf("calling bsd_bufferinit\n");
658 bsd_bufferinit();
659
660 /*
661 * Initialize the calendar.
662 */
663 bsd_init_kprintf("calling IOKitInitializeTime\n");
664 IOKitInitializeTime();
665
666 /* Initialize the file systems. */
667 bsd_init_kprintf("calling vfsinit\n");
668 vfsinit();
669
670 #if CONFIG_PROC_UUID_POLICY
671 /* Initial proc_uuid_policy subsystem */
672 bsd_init_kprintf("calling proc_uuid_policy_init()\n");
673 proc_uuid_policy_init();
674 #endif
675
676 #if SOCKETS
677 /* Initialize per-CPU cache allocator */
678 mcache_init();
679
680 /* Initialize mbuf's. */
681 bsd_init_kprintf("calling mbinit\n");
682 mbinit();
683 restricted_in_port_init();
684 #endif /* SOCKETS */
685
686 /*
687 * Initializes security event auditing.
688 * XXX: Should/could this occur later?
689 */
690 #if CONFIG_AUDIT
691 bsd_init_kprintf("calling audit_init\n");
692 audit_init();
693 #endif
694
695 /* Initialize kqueues */
696 bsd_init_kprintf("calling knote_init\n");
697 knote_init();
698
699 /* Initialize event handler */
700 bsd_init_kprintf("calling eventhandler_init\n");
701 eventhandler_init();
702
703 /* Initialize for async IO */
704 bsd_init_kprintf("calling aio_init\n");
705 aio_init();
706
707 pthread_init();
708 /* POSIX Shm and Sem */
709 bsd_init_kprintf("calling pshm_cache_init\n");
710 pshm_cache_init();
711 bsd_init_kprintf("calling psem_cache_init\n");
712 psem_cache_init();
713
714 /*
715 * Initialize protocols. Block reception of incoming packets
716 * until everything is ready.
717 */
718 #if NETWORKING
719 bsd_init_kprintf("calling nwk_wq_init\n");
720 nwk_wq_init();
721 bsd_init_kprintf("calling dlil_init\n");
722 dlil_init();
723 #endif /* NETWORKING */
724 #if SOCKETS
725 bsd_init_kprintf("calling socketinit\n");
726 socketinit();
727 bsd_init_kprintf("calling domaininit\n");
728 domaininit();
729 iptap_init();
730 #if FLOW_DIVERT
731 flow_divert_init();
732 #endif /* FLOW_DIVERT */
733 #endif /* SOCKETS */
734 #if SKYWALK
735 bsd_init_kprintf("calling skywalk_init\n");
736 (void) skywalk_init();
737 #endif /* SKYWALK */
738 #if NETWORKING
739 #if NECP
740 /* Initialize Network Extension Control Policies */
741 necp_init();
742 #endif
743 netagent_init();
744 #endif /* NETWORKING */
745
746 #if defined (__x86_64__)
747 hvg_bsd_init();
748 #endif /* DEBUG || DEVELOPMENT */
749
750 #if CONFIG_FREEZE
751 #ifndef CONFIG_MEMORYSTATUS
752 #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
753 #endif
754 /* Initialise background freezing */
755 bsd_init_kprintf("calling memorystatus_freeze_init\n");
756 memorystatus_freeze_init();
757 #endif
758
759 #if CONFIG_MEMORYSTATUS
760 /* Initialize kernel memory status notifications */
761 bsd_init_kprintf("calling memorystatus_init\n");
762 memorystatus_init();
763 #endif /* CONFIG_MEMORYSTATUS */
764
765 bsd_init_kprintf("calling sysctl_mib_init\n");
766 sysctl_mib_init();
767
768 bsd_init_kprintf("calling bsd_autoconf\n");
769 bsd_autoconf();
770
771 #if CONFIG_DTRACE
772 dtrace_postinit();
773 #endif
774
775 /*
776 * We attach the loopback interface *way* down here to ensure
777 * it happens after autoconf(), otherwise it becomes the
778 * "primary" interface.
779 */
780 #include <loop.h>
781 #if NLOOP > 0
782 bsd_init_kprintf("calling loopattach\n");
783 loopattach(); /* XXX */
784 #endif
785 #if NGIF
786 /* Initialize gif interface (after lo0) */
787 gif_init();
788 #endif
789
790 #if PFLOG
791 /* Initialize packet filter log interface */
792 pfloginit();
793 #endif /* PFLOG */
794
795 #if NETHER > 0
796 /* Register the built-in dlil ethernet interface family */
797 bsd_init_kprintf("calling ether_family_init\n");
798 ether_family_init();
799 #endif /* ETHER */
800
801 #if NETWORKING
802 #if CONTENT_FILTER
803 cfil_init();
804 #endif
805
806 #if PACKET_MANGLER
807 pkt_mnglr_init();
808 #endif
809
810 /*
811 * Register subsystems with kernel control handlers
812 */
813 utun_register_control();
814 #if IPSEC
815 ipsec_register_control();
816 #endif /* IPSEC */
817 netsrc_init();
818 nstat_init();
819 tcp_cc_init();
820 #if MPTCP
821 mptcp_control_register();
822 #endif /* MPTCP */
823
824 #if REMOTE_VIF
825 rvi_init();
826 #endif /* REMOTE_VIF */
827
828 /*
829 * The the networking stack is now initialized so it is a good time to call
830 * the clients that are waiting for the networking stack to be usable.
831 */
832 bsd_init_kprintf("calling net_init_run\n");
833 net_init_run();
834 #endif /* NETWORKING */
835
836 bsd_init_kprintf("calling inittodr\n");
837 inittodr(0);
838
839 /* Mount the root file system. */
840 while (TRUE) {
841 int err;
842
843 bsd_init_kprintf("calling setconf\n");
844 setconf();
845 #if CONFIG_NETBOOT
846 netboot = (mountroot == netboot_mountroot);
847 #endif
848
849 bsd_init_kprintf("vfs_mountroot\n");
850 if (0 == (err = vfs_mountroot())) {
851 break;
852 }
853 rootdevice[0] = '\0';
854 #if CONFIG_NETBOOT
855 if (netboot) {
856 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
857 vc_progress_set(FALSE, 0);
858 for (uint32_t i = 1; 1; i *= 2) {
859 printf("bsd_init: failed to mount network root, error %d, %s\n",
860 err, PE_boot_args());
861 printf("We are hanging here...\n");
862 IOSleep(i * 60 * 1000);
863 }
864 /*NOTREACHED*/
865 }
866 #endif
867 printf("cannot mount root, errno = %d\n", err);
868 }
869
870 IOSecureBSDRoot(rootdevice);
871
872 mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
873
874 bsd_init_kprintf("calling VFS_ROOT\n");
875 /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */
876 if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, vfs_context_kernel())) {
877 panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
878 }
879 (void)vnode_ref(init_rootvnode);
880 (void)vnode_put(init_rootvnode);
881
882 lck_rw_lock_exclusive(&rootvnode_rw_lock);
883 set_rootvnode(init_rootvnode);
884 lck_rw_unlock_exclusive(&rootvnode_rw_lock);
885 init_rootvnode = NULLVP; /* use rootvnode after this point */
886
887
888 if (!bsd_rooted_ramdisk()) {
889 boolean_t require_rootauth = FALSE;
890
891 #if XNU_TARGET_OS_OSX && defined(__arm64__)
892 #if CONFIG_IMAGEBOOT
893 /* Apple Silicon MacOS */
894 require_rootauth = !imageboot_desired();
895 #endif // CONFIG_IMAGEBOOT
896 #elif !XNU_TARGET_OS_OSX
897 /* Non MacOS */
898 require_rootauth = TRUE;
899 #endif // XNU_TARGET_OS_OSX && defined(__arm64__)
900
901 if (require_rootauth) {
902 /* enforce sealedness */
903 int autherr = VNOP_IOCTL(rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
904 if (autherr) {
905 panic("rootvp not authenticated after mounting");
906 }
907 }
908 }
909
910
911 #if CONFIG_NETBOOT
912 if (netboot) {
913 int err;
914
915 netboot = TRUE;
916 /* post mount setup */
917 if ((err = netboot_setup()) != 0) {
918 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
919 vc_progress_set(FALSE, 0);
920 for (uint32_t i = 1; 1; i *= 2) {
921 printf("bsd_init: NetBoot could not find root, error %d: %s\n",
922 err, PE_boot_args());
923 printf("We are hanging here...\n");
924 IOSleep(i * 60 * 1000);
925 }
926 /*NOTREACHED*/
927 }
928 }
929 #endif
930
931
932 #if CONFIG_IMAGEBOOT
933 /*
934 * See if a system disk image is present. If so, mount it and
935 * switch the root vnode to point to it
936 */
937 imageboot_type_t imageboot_type = imageboot_needed();
938 if (netboot == FALSE && imageboot_type) {
939 /*
940 * An image was found. No turning back: we're booted
941 * with a kernel from the disk image.
942 */
943 bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
944 imageboot_setup(imageboot_type);
945 IOSetImageBoot();
946 }
947
948 #endif /* CONFIG_IMAGEBOOT */
949
950 /* set initial time; all other resource data is already zero'ed */
951 microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);
952
953 #if DEVFS
954 {
955 char mounthere[] = "/dev"; /* !const because of internal casting */
956
957 bsd_init_kprintf("calling devfs_kernel_mount\n");
958 devfs_kernel_mount(mounthere);
959 }
960 #endif /* DEVFS */
961
962 #if CONFIG_BASESYSTEMROOT
963 #if CONFIG_IMAGEBOOT
964 if (bsdmgroot_bootable()) {
965 int error;
966 bool rooted_dmg = false;
967 bool skip_signature_check = false;
968
969 printf("trying to find and mount BaseSystem dmg as root volume\n");
970 #if DEVELOPMENT || DEBUG
971 printf("(set boot-arg -nobsdmgroot to avoid this)\n");
972 #endif // DEVELOPMENT || DEBUG
973
974 char *dmgpath = NULL;
975 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK | Z_NOFAIL);
976
977 error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg, &skip_signature_check);
978 if (error) {
979 bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
980 } else {
981 PE_parse_boot_argn("bsdmgpath", dmgpath, sizeof(dmgpath));
982
983 bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
984
985 error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg, skip_signature_check);
986 if (error) {
987 bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
988 } else {
989 IOSetImageBoot();
990 }
991 }
992 zfree(ZV_NAMEI, dmgpath);
993 }
994 #else /* CONFIG_IMAGEBOOT */
995 #error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
996 #endif /* CONFIG_IMAGEBOOT */
997 #endif /* CONFIG_BASESYSTEMROOT */
998
999 /* Initialize signal state for process 0. */
1000 bsd_init_kprintf("calling siginit\n");
1001 siginit(kernproc);
1002
1003 bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1004 bsd_utaskbootstrap();
1005
1006 pal_kernel_announce();
1007
1008 bsd_init_kprintf("calling mountroot_post_hook\n");
1009
1010 #if XNU_TARGET_OS_OSX
1011 /* invoke post-root-mount hook */
1012 if (mountroot_post_hook != NULL) {
1013 mountroot_post_hook();
1014 }
1015 #endif
1016
1017 #if 0 /* not yet */
1018 consider_zone_gc(FALSE);
1019 #endif
1020
1021 /*
1022 * At this point, we consider the kernel "booted" enough to apply
1023 * stricter timeouts.
1024 */
1025 machine_timeout_bsd_init();
1026
1027 bsd_init_kprintf("done\n");
1028 }
1029
1030 void
bsdinit_task(void)1031 bsdinit_task(void)
1032 {
1033 proc_t p = current_proc();
1034
1035 process_name("init", p);
1036
1037 /* Set up exception-to-signal reflection */
1038 ux_handler_setup();
1039
1040 #if CONFIG_MACF
1041 mac_cred_label_associate_user(proc_ucred(p));
1042 #endif
1043
1044 vm_init_before_launchd();
1045
1046 #if CONFIG_XNUPOST
1047 int result = bsd_list_tests();
1048 result = bsd_do_post();
1049 if (result != 0) {
1050 panic("bsd_do_post: Tests failed with result = 0x%08x", result);
1051 }
1052 #endif
1053
1054 bsd_init_kprintf("bsd_do_post - done");
1055
1056 load_init_program(p);
1057 lock_trace = 1;
1058 }
1059
1060 kern_return_t
bsd_autoconf(void)1061 bsd_autoconf(void)
1062 {
1063 kprintf("bsd_autoconf: calling kminit\n");
1064 kminit();
1065
1066 /*
1067 * Early startup for bsd pseudodevices.
1068 */
1069 {
1070 struct pseudo_init *pi;
1071
1072 for (pi = pseudo_inits; pi->ps_func; pi++) {
1073 (*pi->ps_func)(pi->ps_count);
1074 }
1075 }
1076
1077 return IOKitBSDInit();
1078 }
1079
1080
1081 #include <sys/disklabel.h> /* for MAXPARTITIONS */
1082
1083 static void
setconf(void)1084 setconf(void)
1085 {
1086 u_int32_t flags;
1087 kern_return_t err;
1088
1089 err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1090 if (err) {
1091 printf("setconf: IOFindBSDRoot returned an error (%d);"
1092 "setting rootdevice to 'sd0a'.\n", err); /* XXX DEBUG TEMP */
1093 rootdev = makedev( 6, 0 );
1094 strlcpy(rootdevice, "sd0a", sizeof(rootdevice));
1095 flags = 0;
1096 }
1097
1098 #if CONFIG_NETBOOT
1099 if (flags & 1) {
1100 /* network device */
1101 mountroot = netboot_mountroot;
1102 } else {
1103 #endif
1104 /* otherwise have vfs determine root filesystem */
1105 mountroot = NULL;
1106 #if CONFIG_NETBOOT
1107 }
1108 #endif
1109 }
1110
1111 /*
1112 * Boot into the flavor of Recovery dictated by `mode`.
1113 */
1114 boolean_t
bsd_boot_to_recovery(bsd_bootfail_mode_t mode,uuid_t volume_uuid,boolean_t reboot)1115 bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1116 {
1117 return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1118 }
1119
1120 void
bsd_utaskbootstrap(void)1121 bsd_utaskbootstrap(void)
1122 {
1123 thread_t thread;
1124 struct uthread *ut;
1125
1126 /*
1127 * Clone the bootstrap process from the kernel process, without
1128 * inheriting either task characteristics or memory from the kernel;
1129 */
1130 thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE);
1131
1132 /* Hold the reference as it will be dropped during shutdown */
1133 initproc = proc_find(1);
1134 #if __PROC_INTERNAL_DEBUG
1135 if (initproc == PROC_NULL) {
1136 panic("bsd_utaskbootstrap: initproc not set");
1137 }
1138 #endif
1139
1140 zalloc_first_proc_made();
1141
1142 /*
1143 * Since we aren't going back out the normal way to our parent,
1144 * we have to drop the transition locks explicitly.
1145 */
1146 proc_signalend(initproc, 0);
1147 proc_transend(initproc, 0);
1148
1149 ut = (struct uthread *)get_bsdthread_info(thread);
1150 ut->uu_sigmask = 0;
1151 act_set_astbsd(thread);
1152 task_clear_return_wait(get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
1153 }
1154
1155 static void
parse_bsd_args(void)1156 parse_bsd_args(void)
1157 {
1158 char namep[48];
1159
1160 if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
1161 boothowto |= RB_SINGLE;
1162 }
1163
1164 if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
1165 boothowto |= RB_SAFEBOOT;
1166 }
1167
1168 if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
1169 sizeof(max_nbuf_headers))) {
1170 customnbuf = 1;
1171 }
1172
1173 #if CONFIG_DARKBOOT
1174 /*
1175 * The darkboot flag is specified by the bootloader and is stored in
1176 * boot_args->bootFlags. This flag is available starting revision 2.
1177 */
1178 boot_args *args = (boot_args *) PE_state.bootArgs;
1179 if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1180 darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1181 } else {
1182 darkboot = 0;
1183 }
1184 #endif
1185
1186 #if DEVELOPMENT || DEBUG
1187 if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1188 if (strlen(dyld_alt_path) > 0) {
1189 use_alt_dyld = 1;
1190 }
1191 }
1192 #endif /* DEVELOPMENT || DEBUG */
1193 }
1194 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, parse_bsd_args);
1195
1196 #if CONFIG_BASESYSTEMROOT
1197
1198 extern bool IOGetBootUUID(char *);
1199 extern bool IOGetApfsPrebootUUID(char *);
1200
1201
1202 // This function returns the UUID of the Preboot (and Recovery) folder associated with the
1203 // current boot volume, if applicable. The meaning of the UUID can be
1204 // filesystem-dependent and not all kinds of boots will have a UUID.
1205 // On success, the UUID is copied into the past-in parameter and TRUE is returned.
1206 // In case the current boot has no applicable Preboot UUID, FALSE is returned.
1207 static bool
get_preboot_uuid(uuid_string_t maybe_uuid_string)1208 get_preboot_uuid(uuid_string_t maybe_uuid_string)
1209 {
1210 // try IOGetApfsPrebootUUID
1211 if (IOGetApfsPrebootUUID(maybe_uuid_string)) {
1212 uuid_t maybe_uuid;
1213 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1214 if (error == 0) {
1215 return true;
1216 }
1217 }
1218
1219 // try IOGetBootUUID
1220 if (IOGetBootUUID(maybe_uuid_string)) {
1221 uuid_t maybe_uuid;
1222 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1223 if (error == 0) {
1224 return true;
1225 }
1226 }
1227
1228 // didn't find it
1229 return false;
1230 }
1231
1232 #if defined(__arm64__)
1233 extern bool IOGetBootObjectsPath(char *);
1234 #endif
1235
1236 // Find the BaseSystem.dmg to be used as the initial root volume during certain
1237 // kinds of boots.
1238 // This may mount volumes and lookup vnodes.
1239 // The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1240 // If it returns 0 (no error), then it also writes the absolute path to the
1241 // BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1242 static
1243 int
bsd_find_basesystem_dmg(char * bsdmgpath_out,bool * rooted_dmg,bool * skip_signature_check)1244 bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
1245 {
1246 int error;
1247 size_t len;
1248 char *dmgbasepath;
1249 char *dmgpath;
1250 bool allow_rooted_dmg = false;
1251
1252 dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1253 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1254 vnode_t imagevp = NULLVP;
1255
1256 #if DEVELOPMENT || DEBUG
1257 allow_rooted_dmg = true;
1258 #endif
1259
1260 //must provide output bool
1261 if (rooted_dmg && skip_signature_check) {
1262 *rooted_dmg = false;
1263 *skip_signature_check = false;
1264 } else {
1265 error = EINVAL;
1266 goto done;
1267 }
1268
1269 error = vfs_mount_recovery();
1270 if (error) {
1271 goto done;
1272 }
1273
1274 len = strlcpy(dmgbasepath, "/System/Volumes/Recovery/", MAXPATHLEN);
1275 if (len > MAXPATHLEN) {
1276 error = ENAMETOOLONG;
1277 goto done;
1278 }
1279
1280 if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
1281 *skip_signature_check = true;
1282 allow_rooted_dmg = true;
1283 }
1284
1285 #if defined(__arm64__)
1286 char boot_obj_path[MAXPATHLEN] = "";
1287
1288 if (IOGetBootObjectsPath(boot_obj_path)) {
1289 if (boot_obj_path[0] == '/') {
1290 dmgbasepath[len - 1] = '\0';
1291 }
1292
1293 len = strlcat(dmgbasepath, boot_obj_path, MAXPATHLEN);
1294 if (len > MAXPATHLEN) {
1295 error = ENAMETOOLONG;
1296 goto done;
1297 }
1298
1299 len = strlcat(dmgbasepath, "/usr/standalone/firmware/", MAXPATHLEN);
1300 if (len > MAXPATHLEN) {
1301 error = ENAMETOOLONG;
1302 goto done;
1303 }
1304
1305 if (allow_rooted_dmg) {
1306 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1307 if (len > MAXPATHLEN) {
1308 error = ENAMETOOLONG;
1309 goto done;
1310 }
1311
1312 len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1313 if (len > MAXPATHLEN) {
1314 error = ENAMETOOLONG;
1315 goto done;
1316 }
1317
1318 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1319 if (error == 0) {
1320 *rooted_dmg = true;
1321 *skip_signature_check = true;
1322 goto done;
1323 }
1324 memset(dmgpath, 0, MAXPATHLEN);
1325 }
1326
1327 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1328 if (len > MAXPATHLEN) {
1329 error = ENAMETOOLONG;
1330 goto done;
1331 }
1332
1333 len = strlcat(dmgpath, "arm64eBaseSystem.dmg", MAXPATHLEN);
1334 if (len > MAXPATHLEN) {
1335 error = ENAMETOOLONG;
1336 goto done;
1337 }
1338
1339 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1340 if (error == 0) {
1341 goto done;
1342 }
1343 memset(dmgpath, 0, MAXPATHLEN);
1344 dmgbasepath[strlen("/System/Volumes/Recovery/")] = '\0';
1345 }
1346 #endif // __arm64__
1347
1348 uuid_string_t preboot_uuid;
1349 if (!get_preboot_uuid(preboot_uuid)) {
1350 // no preboot? bail out
1351 return EINVAL;
1352 }
1353
1354 len = strlcat(dmgbasepath, preboot_uuid, MAXPATHLEN);
1355 if (len > MAXPATHLEN) {
1356 error = ENAMETOOLONG;
1357 goto done;
1358 }
1359
1360 if (allow_rooted_dmg) {
1361 // Try BaseSystem.rooted.dmg
1362 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1363 if (len > MAXPATHLEN) {
1364 error = ENAMETOOLONG;
1365 goto done;
1366 }
1367
1368 len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
1369 if (len > MAXPATHLEN) {
1370 error = ENAMETOOLONG;
1371 goto done;
1372 }
1373
1374 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1375 if (error == 0) {
1376 // we found it! success!
1377 *rooted_dmg = true;
1378 *skip_signature_check = true;
1379 goto done;
1380 }
1381 }
1382
1383 // Try BaseSystem.dmg
1384 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1385 if (len > MAXPATHLEN) {
1386 error = ENAMETOOLONG;
1387 goto done;
1388 }
1389
1390 len = strlcat(dmgpath, "/BaseSystem.dmg", MAXPATHLEN);
1391 if (len > MAXPATHLEN) {
1392 error = ENAMETOOLONG;
1393 goto done;
1394 }
1395
1396 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1397 if (error == 0) {
1398 // success!
1399 goto done;
1400 }
1401
1402 done:
1403 if (error == 0) {
1404 strlcpy(bsdmgpath_out, dmgpath, MAXPATHLEN);
1405 } else {
1406 bsd_init_kprintf("%s: error %d\n", __func__, error);
1407 }
1408 if (imagevp != NULLVP) {
1409 vnode_put(imagevp);
1410 }
1411 zfree(ZV_NAMEI, dmgpath);
1412 zfree(ZV_NAMEI, dmgbasepath);
1413 return error;
1414 }
1415
1416 static boolean_t
bsdmgroot_bootable(void)1417 bsdmgroot_bootable(void)
1418 {
1419 #if defined(__arm64__)
1420 #define BSDMGROOT_DEFAULT true
1421 #else
1422 #define BSDMGROOT_DEFAULT false
1423 #endif
1424
1425 boolean_t resolved = BSDMGROOT_DEFAULT;
1426
1427 boolean_t boot_arg_bsdmgroot = false;
1428 boolean_t boot_arg_nobsdmgroot = false;
1429 int error;
1430 mount_t mp;
1431 boolean_t root_part_of_volume_group = false;
1432 struct vfs_attr vfsattr;
1433
1434 mp = rootvnode->v_mount;
1435 VFSATTR_INIT(&vfsattr);
1436 VFSATTR_WANTED(&vfsattr, f_capabilities);
1437
1438 boot_arg_bsdmgroot = PE_parse_boot_argn("-bsdmgroot", NULL, 0);
1439 boot_arg_nobsdmgroot = PE_parse_boot_argn("-nobsdmgroot", NULL, 0);
1440
1441 error = vfs_getattr(mp, &vfsattr, vfs_context_kernel());
1442 if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1443 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1444 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1445 root_part_of_volume_group = true;
1446 }
1447 }
1448
1449 boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1450
1451 // Start with the #defined default above.
1452 // If booting to single-user mode, default to false, because single-
1453 // user mode inside the BaseSystem is probably not what's wanted.
1454 // If the 'yes' boot-arg is set, we'll allow that even in single-user
1455 // mode, we'll assume you know what you're doing.
1456 // The 'no' boot-arg overpowers the 'yes' boot-arg.
1457 // In any case, we will not attempt to root from BaseSystem if the
1458 // original (booter-chosen) root volume isn't in a volume group.
1459 // This is just out of an abundance of caution: if the boot environment
1460 // seems to be "something other than a standard install",
1461 // we'll be conservative in messing with the root volume.
1462
1463 if (singleuser) {
1464 resolved = false;
1465 }
1466
1467 if (boot_arg_bsdmgroot) {
1468 resolved = true;
1469 }
1470
1471 if (boot_arg_nobsdmgroot) {
1472 resolved = false;
1473 }
1474
1475 if (!root_part_of_volume_group) {
1476 resolved = false;
1477 }
1478
1479 return resolved;
1480 }
1481 #endif // CONFIG_BASESYSTEMROOT
1482
1483 void
bsd_exec_setup(int scale)1484 bsd_exec_setup(int scale)
1485 {
1486 switch (scale) {
1487 case 0:
1488 case 1:
1489 bsd_simul_execs = BSD_SIMUL_EXECS;
1490 break;
1491 case 2:
1492 case 3:
1493 bsd_simul_execs = 65;
1494 break;
1495 case 4:
1496 case 5:
1497 bsd_simul_execs = 129;
1498 break;
1499 case 6:
1500 case 7:
1501 bsd_simul_execs = 257;
1502 break;
1503 default:
1504 bsd_simul_execs = 513;
1505 break;
1506 }
1507 bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1508 }
1509
1510 #if !CONFIG_NETBOOT
/* Prototype kept next to the stub so the definition has a prior declaration. */
int netboot_root(void);

/*
 * Stand-in compiled only when CONFIG_NETBOOT is off: always returns 0.
 */
int
netboot_root(void)
{
	const int result = 0;

	return result;
}
1519 #endif
1520