xref: /xnu-10063.141.1/osfmk/arm64/sptm/arm_init_sptm.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /**
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <arm64/lowglobals.h>
30 #include <kern/timer_queue.h>
31 #include <kern/monotonic.h>
32 #include <machine/commpage.h>
33 #include <pexpert/device_tree.h>
34 #include <arm/cpu_internal.h>
35 #include <arm/misc_protos.h>
36 #include <arm/machine_cpu.h>
37 #include <arm/rtclock.h>
38 #include <vm/vm_map.h>
39 #include <mach/exclaves.h>
40 #include <mach/vm_param.h>
41 #include <libkern/stack_protector.h>
42 #include <console/serial_protos.h>
43 #include <arm64/sptm/pmap/pmap_pt_geometry.h>
44 #include <arm64/sptm/sptm.h>
45 #include <sptm/sptm_common.h>
46 
47 #if CONFIG_TELEMETRY
48 #include <kern/telemetry.h>
49 #endif /* CONFIG_TELEMETRY */
50 
51 #if KPERF
52 #include <kperf/kptimer.h>
53 #endif /* KPERF */
54 
55 #if HIBERNATION
56 #include <IOKit/IOPlatformExpert.h>
57 #endif /* HIBERNATION */
58 
59 /**
60  * Functions defined elsewhere that are required by this source file.
61  */
62 extern void patch_low_glo(void);
63 extern int serial_init(void);
64 extern void sleep_token_buffer_init(void);
65 
66 /**
67  * Bootstrap stacks. Used on the cold boot path to set up the boot CPU's
68  * per-CPU data structure.
69  */
70 extern vm_offset_t intstack_top;
71 extern vm_offset_t excepstack_top;
72 
73 /* First (inclusive) and last (exclusive) physical addresses */
74 extern pmap_paddr_t vm_first_phys;
75 extern pmap_paddr_t vm_last_phys;
76 
77 int debug_task;
78 
79 /**
80  * Set according to what serial-related boot-args have been passed to XNU.
81  */
82 extern int disableConsoleOutput;
83 
84 /**
85  * SPTM devices do not support static kernelcaches, but the rest of XNU
86  * expects this variable to be defined. Set it to false at build time.
87  */
88 SECURITY_READ_ONLY_LATE(bool) static_kernelcache = false;
89 
90 /**
91  * First physical address freely available to xnu.
92  */
93 SECURITY_READ_ONLY_LATE(addr64_t) first_avail_phys = 0;
94 
95 #if HAS_BP_RET
96 /* Enable both branch target retention (0x2) and branch direction retention (0x1) across sleep */
97 uint32_t bp_ret = 3;
98 extern void set_bp_ret(void);
99 #endif
100 
101 #if SCHED_HYGIENE_DEBUG
102 boolean_t sched_hygiene_debug_pmc = 1;
103 #endif
104 
105 #if SCHED_HYGIENE_DEBUG
106 
107 #if XNU_PLATFORM_iPhoneOS
108 #define DEFAULT_INTERRUPT_MASKED_TIMEOUT 48000   /* 2ms */
109 #else
110 #define DEFAULT_INTERRUPT_MASKED_TIMEOUT 0xd0000 /* 35.499ms */
111 #endif /* XNU_PLATFORM_iPhoneOS */
112 
113 TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, interrupt_masked_debug_mode,
114     "machine-timeouts", "interrupt-masked-debug-mode",
115     "interrupt-masked-debug-mode",
116     SCHED_HYGIENE_MODE_PANIC,
117     TUNABLE_DT_CHECK_CHOSEN);
118 
119 MACHINE_TIMEOUT_DEV_WRITEABLE(interrupt_masked_timeout, "interrupt-masked",
120     DEFAULT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
121     NULL);
122 #if __arm64__
123 #define SSHOT_INTERRUPT_MASKED_TIMEOUT 0xf9999 /* 64-bit: 42.599ms */
124 #endif
125 MACHINE_TIMEOUT_DEV_WRITEABLE(stackshot_interrupt_masked_timeout, "sshot-interrupt-masked",
126     SSHOT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
127     NULL);
128 #undef SSHOT_INTERRUPT_MASKED_TIMEOUT
129 #endif
130 
131 /*
132  * A 6-second timeout will give the watchdog code a chance to run
133  * before a panic is triggered by the xcall routine.
134  */
135 #define XCALL_ACK_TIMEOUT_NS ((uint64_t) 6000000000)
136 uint64_t xcall_ack_timeout_abstime;
137 
138 boot_args const_boot_args __attribute__((section("__DATA, __const")));
139 boot_args      *BootArgs __attribute__((section("__DATA, __const")));
140 
141 /**
142  * The SPTM provides a second set of boot arguments, on top of those
143  * provided by iBoot.
144  */
145 SECURITY_READ_ONLY_LATE(sptm_bootstrap_args_xnu_t) const_sptm_args;
146 SECURITY_READ_ONLY_LATE(const sptm_bootstrap_args_xnu_t *) SPTMArgs;
147 SECURITY_READ_ONLY_LATE(const bool *) sptm_xnu_triggered_panic_ptr;
148 
149 extern char osbuild_config[];
150 
151 TUNABLE(uint32_t, arm_diag, "diag", 0);
152 #ifdef  APPLETYPHOON
153 static unsigned cpus_defeatures = 0x0;
154 extern void cpu_defeatures_set(unsigned int);
155 #endif
156 
157 #if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
158 extern volatile boolean_t arm64_stall_sleep;
159 #endif
160 
161 extern boolean_t force_immediate_debug_halt;
162 
163 #if HAS_APPLE_PAC
164 SECURITY_READ_ONLY_LATE(boolean_t) diversify_user_jop = TRUE;
165 #endif
166 
167 SECURITY_READ_ONLY_LATE(uint64_t) gDramBase;
168 SECURITY_READ_ONLY_LATE(uint64_t) gDramSize;
169 
170 SECURITY_READ_ONLY_LATE(bool) serial_console_enabled = false;
171 SECURITY_READ_ONLY_LATE(bool) enable_processor_exit = false;
172 
173 /**
174  * SPTM TODO: The following flag is set up based on the presence and
175  *            configuration of the 'sptm_stability_hacks' boot-arg; this
176  *            is used in certain codepaths that do not properly function
177  *            today in SPTM systems to make the system more stable and fully
178  *            able to boot to user space.
179  */
180 SECURITY_READ_ONLY_LATE(bool) sptm_stability_hacks = false;
181 
182 /*
183  * Forward definition
184  */
185 void arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_args);
186 
187 #if __arm64__
188 unsigned int page_shift_user32; /* for page_size as seen by a 32-bit task */
189 
190 extern void configure_misc_apple_boot_args(void);
191 extern void configure_misc_apple_regs(bool is_boot_cpu);
192 extern void configure_timer_apple_regs(void);
193 #endif /* __arm64__ */
194 
195 
196 /*
197  * JOP rebasing
198  */
199 
200 #define dyldLogFunc(msg, ...)
201 #include <mach/dyld_kernel_fixups.h>
202 
203 extern uint32_t __thread_starts_sect_start[] __asm("section$start$__TEXT$__thread_starts");
204 extern uint32_t __thread_starts_sect_end[]   __asm("section$end$__TEXT$__thread_starts");
205 #if defined(HAS_APPLE_PAC)
206 extern void OSRuntimeSignStructors(kernel_mach_header_t * header);
207 extern void OSRuntimeSignStructorsInFileset(kernel_mach_header_t * header);
208 #endif /* defined(HAS_APPLE_PAC) */
209 
210 extern vm_offset_t vm_kernel_slide;
211 extern vm_offset_t segLOWESTKC, segHIGHESTKC, segLOWESTROKC, segHIGHESTROKC;
212 extern vm_offset_t segLOWESTAuxKC, segHIGHESTAuxKC, segLOWESTROAuxKC, segHIGHESTROAuxKC;
213 extern vm_offset_t segLOWESTRXAuxKC, segHIGHESTRXAuxKC, segHIGHESTNLEAuxKC;
214 
void arm_slide_rebase_and_sign_image(void);
/**
 * Compute the KASLR slide for the primary kernel collection, rebase/sign the
 * collection's fixups, and record the slide in vm_kernel_slide.
 *
 * Runs extremely early on the cold boot path (MARK_AS_FIXUP_TEXT): until
 * kernel_collection_slide() below has run, absolute pointers in the image are
 * not yet rebased, so this function must not call through any unrebased
 * function pointers.
 */
MARK_AS_FIXUP_TEXT void
arm_slide_rebase_and_sign_image(void)
{
	kernel_mach_header_t *k_mh, *kc_mh = NULL;
	kernel_segment_command_t *seg;
	uintptr_t slide;

	/*
	 * The kernel is part of a MH_FILESET kernel collection; determine the
	 * slide based on the first segment's mach-o vmaddr (requires the first
	 * kernel load command to be LC_SEGMENT_64 of the __TEXT segment).
	 */
	k_mh = &_mh_execute_header;
	seg = (kernel_segment_command_t *)((uintptr_t)k_mh + sizeof(*k_mh));
	assert(seg->cmd == LC_SEGMENT_KERNEL);
	slide = (uintptr_t)k_mh - seg->vmaddr;

	/*
	 * The kernel collection linker guarantees that the boot collection mach
	 * header vmaddr is the hardcoded kernel link address (as specified to
	 * ld64 when linking the kernel), so the slid header lives at that
	 * address plus the slide just computed.
	 */
	kc_mh = (kernel_mach_header_t*)(VM_KERNEL_LINK_ADDRESS + slide);
	assert(kc_mh->filetype == MH_FILESET);

	/*
	 * Rebase and sign jops.
	 * Note that we can't call any functions before this point, so
	 * we have to hard-code the knowledge that the base of the KC
	 * is the KC's mach-o header. This would change if any
	 * segment's VA started *before* the text segment
	 * (as the HIB segment does on x86).
	 */
	const void *collection_base_pointers[KCNumKinds] = {[0] = kc_mh, };
	kernel_collection_slide((struct mach_header_64 *)kc_mh, collection_base_pointers);
	PE_set_kc_header(KCKindPrimary, kc_mh, slide);

	/*
	 * iBoot doesn't slide load command vmaddrs in an MH_FILESET kernel
	 * collection, so adjust them now, and determine the vmaddr range
	 * covered by read-only segments for the CTRR rorgn.
	 */
	kernel_collection_adjust_mh_addrs((struct mach_header_64 *)kc_mh, slide, false,
	    (uintptr_t *)&segLOWESTKC, (uintptr_t *)&segHIGHESTKC,
	    (uintptr_t *)&segLOWESTROKC, (uintptr_t *)&segHIGHESTROKC,
	    NULL, NULL, NULL);

	/*
	 * Initialize slide global here to avoid duplicating this logic in
	 * arm_vm_init().
	 */
	vm_kernel_slide = slide;
}
269 
270 void
arm_auxkc_init(void * mh,void * base)271 arm_auxkc_init(void *mh, void *base)
272 {
273 	/*
274 	 * The kernel collection linker guarantees that the lowest vmaddr in an
275 	 * AuxKC collection is 0 (but note that the mach header is higher up since
276 	 * RW segments precede RO segments in the AuxKC).
277 	 */
278 	uintptr_t slide = (uintptr_t)base;
279 	kernel_mach_header_t *akc_mh = (kernel_mach_header_t*)mh;
280 
281 	assert(akc_mh->filetype == MH_FILESET);
282 	PE_set_kc_header_and_base(KCKindAuxiliary, akc_mh, base, slide);
283 
284 	/* rebase and sign jops */
285 	const void *collection_base_pointers[KCNumKinds];
286 	memcpy(collection_base_pointers, PE_get_kc_base_pointers(), sizeof(collection_base_pointers));
287 	kernel_collection_slide((struct mach_header_64 *)akc_mh, collection_base_pointers);
288 
289 	kernel_collection_adjust_mh_addrs((struct mach_header_64 *)akc_mh, slide, false,
290 	    (uintptr_t *)&segLOWESTAuxKC, (uintptr_t *)&segHIGHESTAuxKC, (uintptr_t *)&segLOWESTROAuxKC,
291 	    (uintptr_t *)&segHIGHESTROAuxKC, (uintptr_t *)&segLOWESTRXAuxKC, (uintptr_t *)&segHIGHESTRXAuxKC,
292 	    (uintptr_t *)&segHIGHESTNLEAuxKC);
293 #if defined(HAS_APPLE_PAC)
294 	OSRuntimeSignStructorsInFileset(akc_mh);
295 #endif /* defined(HAS_APPLE_PAC) */
296 }
297 
298 /*
299  * boot kernelcache ranges; used for accounting.
300  */
301 SECURITY_READ_ONLY_LATE(const arm_physrange_t *) arm_vm_kernelcache_ranges;
302 SECURITY_READ_ONLY_LATE(int) arm_vm_kernelcache_numranges;
303 
304 #if __ARM_KERNEL_PROTECT__
305 /*
306  * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
307  * mappable space preceding the kernel (as we unmap the kernel by cutting the
308  * range covered by TTBR1 in half).  This must also cover the exception vectors.
309  */
310 static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
311 
312 /* The exception vectors and the kernel cannot share root TTEs. */
313 static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
314 
315 /*
316  * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
317  * the exception vectors.
318  */
319 static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
320 #endif /* __ARM_KERNEL_PROTECT__ */
321 
322 #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
323 
324 #if KASAN
325 extern vm_offset_t shadow_pbase;
326 extern vm_offset_t shadow_ptop;
327 extern vm_offset_t physmap_vbase;
328 extern vm_offset_t physmap_vtop;
329 #endif
330 
331 /*
332  * We explicitly place this in const, as it is not const from a language
333  * perspective, but it is only modified before we actually switch away from
334  * the bootstrap page tables.
335  */
336 SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));
337 
338 /*
339  * Denotes the end of xnu.
340  */
341 extern void *last_kernel_symbol;
342 
343 extern void arm64_replace_bootstack(cpu_data_t*);
344 extern void PE_slide_devicetree(vm_offset_t);
345 
346 /*
347  * KASLR parameters
348  */
349 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
350 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
351 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
352 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
353 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
354 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
355 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
356 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
357 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
358 
359 SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_sptm_offsets;
360 SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_txm_offsets;
361 
362 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
363 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
364 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
365 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
366 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
367 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
368 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
369 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
370 
371 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
372 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
373 
374 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
375 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;
376 
377 /* Used by <mach/arm/vm_param.h> */
378 SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
379 SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
380 SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
381 SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
382 SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
383 
384 /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
385  * all kexts before the kernel.  This is only for arm64 devices and looks
386  * something like the following:
387  * -- vmaddr order --
388  * 0xffffff8004004000 __PRELINK_TEXT
389  * 0xffffff8007004000 __TEXT (xnu)
390  * 0xffffff80075ec000 __DATA (xnu)
391  * 0xffffff80076dc000 __KLD (xnu)
392  * 0xffffff80076e0000 __LAST (xnu)
393  * 0xffffff80076e4000 __LINKEDIT (xnu)
394  * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
395  * 0xffffff800782c000 __PRELINK_INFO
396  * 0xffffff80078e4000 -- End of kernelcache
397  */
398 
399 /* 24921709 - make XNU ready for KTRR
400  *
401  * Two possible kernel cache layouts, depending on which kcgen is being used.
402  * VAs increasing downwards.
403  * Old KCGEN:
404  *
405  * __PRELINK_TEXT
406  * __TEXT
407  * __DATA_CONST
408  * __TEXT_EXEC
409  * __KLD
410  * __LAST
411  * __DATA
412  * __PRELINK_DATA (expected empty)
413  * __LINKEDIT
414  * __PRELINK_INFO
415  *
416  * New kcgen:
417  *
418  * __PRELINK_TEXT    <--- First KTRR (ReadOnly) segment
419  * __PLK_DATA_CONST
420  * __PLK_TEXT_EXEC
421  * __TEXT
422  * __DATA_CONST
423  * __TEXT_EXEC
424  * __KLD
425  * __LAST            <--- Last KTRR (ReadOnly) segment
426  * __DATA
427  * __BOOTDATA (if present)
428  * __LINKEDIT
429  * __PRELINK_DATA (expected populated now)
430  * __PLK_LINKEDIT
431  * __PRELINK_INFO
432  *
433  */
434 
435 vm_offset_t mem_size;                             /* Size of actual physical memory present
436                                                    * minus any performance buffer and possibly
437                                                    * limited by mem_limit in bytes */
438 uint64_t    mem_actual;                           /* The "One True" physical memory size
439                                                    * actually, it's the highest physical
440                                                    * address + 1 */
441 uint64_t    max_mem;                              /* Size of physical memory (bytes), adjusted
442                                                    * by maxmem */
443 uint64_t    max_mem_actual;                       /* Actual size of physical memory (bytes),
444                                                    * adjusted by the maxmem boot-arg */
445 uint64_t    sane_size;                            /* Memory size to use for defaults
446                                                    * calculations */
447 /* This no longer appears to be used; kill it? */
448 addr64_t    vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
449                                                    * virtual address known
450                                                    * to the VM system */
451 
452 SECURITY_READ_ONLY_LATE(vm_offset_t)              segEXTRADATA;
453 SECURITY_READ_ONLY_LATE(unsigned long)            segSizeEXTRADATA;
454 
455 /* Trust cache portion of EXTRADATA (if within it) */
456 SECURITY_READ_ONLY_LATE(vm_offset_t)              segTRUSTCACHE;
457 SECURITY_READ_ONLY_LATE(unsigned long)            segSizeTRUSTCACHE;
458 
459 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTTEXT;
460 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWEST;
461 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTRO;
462 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTRO;
463 
464 /* Only set when booted from MH_FILESET kernel collections */
465 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTKC;
466 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTKC;
467 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTROKC;
468 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTROKC;
469 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTAuxKC;
470 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTAuxKC;
471 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTROAuxKC;
472 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTROAuxKC;
473 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTRXAuxKC;
474 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTRXAuxKC;
475 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTNLEAuxKC;
476 
477 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segTEXTB;
478 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
479 
480 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segDATACONSTB;
481 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
482 
483 SECURITY_READ_ONLY_LATE(vm_offset_t)   segTEXTEXECB;
484 SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;
485 
486 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segDATAB;
487 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
488 
489 SECURITY_READ_ONLY_LATE(vm_offset_t)          segBOOTDATAB;
490 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeBOOTDATA;
491 extern vm_offset_t                            intstack_low_guard;
492 extern vm_offset_t                            intstack_high_guard;
493 extern vm_offset_t                            excepstack_high_guard;
494 
495 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLINKB;
496 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
497 
498 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKLDB;
499 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeKLD;
500 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKLDDATAB;
501 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
502 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLASTB;
503 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeLAST;
504 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLASTDATACONSTB;
505 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeLASTDATACONST;
506 
507 SECURITY_READ_ONLY_LATE(vm_offset_t)          sectHIBTEXTB;
508 SECURITY_READ_ONLY_LATE(unsigned long)        sectSizeHIBTEXT;
509 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIBDATAB;
510 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeHIBDATA;
511 SECURITY_READ_ONLY_LATE(vm_offset_t)          sectHIBDATACONSTB;
512 SECURITY_READ_ONLY_LATE(unsigned long)        sectSizeHIBDATACONST;
513 
514 SECURITY_READ_ONLY_LATE(vm_offset_t)          segPRELINKTEXTB;
515 SECURITY_READ_ONLY_LATE(unsigned long)        segSizePRELINKTEXT;
516 
517 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKTEXTEXECB;
518 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
519 
520 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKDATACONSTB;
521 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
522 
523 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPRELINKDATAB;
524 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
525 
526 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKLLVMCOVB = 0;
527 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
528 
529 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKLINKEDITB;
530 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
531 
532 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPRELINKINFOB;
533 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
534 
535 /* Only set when booted from MH_FILESET primary kernel collection */
536 SECURITY_READ_ONLY_LATE(vm_offset_t)          segKCTEXTEXECB;
537 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeKCTEXTEXEC;
538 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKCDATACONSTB;
539 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
540 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKCDATAB;
541 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;
542 
543 SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
544 
545 SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;
546 
547 SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
548 SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
549 SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
550 SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
551 
552 SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base, auxkc_right_above;
553 
554 pmap_paddr_t alloc_ptpage(sptm_pt_level_t level, bool map_static);
555 SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
556 extern int dtrace_keep_kernel_symbols(void);
557 
558 /*
559  * Bootstrap the system enough to run with virtual memory.
560  * Map the kernel's code and data, and allocate the system page table.
561  * Page_size must already be set.
562  *
563  * Parameters:
564  * first_avail: first available physical page -
565  *              after kernel page tables
566  * avail_start: PA of first physical page
567  * avail_end:   PA of last physical page
568  */
569 SECURITY_READ_ONLY_LATE(vm_offset_t)     first_avail;
570 SECURITY_READ_ONLY_LATE(vm_offset_t)     static_memory_end;
571 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    avail_start;
572 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    avail_end;
573 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    real_avail_end;
574 SECURITY_READ_ONLY_LATE(unsigned long)   real_phys_size;
575 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
576 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;
577 
578 typedef struct {
579 	pmap_paddr_t pa;
580 	vm_map_address_t va;
581 	vm_size_t len;
582 } ptov_table_entry;
583 
584 SECURITY_READ_ONLY_LATE(static boolean_t)               kva_active = FALSE;
585 
586 
587 /**
588  * sptm_supports_local_coredump is set in start_sptm.s when SPTM dispatch logic
589  * calls into XNU to handle a panic from SPTM/TXM/cL4. If this variable is set
590  * to false then osfmk/kern/debug.c:debugger_collect_diagnostic() will skip
591  * taking a local core dump. This defaults to true since as long as the panic
592  * doesn't occur within the SPTM, then the SPTM will support making calls during
593  * the panic path to save the coredump. Only when the panic occurs from within
594  * guarded mode do we let SPTM decide whether it supports local coredumps.
595  */
596 bool sptm_supports_local_coredump = true;
597 
598 /**
599  * Entry point for systems that support an SPTM. Bootstrap stacks
600  * have been set up by the SPTM by this point, and XNU is responsible
601  * for rebasing and signing absolute addresses.
602  */
603 void
arm_init(boot_args * args,sptm_bootstrap_args_xnu_t * sptm_boot_args)604 arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_boot_args)
605 {
606 	unsigned int maxmem;
607 	uint32_t memsize;
608 	uint64_t xmaxmem;
609 	thread_t thread;
610 
611 	extern void xnu_return_to_gl2(void);
612 	const sptm_vaddr_t handler_addr = (sptm_vaddr_t) ptrauth_strip((void *)xnu_return_to_gl2, ptrauth_key_function_pointer);
613 	sptm_register_xnu_exc_return(handler_addr);
614 
615 #if defined(HAS_APPLE_PAC)
616 	kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
617 	OSRuntimeSignStructorsInFileset(kc_mh);
618 #endif /* defined(HAS_APPLE_PAC) */
619 
620 	/* If kernel integrity is supported, use a constant copy of the boot args. */
621 	const_boot_args = *args;
622 	BootArgs = args = &const_boot_args;
623 	const_sptm_args = *sptm_boot_args;
624 	SPTMArgs = sptm_boot_args = &const_sptm_args;
625 	sptm_xnu_triggered_panic_ptr = sptm_boot_args->xnu_triggered_panic;
626 	/*
627 	 * Initialize first_avail_phys from what the SPTM tells us.
628 	 * We're not using iBoot's topOfKernelData, as SPTM and other
629 	 * components have consumed pages themselves.
630 	 */
631 	first_avail_phys = sptm_boot_args->first_avail_phys;
632 
633 	cpu_data_init(&BootCpuData);
634 #if defined(HAS_APPLE_PAC)
635 	/* bootstrap cpu process dependent key for kernel has been loaded by start.s */
636 	BootCpuData.rop_key = ml_default_rop_pid();
637 	BootCpuData.jop_key = ml_default_jop_pid();
638 #endif /* defined(HAS_APPLE_PAC) */
639 
640 	PE_init_platform(FALSE, args); /* Get platform expert set up */
641 
642 	/* Initialize SPTM helper library. */
643 	libsptm_init(&const_sptm_args.libsptm_state);
644 
645 #if __arm64__
646 	configure_timer_apple_regs();
647 	wfe_timeout_configure();
648 	wfe_timeout_init();
649 
650 	configure_misc_apple_boot_args();
651 	configure_misc_apple_regs(true);
652 
653 
654 	{
655 		/*
656 		 * Select the advertised kernel page size.
657 		 */
658 		if (args->memSize > 1ULL * 1024 * 1024 * 1024) {
659 			/*
660 			 * arm64 device with > 1GB of RAM:
661 			 * kernel uses 16KB pages.
662 			 */
663 			PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
664 		} else {
665 			/*
666 			 * arm64 device with <= 1GB of RAM:
667 			 * kernel uses hardware page size
668 			 * (4KB for H6/H7, 16KB for H8+).
669 			 */
670 			PAGE_SHIFT_CONST = ARM_PGSHIFT;
671 		}
672 
673 		/* 32-bit apps always see 16KB page size */
674 		page_shift_user32 = PAGE_MAX_SHIFT;
675 #ifdef  APPLETYPHOON
676 		if (PE_parse_boot_argn("cpus_defeatures", &cpus_defeatures, sizeof(cpus_defeatures))) {
677 			if ((cpus_defeatures & 0xF) != 0) {
678 				cpu_defeatures_set(cpus_defeatures & 0xF);
679 			}
680 		}
681 #endif
682 	}
683 #endif
684 
685 	/* Enable SPTM stability hacks if requested */
686 	PE_parse_boot_argn("sptm_stability_hacks", &sptm_stability_hacks, sizeof(sptm_stability_hacks));
687 
688 	ml_parse_cpu_topology();
689 
690 
691 	master_cpu = ml_get_boot_cpu_number();
692 	assert(master_cpu >= 0 && master_cpu <= ml_get_max_cpu_number());
693 
694 	BootCpuData.cpu_number = (unsigned short)master_cpu;
695 	BootCpuData.intstack_top = (vm_offset_t) &intstack_top;
696 	BootCpuData.istackptr = &intstack_top;
697 	BootCpuData.excepstack_top = (vm_offset_t) &excepstack_top;
698 	CpuDataEntries[master_cpu].cpu_data_vaddr = &BootCpuData;
699 	CpuDataEntries[master_cpu].cpu_data_paddr = (void *)((uintptr_t)(args->physBase)
700 	    + ((uintptr_t)&BootCpuData
701 	    - (uintptr_t)(args->virtBase)));
702 
703 	thread = thread_bootstrap();
704 	thread->machine.CpuDatap = &BootCpuData;
705 	thread->machine.pcpu_data_base = (vm_offset_t)0;
706 	machine_set_current_thread(thread);
707 
708 	/*
709 	 * Preemption is enabled for this thread so that it can lock mutexes without
710 	 * tripping the preemption check. In reality scheduling is not enabled until
711 	 * this thread completes, and there are no other threads to switch to, so
712 	 * preemption level is not really meaningful for the bootstrap thread.
713 	 */
714 	thread->machine.preemption_count = 0;
715 	cpu_bootstrap();
716 
717 	rtclock_early_init();
718 
719 	kernel_debug_string_early("kernel_startup_bootstrap");
720 	kernel_startup_bootstrap();
721 
722 	/*
723 	 * Initialize the timer callout world
724 	 */
725 	timer_call_init();
726 
727 	cpu_init();
728 
729 	processor_bootstrap();
730 
731 	if (PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
732 		xmaxmem = (uint64_t) maxmem * (1024 * 1024);
733 	} else if (PE_get_default("hw.memsize", &memsize, sizeof(memsize))) {
734 		xmaxmem = (uint64_t) memsize;
735 	} else {
736 		xmaxmem = 0;
737 	}
738 
739 #if SCHED_HYGIENE_DEBUG
740 	{
741 		int wdt_boot_arg = 0;
742 		bool const wdt_disabled = (PE_parse_boot_argn("wdt", &wdt_boot_arg, sizeof(wdt_boot_arg)) && (wdt_boot_arg == -1));
743 
744 		/* Disable if WDT is disabled */
745 		if (wdt_disabled || kern_feature_override(KF_INTERRUPT_MASKED_DEBUG_OVRD)) {
746 			interrupt_masked_debug_mode = SCHED_HYGIENE_MODE_OFF;
747 		} else if (kern_feature_override(KF_SCHED_HYGIENE_DEBUG_PMC_OVRD)) {
748 			/*
749 		 * The sched hygiene facility can, in addition to checking time, capture
750 			 * metrics provided by the cycle and instruction counters available in some
751 			 * systems. Check if we should enable this feature based on the validation
752 			 * overrides.
753 			 */
754 			sched_hygiene_debug_pmc = 0;
755 		}
756 
757 		if (wdt_disabled || kern_feature_override(KF_PREEMPTION_DISABLED_DEBUG_OVRD)) {
758 			sched_preemption_disable_debug_mode = SCHED_HYGIENE_MODE_OFF;
759 		}
760 	}
761 #endif /* SCHED_HYGIENE_DEBUG */
762 
763 	nanoseconds_to_absolutetime(XCALL_ACK_TIMEOUT_NS, &xcall_ack_timeout_abstime);
764 
765 #if HAS_BP_RET
766 	PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
767 	set_bp_ret(); // Apply branch predictor retention settings to boot CPU
768 #endif
769 
770 	PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt));
771 
772 #if __ARM_PAN_AVAILABLE__
773 	__builtin_arm_wsr("pan", 1);
774 #endif  /* __ARM_PAN_AVAILABLE__ */
775 
776 	arm_vm_init(xmaxmem, args);
777 
778 	if (debug_boot_arg) {
779 		patch_low_glo();
780 	}
781 
782 #if __arm64__ && WITH_CLASSIC_S2R
783 	sleep_token_buffer_init();
784 #endif
785 
786 	PE_consistent_debug_inherit();
787 
788 	/* Setup debugging output. */
789 	const unsigned int serial_exists = serial_init();
790 	kernel_startup_initialize_upto(STARTUP_SUB_KPRINTF);
791 	kprintf("kprintf initialized\n");
792 
793 	serialmode = 0;
794 	if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
795 		/* Do we want a serial keyboard and/or console? */
796 		kprintf("Serial mode specified: %08X\n", serialmode);
797 		disable_iolog_serial_output = (serialmode & SERIALMODE_NO_IOLOG) != 0;
798 		enable_dklog_serial_output = (serialmode & SERIALMODE_DKLOG) != 0;
799 		int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
800 		if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
801 			if (force_sync) {
802 				serialmode |= SERIALMODE_SYNCDRAIN;
803 				kprintf(
804 					"WARNING: Forcing uart driver to output synchronously."
805 					"printf()s/IOLogs will impact kernel performance.\n"
806 					"You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
807 			}
808 		}
809 	}
810 	if (kern_feature_override(KF_SERIAL_OVRD)) {
811 		serialmode = 0;
812 	}
813 
814 	/* Start serial if requested and a serial device was enumerated in serial_init(). */
815 	if ((serialmode & SERIALMODE_OUTPUT) && serial_exists) {
816 		serial_console_enabled = true;
817 		(void)switch_to_serial_console(); /* Switch into serial mode from video console */
818 		disableConsoleOutput = FALSE;     /* Allow printfs to happen */
819 	}
820 	PE_create_console();
821 
822 	/* setup console output */
823 	PE_init_printf(FALSE);
824 
825 #if __arm64__
826 #if DEBUG
827 	dump_kva_space();
828 #endif
829 #endif
830 
831 	cpu_machine_idle_init(TRUE);
832 
833 	PE_init_platform(TRUE, &BootCpuData);
834 
835 	/* Validate SPTM variant. */
836 	__typeof__(const_sptm_args.sptm_variant) expected_sptm_variant;
837 #if DEVELOPMENT || DEBUG
838 	expected_sptm_variant = SPTM_VARIANT_DEVELOPMENT;
839 #else /* RELEASE */
840 	expected_sptm_variant = SPTM_VARIANT_RELEASE;
841 #endif /* RELEASE */
842 	if (const_sptm_args.sptm_variant != expected_sptm_variant) {
843 		panic("arm_init: Mismatch between xnu variant (%s) and SPTM variant (0x%x)",
844 		    osbuild_config, const_sptm_args.sptm_variant);
845 	}
846 
847 #if __arm64__
848 	extern bool cpu_config_correct;
849 	if (!cpu_config_correct) {
850 		panic("The cpumask=N boot arg cannot be used together with cpus=N, and the boot CPU must be enabled");
851 	}
852 
853 	ml_map_cpu_pio();
854 #endif
855 
856 	cpu_timebase_init(TRUE);
857 
858 #if KPERF
859 	/* kptimer_curcpu_up() must be called after cpu_timebase_init */
860 	kptimer_curcpu_up();
861 #endif /* KPERF */
862 
863 	PE_init_cpu();
864 	fiq_context_init(TRUE);
865 
866 
867 #if HIBERNATION
868 	pal_hib_init();
869 #endif /* HIBERNATION */
870 
871 	/*
872 	 * gPhysBase/Size only represent kernel-managed memory. These globals represent
873 	 * the actual DRAM base address and size as reported by iBoot through the
874 	 * device tree.
875 	 */
876 	DTEntry chosen;
877 	unsigned int dt_entry_size;
878 	unsigned long const *dram_base;
879 	unsigned long const *dram_size;
880 	if (SecureDTLookupEntry(NULL, "/chosen", &chosen) != kSuccess) {
881 		panic("%s: Unable to find 'chosen' DT node", __FUNCTION__);
882 	}
883 
884 	if (SecureDTGetProperty(chosen, "dram-base", (void const **)&dram_base, &dt_entry_size) != kSuccess) {
885 		panic("%s: Unable to find 'dram-base' entry in the 'chosen' DT node", __FUNCTION__);
886 	}
887 
888 	if (SecureDTGetProperty(chosen, "dram-size", (void const **)&dram_size, &dt_entry_size) != kSuccess) {
889 		panic("%s: Unable to find 'dram-size' entry in the 'chosen' DT node", __FUNCTION__);
890 	}
891 
892 	gDramBase = *dram_base;
893 	gDramSize = *dram_size;
894 
895 	/*
896 	 * Initialize the stack protector for all future calls
897 	 * to C code. Since kernel_bootstrap() eventually
898 	 * switches stack context without returning through this
899 	 * function, we do not risk failing the check even though
900 	 * we mutate the guard word during execution.
901 	 */
902 	__stack_chk_guard = (unsigned long)early_random();
903 	/* Zero a byte of the protector to guard
904 	 * against string vulnerabilities
905 	 */
906 	__stack_chk_guard &= ~(0xFFULL << 8);
907 	machine_startup(args);
908 }
909 
910 /*
911  * Routine:        arm_init_cpu
912  * Function:
913  *    Runs on S2R resume (all CPUs) and SMP boot (non-boot CPUs only).
914  */
915 
916 void
arm_init_cpu(cpu_data_t * cpu_data_ptr)917 arm_init_cpu(
918 	cpu_data_t      *cpu_data_ptr)
919 {
920 #if __ARM_PAN_AVAILABLE__
921 	__builtin_arm_wsr("pan", 1);
922 #endif
923 
924 #ifdef __arm64__
925 	configure_timer_apple_regs();
926 	configure_misc_apple_regs(false);
927 #endif
928 
929 	cpu_data_ptr->cpu_flags &= ~SleepState;
930 
931 
932 	machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
933 
934 #if HIBERNATION
935 	if ((cpu_data_ptr == &BootCpuData) && (gIOHibernateState == kIOHibernateStateWakingFromHibernate)) {
936 		// the "normal" S2R code captures wake_abstime too early, so on a hibernation resume we fix it up here
937 		extern uint64_t wake_abstime;
938 		wake_abstime = gIOHibernateCurrentHeader->lastHibAbsTime;
939 
940 		// since the hw clock stops ticking across hibernation, we need to apply an offset;
941 		// iBoot computes this offset for us and passes it via the hibernation header
942 		extern uint64_t hwclock_conttime_offset;
943 		hwclock_conttime_offset = gIOHibernateCurrentHeader->hwClockOffset;
944 
945 		// during hibernation, we captured the idle thread's state from inside the PPL context, so we have to
946 		// fix up its preemption count
947 		unsigned int expected_preemption_count = (gEnforceQuiesceSafety ? 2 : 1);
948 		if (get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread) !=
949 		    expected_preemption_count) {
950 			panic("unexpected preemption count %u on boot cpu thread (should be %u)",
951 			    get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread),
952 			    expected_preemption_count);
953 		}
954 		cpu_data_ptr->cpu_active_thread->machine.preemption_count--;
955 	}
956 #endif /* HIBERNATION */
957 
958 #if __arm64__
959 	wfe_timeout_init();
960 	flush_mmu_tlb();
961 #endif
962 
963 	cpu_machine_idle_init(FALSE);
964 
965 	cpu_init();
966 
967 #ifdef  APPLETYPHOON
968 	if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
969 		cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
970 	}
971 #endif
972 	/* Initialize the timebase before serial_init, as some serial
973 	 * drivers use mach_absolute_time() to implement rate control
974 	 */
975 	cpu_timebase_init(FALSE);
976 
977 #if KPERF
978 	/* kptimer_curcpu_up() must be called after cpu_timebase_init */
979 	kptimer_curcpu_up();
980 #endif /* KPERF */
981 
982 	if (cpu_data_ptr == &BootCpuData) {
983 #if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
984 		/*
985 		 * Prevent CPUs from going into deep sleep until all
986 		 * CPUs are ready to do so.
987 		 */
988 		arm64_stall_sleep = TRUE;
989 #endif
990 		serial_init();
991 		PE_init_platform(TRUE, NULL);
992 		commpage_update_timebase();
993 
994 		exclaves_update_timebase(EXCLAVES_CLOCK_ABSOLUTE,
995 		    rtclock_base_abstime);
996 #if HIBERNATION
997 		if (gIOHibernateState == kIOHibernateStateWakingFromHibernate) {
998 			exclaves_update_timebase(EXCLAVES_CLOCK_CONTINUOUS,
999 			    hwclock_conttime_offset);
1000 		}
1001 #endif /* HIBERNATION */
1002 	}
1003 	PE_init_cpu();
1004 
1005 	fiq_context_init(TRUE);
1006 	cpu_data_ptr->rtcPop = EndOfAllTime;
1007 	timer_resync_deadlines();
1008 
1009 #if DEVELOPMENT || DEBUG
1010 	PE_arm_debug_enable_trace(true);
1011 #endif /* DEVELOPMENT || DEBUG */
1012 
1013 	kprintf("arm_cpu_init(): cpu %d online\n", cpu_data_ptr->cpu_number);
1014 
1015 	if (cpu_data_ptr == &BootCpuData) {
1016 		if (kdebug_enable == 0) {
1017 			__kdebug_only uint64_t elapsed = kdebug_wake();
1018 			KDBG(IOKDBG_CODE(DBG_HIBERNATE, 15), mach_absolute_time() - elapsed);
1019 		}
1020 
1021 #if CONFIG_TELEMETRY
1022 		bootprofile_wake_from_sleep();
1023 #endif /* CONFIG_TELEMETRY */
1024 	}
1025 #if CONFIG_CPU_COUNTERS
1026 	mt_wake_per_core();
1027 #endif /* CONFIG_CPU_COUNTERS */
1028 
1029 #if defined(KERNEL_INTEGRITY_CTRR)
1030 	if (ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] != CTRR_LOCKED) {
1031 		lck_spin_lock(&ctrr_cpu_start_lck);
1032 		ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] = CTRR_LOCKED;
1033 		thread_wakeup(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id]);
1034 		lck_spin_unlock(&ctrr_cpu_start_lck);
1035 	}
1036 #endif
1037 
1038 	slave_main(NULL);
1039 }
1040 
1041 /*
1042  * Routine:		arm_init_idle_cpu
1043  * Function:	Resume from non-retention WFI.  Called from the reset vector.
1044  */
1045 void __attribute__((noreturn))
arm_init_idle_cpu(cpu_data_t * cpu_data_ptr)1046 arm_init_idle_cpu(
1047 	cpu_data_t      *cpu_data_ptr)
1048 {
1049 #if __ARM_PAN_AVAILABLE__
1050 	__builtin_arm_wsr("pan", 1);
1051 #endif
1052 
1053 	machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
1054 
1055 #if __arm64__
1056 	wfe_timeout_init();
1057 	/* Enable asynchronous exceptions */
1058 	__builtin_arm_wsr("DAIFClr", DAIFSC_ASYNCF);
1059 #endif
1060 
1061 #ifdef  APPLETYPHOON
1062 	if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
1063 		cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
1064 	}
1065 #endif
1066 
1067 	/*
1068 	 * Update the active debug object to reflect that debug registers have been reset.
1069 	 * This will force any thread with active debug state to resync the debug registers
1070 	 * if it returns to userspace on this CPU.
1071 	 */
1072 	if (cpu_data_ptr->cpu_user_debug != NULL) {
1073 		arm_debug_set(NULL);
1074 	}
1075 
1076 	fiq_context_init(FALSE);
1077 
1078 	cpu_idle_exit(TRUE);
1079 }
1080 
1081 vm_map_address_t
phystokv(pmap_paddr_t pa)1082 phystokv(pmap_paddr_t pa)
1083 {
1084 	sptm_papt_t va;
1085 	if (sptm_phystokv(pa, &va) != LIBSPTM_SUCCESS) {
1086 		return 0;
1087 	}
1088 	return (vm_map_address_t)va;
1089 }
1090 
1091 vm_map_address_t
phystokv_range(pmap_paddr_t pa,vm_size_t * max_len)1092 phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
1093 {
1094 
1095 	vm_size_t len;
1096 
1097 	len = PAGE_SIZE - (pa & PAGE_MASK);
1098 	if (*max_len > len) {
1099 		*max_len = len;
1100 	}
1101 
1102 	return phystokv((sptm_paddr_t)pa);
1103 }
1104 
1105 vm_offset_t
ml_static_vtop(vm_offset_t va)1106 ml_static_vtop(vm_offset_t va)
1107 {
1108 	return (vm_offset_t)kvtophys_nofail((sptm_papt_t)va);
1109 }
1110 
1111 #define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
1112 #define ARM64_GRANULE_ALLOW_HINT (1 << 1)
1113 
1114 // Populate seg...AuxKC and fixup AuxKC permissions
1115 static bool
arm_vm_auxkc_init(void)1116 arm_vm_auxkc_init(void)
1117 {
1118 	if (auxkc_mh == 0 || auxkc_base == 0) {
1119 		return false; // no auxKC.
1120 	}
1121 
1122 	/* Fixup AuxKC and populate seg*AuxKC globals used below */
1123 	arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1124 
1125 	if (segLOWESTAuxKC != segLOWEST) {
1126 		panic("segLOWESTAuxKC (%p) not equal to segLOWEST (%p). auxkc_mh: %p, auxkc_base: %p",
1127 		    (void*)segLOWESTAuxKC, (void*)segLOWEST,
1128 		    (void*)auxkc_mh, (void*)auxkc_base);
1129 	}
1130 
1131 	/*
1132 	 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1133 	 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1134 	 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1135 	 * its RO region before extending it.
1136 	 */
1137 	assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1138 	assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1139 	if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1140 		segHIGHESTROAuxKC = segHIGHESTAuxKC;
1141 	}
1142 
1143 	/*
1144 	 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1145 	 * by CTRR, and the AuxKC RX region must be within the RO region.
1146 	 */
1147 	assert(segHIGHESTROAuxKC == auxkc_right_above);
1148 	assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1149 	assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1150 	assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1151 	assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
1152 
1153 	return true;
1154 }
1155 
1156 /*
1157  * Looks up the set of properties that describe the physical load addresses and sizes of the boot
1158  * kernelcache's loaded segments in the device tree and returns (1) the number of segments found
1159  * in *arm_vm_kernelcache_numrangesp and (2) their starting/ending addresses as an array of type
1160  * arm_physrange_t in *arm_vm_kernelcache_rangesp.
1161  * The function returns the total number of pages across all loaded boot kernelcache segments.
1162  * If there is a problem looking up the /chosen/memory-map node in the DT, all arguments are
1163  * zeroed and the function returns 0.
1164  */
1165 static unsigned int
arm_get_bootkc_ranges_from_DT(const arm_physrange_t ** arm_vm_kernelcache_rangesp,int * arm_vm_kernelcache_numrangesp)1166 arm_get_bootkc_ranges_from_DT(const arm_physrange_t **arm_vm_kernelcache_rangesp, int *arm_vm_kernelcache_numrangesp)
1167 {
1168 	DTEntry memory_map;
1169 	int err;
1170 	DTMemoryMapRange const *range;
1171 	unsigned int rangeSize;
1172 #define NUM_BOOTKC_RANGES 5
1173 	static arm_physrange_t bootkc_physranges[NUM_BOOTKC_RANGES] = { {0, } };
1174 	static int bootkc_numranges = 0;
1175 	static unsigned int bootkc_total_pages = 0;
1176 
1177 	assert(arm_vm_kernelcache_rangesp != NULL);
1178 	assert(arm_vm_kernelcache_numrangesp != NULL);
1179 
1180 	/* return cached values if previously computed */
1181 	if (bootkc_numranges == 0) {
1182 		err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1183 		if (err != kSuccess) {
1184 			*arm_vm_kernelcache_numrangesp = 0;
1185 			*arm_vm_kernelcache_rangesp = NULL;
1186 			return 0;
1187 		}
1188 
1189 		/* We're looking for 5 ranges: BootKC-ro, BootKC-rx, BootKC-bx, BootKC-rw, and BootKC-le */
1190 		const char *BootKC_Properties[NUM_BOOTKC_RANGES] = {
1191 			"BootKC-ro", "BootKC-rx", "BootKC-bx", "BootKC-rw", "BootKC-le"
1192 		};
1193 
1194 		for (int i = 0; i < NUM_BOOTKC_RANGES; i++) {
1195 			err = SecureDTGetProperty(memory_map, BootKC_Properties[i], (void const **)&range, &rangeSize);
1196 			if (err == kSuccess && rangeSize == sizeof(DTMemoryMapRange)) {
1197 				bootkc_physranges[i].start_phys = range->paddr;
1198 				bootkc_physranges[i].end_phys = range->paddr + range->length;
1199 				assert((bootkc_physranges[i].end_phys & PAGE_MASK) == 0);
1200 				bootkc_numranges++;
1201 				bootkc_total_pages += (unsigned int) atop_64(bootkc_physranges[i].end_phys - bootkc_physranges[i].start_phys);
1202 			}
1203 		}
1204 	}
1205 
1206 	*arm_vm_kernelcache_numrangesp = bootkc_numranges;
1207 	*arm_vm_kernelcache_rangesp = &bootkc_physranges[0];
1208 	return bootkc_total_pages;
1209 }
1210 
/*
 * Compute the low/high bounds of the kernelcache's read-only and overall
 * address ranges (segLOWEST, segLOWESTRO, segEXTRADATA, segTRUSTCACHE,
 * auxkc_* globals) from the Mach-O segments and device-tree entries.
 * Must run before protections are finalized; panics on layout violations.
 */
void
arm_vm_prot_init(__unused boot_args * args)
{
	/* Find the lowest text address among PRELINK_TEXT and TEXT. */
	segLOWESTTEXT = UINT64_MAX;
	if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
		segLOWESTTEXT = segPRELINKTEXTB;
	}
	assert(segSizeTEXT);
	if (segTEXTB < segLOWESTTEXT) {
		segLOWESTTEXT = segTEXTB;
	}
	assert(segLOWESTTEXT < UINT64_MAX);

	segEXTRADATA = 0;
	segSizeEXTRADATA = 0;
	segTRUSTCACHE = 0;
	segSizeTRUSTCACHE = 0;

	segLOWEST = segLOWESTTEXT;
	segLOWESTRO = segLOWESTTEXT;

	if (segLOWESTKC && segLOWESTKC < segLOWEST) {
		/*
		 * kernel collections have segments below the kernel. In particular the collection mach header
		 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
		 */
		segLOWEST = segLOWESTKC;
		if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
			segLOWESTRO = segLOWESTROKC;
		}
		if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
			segHIGHESTRO = segHIGHESTROKC;
		}
	}

	DTEntry memory_map;
	int err;

	// Device Tree portion of EXTRADATA
	if (SecureDTIsLockedDown()) {
		segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
		segSizeEXTRADATA = PE_state.deviceTreeSize;
	}

	// Trust Caches portion of EXTRADATA
	{
		DTMemoryMapRange const *trustCacheRange;
		unsigned int trustCacheRangeSize;

		err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
		assert(err == kSuccess);

		err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
		if (err == kSuccess) {
			if (trustCacheRangeSize != sizeof(DTMemoryMapRange)) {
				panic("Unexpected /chosen/memory-map/TrustCache property size %u != %zu", trustCacheRangeSize, sizeof(DTMemoryMapRange));
			}

			vm_offset_t const trustCacheRegion = phystokv(trustCacheRange->paddr);
			if (trustCacheRegion < segLOWEST) {
				if (segEXTRADATA != 0) {
					/* Trust cache must sit immediately after the locked-down DT. */
					if (trustCacheRegion != segEXTRADATA + segSizeEXTRADATA) {
						panic("Unexpected location of TrustCache region: %#lx != %#lx",
						    trustCacheRegion, segEXTRADATA + segSizeEXTRADATA);
					}
					segSizeEXTRADATA += trustCacheRange->length;
				} else {
					// Not all devices support CTRR device trees.
					segEXTRADATA = trustCacheRegion;
					segSizeEXTRADATA = trustCacheRange->length;
				}
			}
			segTRUSTCACHE = trustCacheRegion;
			segSizeTRUSTCACHE = trustCacheRange->length;
		}
	}

	/* Extend the tracked low bounds to cover EXTRADATA, which must sit below everything. */
	if (segSizeEXTRADATA != 0) {
		if (segEXTRADATA <= segLOWEST) {
			segLOWEST = segEXTRADATA;
			if (segEXTRADATA <= segLOWESTRO) {
				segLOWESTRO = segEXTRADATA;
			}
		} else {
			panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
		}
	}

	/* Locate the auxiliary KC (if any) from its memory-map entries. */
	const DTMemoryMapRange *auxKC_range, *auxKC_header_range;
	unsigned int auxKC_range_size, auxKC_header_range_size;

	err = SecureDTGetProperty(memory_map, "AuxKC", (const void**)&auxKC_range,
	    &auxKC_range_size);
	if (err != kSuccess) {
		goto noAuxKC;
	}
	assert(auxKC_range_size == sizeof(DTMemoryMapRange));
	err = SecureDTGetProperty(memory_map, "AuxKC-mach_header",
	    (const void**)&auxKC_header_range, &auxKC_header_range_size);
	if (err != kSuccess) {
		goto noAuxKC;
	}
	assert(auxKC_header_range_size == sizeof(DTMemoryMapRange));

	if (auxKC_header_range->paddr == 0 || auxKC_range->paddr == 0) {
		goto noAuxKC;
	}

	auxkc_mh = phystokv(auxKC_header_range->paddr);
	auxkc_base = phystokv(auxKC_range->paddr);

	/* The auxKC must load below everything tracked so far; record what sits above it. */
	if (auxkc_base < segLOWEST) {
		auxkc_right_above = segLOWEST;
		segLOWEST = auxkc_base;
	} else {
		panic("auxkc_base (%p) not below segLOWEST (%p)", (void*)auxkc_base, (void*)segLOWEST);
	}

noAuxKC:
	/* Record the bounds of the kernelcache. */
	vm_kernelcache_base = segLOWEST;
	vm_kernelcache_top = end_kern;
}
1334 
1335 /*
1336  * return < 0 for a < b
1337  *          0 for a == b
1338  *        > 0 for a > b
1339  */
1340 typedef int (*cmpfunc_t)(const void *a, const void *b);
1341 
1342 extern void
1343 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1344 
1345 SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1346 
1347 #define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
1348 #define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1349 
1350 void
arm_vm_prot_finalize(boot_args * args __unused)1351 arm_vm_prot_finalize(boot_args * args __unused)
1352 {
1353 	/*
1354 	 * At this point, we are far enough along in the boot process that it will be
1355 	 * safe to free up all of the memory preceeding the kernel.  It may in fact
1356 	 * be safe to do this earlier.
1357 	 *
1358 	 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1359 	 * as usable.
1360 	 */
1361 
1362 	/* Slide KLDDATA */
1363 	sptm_slide_region(segKLDDATAB, (unsigned int)(segSizeKLDDATA >> PAGE_SHIFT));
1364 
1365 	/*
1366 	 * Replace the boot CPU's stacks with properly-guarded dynamically allocated stacks.
1367 	 * This must happen prior to sliding segBOOTDATAB, which will effectively remove
1368 	 * the existing boot stacks.
1369 	 */
1370 	cpu_stack_alloc(&BootCpuData);
1371 	arm64_replace_bootstack(&BootCpuData);
1372 
1373 	/* Slide early-boot data */
1374 	sptm_slide_region(segBOOTDATAB, (unsigned int)(segSizeBOOTDATA >> PAGE_SHIFT));
1375 
1376 	/* Slide linkedit, unless otherwise requested */
1377 	bool keep_linkedit = false;
1378 	PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
1379 #if CONFIG_DTRACE
1380 	if (dtrace_keep_kernel_symbols()) {
1381 		keep_linkedit = true;
1382 	}
1383 #endif /* CONFIG_DTRACE */
1384 #if KASAN_DYNAMIC_BLACKLIST
1385 	/* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime.  As such, the
1386 	 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
1387 	keep_linkedit = true;
1388 #endif
1389 
1390 	if (!keep_linkedit) {
1391 		sptm_slide_region(segLINKB, (unsigned int)(segSizeLINK >> PAGE_SHIFT));
1392 		if (segSizePLKLINKEDIT) {
1393 			/* Prelinked kernel LINKEDIT */
1394 			sptm_slide_region(segPLKLINKEDITB, (unsigned int)(segSizePLKLINKEDIT >> PAGE_SHIFT));
1395 		}
1396 	}
1397 
1398 	/* Slide prelinked kernel plists */
1399 	sptm_slide_region(segPRELINKINFOB, (unsigned int)(segSizePRELINKINFO >> PAGE_SHIFT));
1400 
1401 	/*
1402 	 * Free the portion of memory that precedes the first usable region, known
1403 	 * as the physical slide.
1404 	 */
1405 	ml_static_mfree(SPTMArgs->phys_slide_papt, SPTMArgs->phys_slide_size);
1406 
1407 	/*
1408 	 * KTRR support means we will be mucking with these pages and trying to
1409 	 * protect them; we cannot free the pages to the VM if we do this.
1410 	 */
1411 	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1412 		/* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1413 		ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1414 	}
1415 
1416 	ml_static_mfree(segBOOTDATAB, segSizeBOOTDATA);
1417 
1418 #if __ARM_KERNEL_PROTECT__
1419 	arm_vm_populate_kernel_el0_mappings();
1420 #endif /* __ARM_KERNEL_PROTECT__ */
1421 }
1422 
1423 /*
1424  * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1425  * address accesses. It can be enabled separately for TTBR0 (user) and
1426  * TTBR1 (kernel).
1427  */
1428 void
arm_set_kernel_tbi(void)1429 arm_set_kernel_tbi(void)
1430 {
1431 #if !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI
1432 	uint64_t old_tcr, new_tcr;
1433 
1434 	old_tcr = new_tcr = get_tcr();
1435 	/*
1436 	 * For kernel configurations that require TBI support on
1437 	 * PAC systems, we enable DATA TBI only.
1438 	 */
1439 	new_tcr |= TCR_TBI1_TOPBYTE_IGNORED;
1440 	new_tcr |= TCR_TBID1_ENABLE;
1441 
1442 	if (old_tcr != new_tcr) {
1443 		set_tcr(new_tcr);
1444 		sysreg_restore.tcr_el1 = new_tcr;
1445 	}
1446 #endif /* !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI */
1447 }
1448 
1449 /* allocate a page for a page table: we support static and dynamic mappings.
1450  *
1451  * returns a physical address for the allocated page
1452  *
1453  * for static mappings, we allocate from the region ropagetable_begin to ro_pagetable_end-1,
1454  * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
1455  *
1456  * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
1457  */
1458 pmap_paddr_t
alloc_ptpage(sptm_pt_level_t level,bool map_static)1459 alloc_ptpage(sptm_pt_level_t level, bool map_static)
1460 {
1461 	pmap_paddr_t paddr = 0;
1462 
1463 #if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
1464 	map_static = FALSE;
1465 #endif
1466 
1467 	/* Set the next free ropage if this is the first call to this function */
1468 	if (!ropage_next) {
1469 		ropage_next = (vm_offset_t)&ropagetable_begin;
1470 	}
1471 
1472 	if (map_static) {
1473 		/* This is a RO allocation. Make sure we have room in the ropagetable area */
1474 		assert(ropage_next < (vm_offset_t)&ropagetable_end);
1475 
1476 		/* Obtain physical address and increment the index into the ropagetable area */
1477 		paddr = (pmap_paddr_t)kvtophys((sptm_papt_t)ropage_next);
1478 		ropage_next += ARM_PGBYTES;
1479 	} else {
1480 		/* This is a RW allocation. Simply grab a page from [avail_start] */
1481 		paddr = avail_start;
1482 		avail_start += ARM_PGBYTES;
1483 	}
1484 
1485 	/* Retype the page to XNU_PAGE_TABLE, with the desired level */
1486 	sptm_retype_params_t retype_params;
1487 	retype_params.level = level;
1488 	sptm_retype(paddr, XNU_DEFAULT, XNU_PAGE_TABLE, retype_params);
1489 
1490 	return paddr;
1491 }
1492 
1493 /**
1494  * Initialize a vm_image_offsets structure with information obtained from a
1495  * Mach-O header for the wanted image.
1496  *
1497  * @param debug_header_entry The entry in the debug header images list to obtain
1498  *                           a pointer to the Mach-O header from. This must be
1499  *                           either the SPTM or TXM debug header entry.
1500  * @param offsets Output pointer of the vm_image_offsets structure to fill in.
1501  */
1502 static void
init_image_offsets(size_t debug_header_entry,vm_image_offsets * offsets)1503 init_image_offsets(size_t debug_header_entry, vm_image_offsets *offsets)
1504 {
1505 	assert(offsets != NULL);
1506 	assert((debug_header_entry == DEBUG_HEADER_ENTRY_SPTM) ||
1507 	    (debug_header_entry == DEBUG_HEADER_ENTRY_TXM));
1508 
1509 	offsets->slid_base = (vm_offset_t)SPTMArgs->debug_header->image[debug_header_entry];
1510 	kernel_mach_header_t *macho = (kernel_mach_header_t*)offsets->slid_base;
1511 	offsets->unslid_base = (vm_offset_t)getsegbynamefromheader(macho, "__TEXT")->vmaddr;
1512 	assert((offsets->slid_base != 0) && (offsets->unslid_base != 0));
1513 	offsets->slide = offsets->slid_base - offsets->unslid_base;
1514 	offsets->unslid_top = getlastaddr(macho);
1515 	offsets->slid_top = offsets->unslid_top + offsets->slide;
1516 }
1517 
1518 #define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1519 #define ARM64_PHYSMAP_SLIDE_MASK  (ARM64_PHYSMAP_SLIDE_RANGE - 1)
1520 
1521 void
arm_vm_init(uint64_t memory_size,boot_args * args)1522 arm_vm_init(uint64_t memory_size, boot_args * args)
1523 {
1524 	vm_map_address_t va_l1, va_l1_end;
1525 	tt_entry_t       *cpu_l1_tte;
1526 	tt_entry_t       *cpu_l2_tte;
1527 	vm_map_address_t va_l2, va_l2_end;
1528 	vm_map_address_t dynamic_memory_begin;
1529 	uint64_t         mem_segments;
1530 
1531 	/* Get the virtual and physical kernel-managed memory base from boot_args */
1532 	gVirtBase = args->virtBase;
1533 	gPhysBase = args->physBase;
1534 
1535 	/* Get the memory size */
1536 #if KASAN
1537 	real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1538 #else
1539 	real_phys_size = args->memSize;
1540 #endif
1541 
1542 	/**
1543 	 * Ensure the physical region we specify for the VM to manage ends on a
1544 	 * software page boundary.  Note that the software page size (PAGE_SIZE)
1545 	 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1546 	 * We must round the reported memory size down to the nearest PAGE_SIZE
1547 	 * boundary to ensure the VM does not try to manage a page it does not
1548 	 * completely own.  The KASAN shadow region, if present, is managed entirely
1549 	 * in units of the hardware page size and should not need similar treatment.
1550 	 */
1551 	gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
1552 
1553 	/* Obtain total memory size, including non-managed memory */
1554 	mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
1555 
1556 	if ((memory_size != 0) && (mem_size > memory_size)) {
1557 		mem_size = memory_size;
1558 		max_mem_actual = memory_size;
1559 	} else {
1560 		max_mem_actual = mem_actual;
1561 	}
1562 
1563 	/* Make sure the system does not have more physical memory than what can be mapped */
1564 	if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
1565 		panic("Unsupported memory configuration %lx", mem_size);
1566 	}
1567 
1568 	physmap_base = SPTMArgs->physmap_base;
1569 	physmap_end = static_memory_end = SPTMArgs->physmap_end;
1570 
1571 #if KASAN && !defined(ARM_LARGE_MEMORY)
1572 	/* add the KASAN stolen memory to the physmap */
1573 	dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1574 #else
1575 	dynamic_memory_begin = static_memory_end;
1576 #endif
1577 
1578 	if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
1579 		panic("Unsupported memory configuration %lx", mem_size);
1580 	}
1581 
1582 	/*
1583 	 * TODO: free bootstrap table memory back to allocator.
1584 	 * on large memory systems bootstrap tables could be quite large.
1585 	 * after bootstrap complete, xnu can warm start with a single 16KB page mapping
1586 	 * to trampoline to KVA. this requires only 3 pages to stay resident.
1587 	 */
1588 	avail_start = first_avail_phys;
1589 
1590 	/*
1591 	 * Initialize l1 page table page.
1592 	 *
1593 	 * SPTM TODO: Have a separate root_table_paddr field in the sptm_args
1594 	 *            instead of snooping the libsptm_state (XNU should not be
1595 	 *            snooping the libsptm_state directly in general).
1596 	 */
1597 	cpu_ttep = (pmap_paddr_t)const_sptm_args.libsptm_state.root_table_paddr;
1598 	cpu_tte = (tt_entry_t *)phystokv(cpu_ttep);
1599 	avail_end = gPhysBase + mem_size;
1600 	assert(!(avail_end & PAGE_MASK));
1601 
1602 	/* These need to be set early so pa_valid() works */
1603 	vm_first_phys = gPhysBase;
1604 	vm_last_phys = trunc_page(avail_end);
1605 
1606 #if KASAN
1607 	real_avail_end = gPhysBase + real_phys_size;
1608 #else
1609 	real_avail_end = avail_end;
1610 #endif
1611 
1612 	/*
1613 	 * Now retrieve addresses for various segments from kernel mach-o header
1614 	 */
1615 	segPRELINKTEXTB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1616 	segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1617 	segPLKTEXTEXECB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1618 	segTEXTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1619 	segDATACONSTB    = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1620 	segTEXTEXECB     = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1621 	segDATAB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
1622 
1623 	segBOOTDATAB     = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
1624 	segLINKB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
1625 	segKLDB          = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
1626 	segKLDDATAB      = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
1627 	segPRELINKDATAB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
1628 	segPRELINKINFOB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
1629 	segPLKLLVMCOVB   = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
1630 	segPLKLINKEDITB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
1631 	segLASTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
1632 	segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
1633 
1634 	sectHIBTEXTB     = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
1635 	sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
1636 	segHIBDATAB      = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);
1637 
1638 	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
1639 		kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
1640 
1641 		// fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
1642 		segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
1643 		assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC);                        // kernel PLK_TEXT_EXEC must be empty
1644 
1645 		assert(segLASTB);                                                      // kernel LAST can be empty, but it must have
1646 		                                                                       // a valid address for computations below.
1647 
1648 		assert(segKCTEXTEXECB <= segLASTB);                                    // KC TEXT_EXEC must contain kernel LAST
1649 		assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
1650 		segPLKTEXTEXECB = segLASTB + segSizeLAST;
1651 		segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);
1652 
1653 		// fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
1654 		segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
1655 		assert(segPLKDATACONSTB && !segSizePLKDATACONST);                      // kernel PLK_DATA_CONST must be empty
1656 		assert(segLASTDATACONSTB && segSizeLASTDATACONST);                     // kernel LASTDATA_CONST must be non-empty
1657 		assert(segKCDATACONSTB <= segLASTDATACONSTB);                          // KC DATA_CONST must contain kernel LASTDATA_CONST
1658 		assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
1659 		segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
1660 		segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);
1661 
1662 		// fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
1663 		segKCDATAB      = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
1664 		assert(segPRELINKDATAB && !segSizePRELINKDATA);                        // kernel PRELINK_DATA must be empty
1665 		assert(segKCDATAB <= segPRELINKDATAB);                                 // KC DATA must contain kernel PRELINK_DATA
1666 		assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
1667 		segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);
1668 
1669 		// fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
1670 		assert(segPRELINKTEXTB && !segSizePRELINKTEXT);                        // kernel PRELINK_TEXT must be empty
1671 		segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1672 		assert(segPRELINKINFOB && !segSizePRELINKINFO);                        // kernel PRELINK_INFO must be empty
1673 		segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
1674 		segLINKB        = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
1675 	}
1676 
1677 	/* if one of the new segments is present, the other one better be as well */
1678 	if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
1679 		assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
1680 	}
1681 
1682 	etext = (vm_offset_t) segTEXTB + segSizeTEXT;
1683 	sdata = (vm_offset_t) segDATAB;
1684 	edata = (vm_offset_t) segDATAB + segSizeDATA;
1685 	end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastkerneladdr()); /* Force end to next page */
1686 
1687 	vm_set_page_size();
1688 
1689 	vm_kernel_base = segTEXTB;
1690 	vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
1691 	vm_kext_base = segPRELINKTEXTB;
1692 	vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
1693 
1694 	vm_prelink_stext = segPRELINKTEXTB;
1695 	if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
1696 		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
1697 	} else {
1698 		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
1699 	}
1700 	vm_prelink_sinfo = segPRELINKINFOB;
1701 	vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
1702 	vm_slinkedit = segLINKB;
1703 	vm_elinkedit = segLINKB + segSizeLINK;
1704 
1705 	vm_prelink_sdata = segPRELINKDATAB;
1706 	vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
1707 
1708 	arm_vm_prot_init(args);
1709 
1710 	/**
1711 	 * Count the number of pages the boot kernelcache occupies.  Additionally,
1712 	 * ml_static_mfree() uses the BootKC ranges from the DT to account for freed kernelcache pages.
1713 	 */
1714 	vm_page_kernelcache_count = arm_get_bootkc_ranges_from_DT(&arm_vm_kernelcache_ranges, &arm_vm_kernelcache_numranges);
1715 
1716 	assert(vm_page_kernelcache_count > 0);
1717 
1718 #if KASAN
1719 	/* record the extent of the physmap */
1720 	physmap_vbase = physmap_base;
1721 	physmap_vtop = physmap_end;
1722 	kasan_init();
1723 #endif /* KASAN */
1724 
1725 #if CONFIG_CPU_COUNTERS
1726 	mt_early_init();
1727 #endif /* CONFIG_CPU_COUNTERS */
1728 
1729 	kva_active = TRUE;
1730 
1731 	if (arm_vm_auxkc_init()) {
1732 		if (segLOWESTROAuxKC < segLOWESTRO) {
1733 			segLOWESTRO = segLOWESTROAuxKC;
1734 		}
1735 		if (segHIGHESTROAuxKC > segHIGHESTRO) {
1736 			segHIGHESTRO = segHIGHESTROAuxKC;
1737 		}
1738 		if (segLOWESTRXAuxKC < segLOWESTTEXT) {
1739 			segLOWESTTEXT = segLOWESTRXAuxKC;
1740 		}
1741 		assert(segLOWEST == segLOWESTAuxKC);
1742 	}
1743 
1744 	sane_size = mem_size - (avail_start - gPhysBase);
1745 	max_mem = mem_size;
1746 	// vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
1747 	vm_kernel_slid_base = segLOWESTTEXT;
1748 	vm_kernel_stext = segTEXTB;
1749 
1750 	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
1751 		vm_kernel_etext = segTEXTEXECB + segSizeTEXTEXEC;
1752 		vm_kernel_slid_top = vm_slinkedit;
1753 	} else {
1754 		assert(segDATACONSTB == segTEXTB + segSizeTEXT);
1755 		assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
1756 		vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
1757 		vm_kernel_slid_top = vm_prelink_einfo;
1758 	}
1759 
1760 	/**
1761 	 * Calculate the address ranges used to determine whether an address is an
1762 	 * SPTM or TXM address, as well as the slides used to slide/unslide those
1763 	 * addresses.
1764 	 *
1765 	 * The debug header contains pointers to the beginning of the images loaded
1766 	 * up by iBoot (which always start with the Mach-O header). The __TEXT
1767 	 * segment should be the first (and lowest) segment in both of these
1768 	 * binaries (the addresses in the Mach-O header are all unslid).
1769 	 */
1770 	init_image_offsets(DEBUG_HEADER_ENTRY_SPTM, &vm_sptm_offsets);
1771 	init_image_offsets(DEBUG_HEADER_ENTRY_TXM, &vm_txm_offsets);
1772 
1773 	dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
1774 
1775 	/* TODO: CONFIG_XNUPOST CTRR test */
1776 
1777 	pmap_bootstrap(dynamic_memory_begin);
1778 
1779 	disable_preemption();
1780 
	/*
	 * Initialize L3 page table pages:
	 *   cover this address range:
	 *     2MB + framebuffer size + 10MB for each 256MB segment
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all static
	 *       memory are allocated and inserted into the hierarchy by the SPTM beforehand.
	 *       Instead, this code simply walks the page tables to find those pre-allocated
	 *       tables and allocates PTD objects for them.
	 */
1791 
1792 	mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
1793 
1794 	va_l1 = dynamic_memory_begin;
1795 	va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
1796 	va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
1797 	va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
1798 
1799 	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1800 
1801 	while (va_l1 < va_l1_end) {
1802 		va_l2 = va_l1;
1803 
1804 		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
1805 			/* If this is the last L1 entry, it must cover the last mapping. */
1806 			va_l2_end = va_l1_end;
1807 		} else {
1808 			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
1809 		}
1810 
1811 		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1812 
1813 		while (va_l2 < va_l2_end) {
1814 			/* Obtain pre-allocated page and setup L3 Table TTE in L2 */
1815 			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
1816 			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
1817 			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
1818 
1819 			va_l2 += ARM_TT_L2_SIZE;
1820 			cpu_l2_tte++;
1821 		}
1822 
1823 		va_l1 = va_l2_end;
1824 		cpu_l1_tte++;
1825 	}
1826 
	/*
	 * Initialize L3 page table pages:
	 *   cover this address range:
	 *     ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all static
	 *       memory are allocated and inserted into the hierarchy by the SPTM beforehand.
	 *       Instead, this code simply walks the page tables to find those pre-allocated
	 *       tables and allocates PTD objects for them.
	 */
1837 	va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
1838 	va_l1_end = VM_MAX_KERNEL_ADDRESS;
1839 
1840 	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1841 
1842 	while (va_l1 < va_l1_end) {
1843 		va_l2 = va_l1;
1844 
1845 		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
1846 			/* If this is the last L1 entry, it must cover the last mapping. */
1847 			va_l2_end = va_l1_end;
1848 		} else {
1849 			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
1850 		}
1851 
1852 		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
1853 
1854 		while (va_l2 < va_l2_end) {
1855 			/* Obtain pre-allocated page and setup L3 Table TTE in L2 */
1856 			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
1857 			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
1858 			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
1859 
1860 			va_l2 += ARM_TT_L2_SIZE;
1861 			cpu_l2_tte++;
1862 		}
1863 
1864 		va_l1 = va_l2_end;
1865 		cpu_l1_tte++;
1866 	}
1867 
1868 	/*
1869 	 * Adjust avail_start so that the range that the VM owns
1870 	 * starts on a PAGE_SIZE aligned boundary.
1871 	 */
1872 	avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
1873 
1874 	/* TODO pmap_static_allocations_done() */
1875 
1876 	first_avail = avail_start;
1877 	patch_low_glo_static_region(first_avail_phys, avail_start - first_avail_phys);
1878 	enable_preemption();
1879 }
1880 
1881 /*
1882  * Returns true if the address lies within __TEXT, __TEXT_EXEC or __DATA_CONST
1883  * segment range. This is what [vm_kernel_stext, vm_kernel_etext) used to cover.
1884  * The segments together may not make a continuous address space anymore and so
1885  * individual intervals are inspected.
1886  */
1887 bool
kernel_text_contains(vm_offset_t addr)1888 kernel_text_contains(vm_offset_t addr)
1889 {
1890 	if (segTEXTB <= addr && addr < (segTEXTB + segSizeTEXT)) {
1891 		return true;
1892 	}
1893 	if (segTEXTEXECB <= addr && addr < (segTEXTEXECB + segSizeTEXTEXEC)) {
1894 		return true;
1895 	}
1896 	return segDATACONSTB <= addr && addr < (segDATACONSTB + segSizeDATACONST);
1897 }
1898