xref: /xnu-12377.81.4/osfmk/arm64/sptm/arm_init_sptm.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /**
2  * Copyright (c) 2022-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <arm64/lowglobals.h>
30 #include <kern/ecc.h>
31 #include <kern/timer_queue.h>
32 #include <kern/monotonic.h>
33 #include <machine/commpage.h>
34 #include <pexpert/device_tree.h>
35 #include <arm/cpu_internal.h>
36 #include <arm/misc_protos.h>
37 #include <arm/machine_cpu.h>
38 #include <arm/rtclock.h>
39 #include <vm/vm_map.h>
40 #include <mach/exclaves.h>
41 #include <mach/vm_param.h>
42 #include <libkern/stack_protector.h>
43 #include <console/serial_protos.h>
44 #include <arm64/sptm/pmap/pmap_pt_geometry.h>
45 #include <arm64/sptm/sptm.h>
46 #include <sptm/sptm_common.h>
47 #include <vm/vm_page_internal.h>
48 
49 #if CONFIG_TELEMETRY
50 #include <kern/telemetry.h>
51 #endif /* CONFIG_TELEMETRY */
52 
53 #if KPERF
54 #include <kperf/kptimer.h>
55 #endif /* KPERF */
56 
57 #if HIBERNATION
58 #include <IOKit/IOPlatformExpert.h>
59 #include <machine/pal_hibernate.h>
60 #endif /* HIBERNATION */
61 
62 #if __arm64__
63 #include <pexpert/arm64/apt_msg.h>
64 #endif
65 
66 #if HAS_MTE
67 #include <arm64/mte_xnu.h>
68 #endif /* HAS_MTE */
69 
70 /**
71  * Functions defined elsewhere that are required by this source file.
72  */
73 extern void patch_low_glo(void);
74 extern int serial_init(void);
75 extern void sleep_token_buffer_init(void);
76 
77 /**
78  * Bootstrap stacks. Used on the cold boot path to set up the boot CPU's
79  * per-CPU data structure.
80  */
81 extern vm_offset_t intstack_top;
82 extern vm_offset_t excepstack_top;
83 
84 /* First (inclusive) and last (exclusive) physical addresses */
85 extern pmap_paddr_t vm_first_phys;
86 extern pmap_paddr_t vm_last_phys;
87 
88 /* UART hibernation flag - import so we can set it ASAP on resume. */
89 extern MARK_AS_HIBERNATE_DATA bool uart_hibernation;
90 
91 /* Used to cache memSize, as passed by iBoot */
92 SECURITY_READ_ONLY_LATE(uint64_t) memSize = 0;
93 
94 int debug_task;
95 
96 /**
97  * Set according to what serial-related boot-args have been passed to XNU.
98  */
99 extern int disableConsoleOutput;
100 
101 #if XNU_TARGET_OS_OSX
102 /**
103  * Extern the PMAP boot-arg to enable/disable XNU_KERNEL_RESTRICTED.
104  * We need it here because if we detect an auxKC, we disable the mitigation.
105  */
106 extern bool use_xnu_restricted;
107 #endif /* XNU_TARGET_OS_OSX */
108 
109 /**
110  * SPTM devices do not support static kernelcaches, but the rest of XNU
111  * expects this variable to be defined. Set it to false at build time.
112  */
113 SECURITY_READ_ONLY_LATE(bool) static_kernelcache = false;
114 
115 TUNABLE(bool, restore_boot, "-restore", false);
116 
117 /**
118  * First physical address freely available to xnu.
119  */
120 SECURITY_READ_ONLY_LATE(addr64_t) first_avail_phys = 0;
121 
122 #if HAS_BP_RET
123 /* Enable both branch target retention (0x2) and branch direction retention (0x1) across sleep */
124 uint32_t bp_ret = 3;
125 extern void set_bp_ret(void);
126 #endif
127 
128 #if SCHED_HYGIENE_DEBUG
129 
130 #if XNU_PLATFORM_iPhoneOS
131 #define DEFAULT_INTERRUPT_MASKED_TIMEOUT 48000   /* 2ms */
132 #elif XNU_PLATFORM_XROS
133 #define DEFAULT_INTERRUPT_MASKED_TIMEOUT 12000   /* 500us */
134 #else
135 #define DEFAULT_INTERRUPT_MASKED_TIMEOUT 0xd0000 /* 35.499ms */
136 #endif /* XNU_PLATFORM_iPhoneOS */
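/*
 * These values are in timebase ticks (MACHINE_TIMEOUT_UNIT_TIMEBASE below).
 * Assuming the typical 24MHz Apple SoC timebase: 48000 ticks = 2ms,
 * 12000 ticks = 500us, and 0xd0000 (851968) ticks ~= 35.499ms.
 */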
137 
138 TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, interrupt_masked_debug_mode,
139     "machine-timeouts", "interrupt-masked-debug-mode",
140     "interrupt-masked-debug-mode",
141     SCHED_HYGIENE_MODE_PANIC,
142     TUNABLE_DT_CHECK_CHOSEN);
143 
144 MACHINE_TIMEOUT_DEV_WRITEABLE(interrupt_masked_timeout, "interrupt-masked",
145     DEFAULT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
146     NULL);
147 #if __arm64__
148 #define SSHOT_INTERRUPT_MASKED_TIMEOUT 0xf9999 /* 64-bit: 42.599ms */
149 #endif
150 MACHINE_TIMEOUT_DEV_WRITEABLE(stackshot_interrupt_masked_timeout, "sshot-interrupt-masked",
151     SSHOT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
152     NULL);
153 #undef SSHOT_INTERRUPT_MASKED_TIMEOUT
154 #endif
155 
156 /*
157  * A 6-second timeout will give the watchdog code a chance to run
158  * before a panic is triggered by the xcall routine.
159  */
160 #define XCALL_ACK_TIMEOUT_NS ((uint64_t) 6000000000)
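/* Converted to mach absolute time in arm_init() via nanoseconds_to_absolutetime(). */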
161 uint64_t xcall_ack_timeout_abstime;
162 
163 #ifndef __BUILDING_XNU_LIBRARY__
164 #define BOOTARGS_SECTION_ATTR __attribute__((section("__DATA, __const")))
165 #else /* __BUILDING_XNU_LIBRARY__ */
166 /* Special segments are not used when building for user-mode */
167 #define BOOTARGS_SECTION_ATTR
168 #endif /* __BUILDING_XNU_LIBRARY__ */
169 
170 boot_args const_boot_args BOOTARGS_SECTION_ATTR;
171 boot_args      *BootArgs BOOTARGS_SECTION_ATTR;
172 
173 /**
174  * The SPTM provides a second set of boot arguments, on top of those
175  * provided by iBoot.
176  */
177 SECURITY_READ_ONLY_LATE(sptm_bootstrap_args_xnu_t) const_sptm_args;
178 SECURITY_READ_ONLY_LATE(const sptm_bootstrap_args_xnu_t *) SPTMArgs;
179 SECURITY_READ_ONLY_LATE(const bool *) sptm_xnu_triggered_panic_ptr;
180 
181 extern char osbuild_config[];
182 
183 TUNABLE(uint32_t, arm_diag, "diag", 0);
184 #ifdef  APPLETYPHOON
185 static unsigned cpus_defeatures = 0x0;
186 extern void cpu_defeatures_set(unsigned int);
187 #endif
188 
189 #if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
190 extern volatile boolean_t arm64_stall_sleep;
191 #endif
192 
193 extern boolean_t force_immediate_debug_halt;
194 
195 #if HAS_APPLE_PAC
196 SECURITY_READ_ONLY_LATE(boolean_t) diversify_user_jop = TRUE;
197 #endif
198 
199 #if HAS_MTE
200 #if DEVELOPMENT || DEBUG
201 STATIC_IF_KEY_DEFINE_TRUE(mte_config_kern_enabled);
202 STATIC_IF_KEY_DEFINE_FALSE(mte_config_kern_data_enabled);
203 STATIC_IF_KEY_DEFINE_TRUE(mte_config_user_enabled);
204 STATIC_IF_KEY_DEFINE_FALSE(mte_config_user_data_enabled);
205 STATIC_IF_KEY_DEFINE_FALSE(mte_config_force_all_enabled);
206 STATIC_IF_KEY_DEFINE_FALSE(mte_debug_tco_state);
207 STATIC_IF_KEY_DEFINE_FALSE(mte_panic_on_non_canonical);
208 STATIC_IF_KEY_DEFINE_FALSE(mte_panic_on_async_fault);
209 #endif /* DEVELOPMENT || DEBUG */
210 #endif /* HAS_MTE */
211 
212 #if HAS_MTE
213 SECURITY_READ_ONLY_LATE(bool) is_mte_enabled = true;
214 SECURITY_READ_ONLY_LATE(bool) panic_on_user_induced_iomd_kernel_faults = false;
215 #endif /* HAS_MTE */
216 
217 SECURITY_READ_ONLY_LATE(uint64_t) gDramBase;
218 SECURITY_READ_ONLY_LATE(uint64_t) gDramSize;
219 SECURITY_READ_ONLY_LATE(ppnum_t)  pmap_first_pnum;
220 
221 SECURITY_READ_ONLY_LATE(bool) serial_console_enabled = false;
222 
223 /**
224  * SPTM TODO: The following flag is set up based on the presence and
225  *            configuration of the 'sptm_stability_hacks' boot-arg; it is
226  *            consulted in certain codepaths that do not function properly
227  *            on SPTM systems today, in order to keep the system stable
228  *            enough to boot fully to user space.
229  */
230 SECURITY_READ_ONLY_LATE(bool) sptm_stability_hacks = false;
231 
232 #if APPLEVIRTUALPLATFORM
233 SECURITY_READ_ONLY_LATE(vm_offset_t) reset_vector_vaddr = 0;
234 #endif /* APPLEVIRTUALPLATFORM */
235 
236 /*
237  * Forward declarations
238  */
239 void arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_args);
240 #if KASAN
241 void arm_init_kasan(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_args);
242 #endif /* KASAN */
243 
244 #if __arm64__
245 unsigned int page_shift_user32; /* for page_size as seen by a 32-bit task */
246 
247 extern void configure_misc_apple_boot_args(void);
248 extern void configure_misc_apple_regs(bool is_boot_cpu);
249 extern void configure_timer_apple_regs(void);
250 #endif /* __arm64__ */
251 
252 
253 /*
254  * JOP rebasing
255  */
256 
257 #define dyldLogFunc(msg, ...)
258 #include <mach/dyld_kernel_fixups.h>
259 
260 extern uint32_t __thread_starts_sect_start[] __asm("section$start$__TEXT$__thread_starts");
261 extern uint32_t __thread_starts_sect_end[]   __asm("section$end$__TEXT$__thread_starts");
262 #if defined(HAS_APPLE_PAC)
263 extern void OSRuntimeSignStructors(kernel_mach_header_t * header);
264 extern void OSRuntimeSignStructorsInFileset(kernel_mach_header_t * header);
265 #endif /* defined(HAS_APPLE_PAC) */
266 
267 extern vm_offset_t vm_kernel_slide;
268 extern vm_offset_t segLOWESTKC, segHIGHESTKC, segLOWESTROKC, segHIGHESTROKC;
269 extern vm_offset_t segLOWESTAuxKC, segHIGHESTAuxKC, segLOWESTROAuxKC, segHIGHESTROAuxKC;
270 extern vm_offset_t segLOWESTRXAuxKC, segHIGHESTRXAuxKC, segHIGHESTNLEAuxKC;
271 
272 #if HAS_MTE
273 #if DEVELOPMENT || DEBUG
274 __static_if_init_func
275 static void
276 mte_config_setup(const char *args)
277 {
278 	mte_config_t config = (mte_config_t)static_if_boot_arg_uint64(args, "mte", MTE_CONFIG_DEFAULT);
279 
280 	if (config & MTE_KERNEL_ENABLE) {
281 		static_if_key_enable(mte_config_kern_enabled);
282 	}
283 
284 	if (config & MTE_KERNEL_ENABLE_PURE_DATA) {
285 		static_if_key_enable(mte_config_kern_data_enabled);
286 	}
287 
288 	if (config & MTE_USER_ENABLE) {
289 		static_if_key_enable(mte_config_user_enabled);
290 	}
291 
292 	if (config & MTE_USER_FORCE_ENABLE_ALL) {
293 		static_if_key_enable(mte_config_force_all_enabled);
294 	}
295 
296 	if (config & MTE_DEBUG_TCO_STATE) {
297 		static_if_key_enable(mte_debug_tco_state);
298 	}
299 
300 	if (config & MTE_PANIC_ON_NON_CANONICAL_PARAM) {
301 		static_if_key_enable(mte_panic_on_non_canonical);
302 	}
303 
304 	if (config & MTE_PANIC_ON_ASYNC_FAULT) {
305 		static_if_key_enable(mte_panic_on_async_fault);
306 	}
307 
308 
309 }
310 
311 STATIC_IF_INIT(mte_config_setup);
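/*
 * Usage sketch (illustrative only): the "mte" boot-arg is parsed as a
 * bitmask of the MTE_* configuration flags tested above, so a value
 * combining, e.g., MTE_KERNEL_ENABLE and MTE_USER_ENABLE turns on both
 * kernel and user tag checking at boot. The concrete bit values are
 * defined in the MTE headers.
 */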
312 #endif /* DEVELOPMENT || DEBUG */
313 #endif /* HAS_MTE */
314 
315 void arm_slide_rebase_and_sign_image(void);
316 MARK_AS_FIXUP_TEXT void
317 arm_slide_rebase_and_sign_image(void)
318 {
319 	kernel_mach_header_t *k_mh, *kc_mh = NULL;
320 	kernel_segment_command_t *seg;
321 	uintptr_t slide;
322 
323 	/*
324 	 * The kernel is part of a MH_FILESET kernel collection, determine slide
325 	 * based on first segment's mach-o vmaddr (requires first kernel load
326 	 * command to be LC_SEGMENT_64 of the __TEXT segment)
327 	 */
328 	k_mh = &_mh_execute_header;
329 	seg = (kernel_segment_command_t *)((uintptr_t)k_mh + sizeof(*k_mh));
330 	assert(seg->cmd == LC_SEGMENT_KERNEL);
331 	slide = (uintptr_t)k_mh - seg->vmaddr;
332 
333 	/*
334 	 * The kernel collection linker guarantees that the boot collection mach
335 	 * header vmaddr is the hardcoded kernel link address (as specified to
336 	 * ld64 when linking the kernel).
337 	 */
338 	kc_mh = (kernel_mach_header_t*)(VM_KERNEL_LINK_ADDRESS + slide);
339 	assert(kc_mh->filetype == MH_FILESET);
340 
341 	/*
342 	 * rebase and sign jops
343 	 * Note that we can't call any functions before this point, so
344 	 * we have to hard-code the knowledge that the base of the KC
345 	 * is the KC's mach-o header. This would change if any
346 	 * segment's VA started *before* the text segment
347 	 * (as the HIB segment does on x86).
348 	 */
349 	const void *collection_base_pointers[KCNumKinds] = {[0] = kc_mh, };
350 	kernel_collection_slide((struct mach_header_64 *)kc_mh, collection_base_pointers);
351 	PE_set_kc_header(KCKindPrimary, kc_mh, slide);
352 
353 	/*
354 	 * iBoot doesn't slide load command vmaddrs in an MH_FILESET kernel
355 	 * collection, so adjust them now, and determine the vmaddr range
356 	 * covered by read-only segments for the CTRR rorgn.
357 	 */
358 	kernel_collection_adjust_mh_addrs((struct mach_header_64 *)kc_mh, slide, false,
359 	    (uintptr_t *)&segLOWESTKC, (uintptr_t *)&segHIGHESTKC,
360 	    (uintptr_t *)&segLOWESTROKC, (uintptr_t *)&segHIGHESTROKC,
361 	    NULL, NULL, NULL);
362 
363 	/*
364 	 * Initialize slide global here to avoid duplicating this logic in
365 	 * arm_vm_init()
366 	 */
367 	vm_kernel_slide = slide;
368 }
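/*
 * Worked example (addresses illustrative only): if __TEXT was linked at
 * vmaddr 0xfffffff007004000 but k_mh actually sits at 0xfffffff00f004000,
 * then slide = 0x8000000 and the collection header is expected at
 * VM_KERNEL_LINK_ADDRESS + 0x8000000.
 */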
369 
370 void arm_static_if_init(boot_args *args);
371 MARK_AS_FIXUP_TEXT void
372 arm_static_if_init(boot_args *args)
373 {
374 	static_if_init(args->CommandLine);
375 }
376 
377 void
378 arm_auxkc_init(void *mh, void *base)
379 {
380 	/*
381 	 * The kernel collection linker guarantees that the lowest vmaddr in an
382 	 * AuxKC collection is 0 (but note that the mach header is higher up since
383 	 * RW segments precede RO segments in the AuxKC).
384 	 */
385 	uintptr_t slide = (uintptr_t)base;
386 	kernel_mach_header_t *akc_mh = (kernel_mach_header_t*)mh;
387 
388 	assert(akc_mh->filetype == MH_FILESET);
389 	PE_set_kc_header_and_base(KCKindAuxiliary, akc_mh, base, slide);
390 
391 	/* rebase and sign jops */
392 	const void *collection_base_pointers[KCNumKinds];
393 	memcpy(collection_base_pointers, PE_get_kc_base_pointers(), sizeof(collection_base_pointers));
394 	kernel_collection_slide((struct mach_header_64 *)akc_mh, collection_base_pointers);
395 
396 	kernel_collection_adjust_mh_addrs((struct mach_header_64 *)akc_mh, slide, false,
397 	    (uintptr_t *)&segLOWESTAuxKC, (uintptr_t *)&segHIGHESTAuxKC, (uintptr_t *)&segLOWESTROAuxKC,
398 	    (uintptr_t *)&segHIGHESTROAuxKC, (uintptr_t *)&segLOWESTRXAuxKC, (uintptr_t *)&segHIGHESTRXAuxKC,
399 	    (uintptr_t *)&segHIGHESTNLEAuxKC);
400 #if defined(HAS_APPLE_PAC)
401 	OSRuntimeSignStructorsInFileset(akc_mh);
402 #endif /* defined(HAS_APPLE_PAC) */
403 }
404 
405 /*
406  * boot kernelcache ranges; used for accounting.
407  */
408 SECURITY_READ_ONLY_LATE(const arm_physrange_t *) arm_vm_kernelcache_ranges;
409 SECURITY_READ_ONLY_LATE(int) arm_vm_kernelcache_numranges;
410 
411 #if __ARM_KERNEL_PROTECT__
412 /*
413  * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
414  * mappable space preceding the kernel (as we unmap the kernel by cutting the
415  * range covered by TTBR1 in half).  This must also cover the exception vectors.
416  */
417 static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
418 
419 /* The exception vectors and the kernel cannot share root TTEs. */
420 static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
421 
422 /*
423  * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
424  * the exception vectors.
425  */
426 static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
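/*
 * The left-hand side above is the number of bytes from
 * ARM_KERNEL_PROTECT_EXCEPTION_START to the top of the address space,
 * doubled: halving the TTBR1 range must still leave room for the EL0
 * alias of the vectors.
 */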
427 #endif /* __ARM_KERNEL_PROTECT__ */
428 
429 #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
430 
431 #if KASAN
432 extern vm_offset_t shadow_pbase;
433 extern vm_offset_t shadow_ptop;
434 extern vm_offset_t physmap_vbase;
435 extern vm_offset_t physmap_vtop;
436 #endif
437 
438 /*
439  * We explicitly place this in const, as it is not const from a language
440  * perspective, but it is only modified before we actually switch away from
441  * the bootstrap page tables.
442  */
443 SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));
444 
445 /*
446  * Denotes the end of xnu.
447  */
448 extern void *last_kernel_symbol;
449 
450 extern void arm64_replace_bootstack(cpu_data_t*);
451 extern void PE_slide_devicetree(vm_offset_t);
452 
453 /*
454  * KASLR parameters
455  */
456 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
457 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
458 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
459 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
460 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
461 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
462 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
463 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
464 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
465 
466 SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_sptm_offsets;
467 SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_txm_offsets;
468 
469 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
470 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
471 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
472 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
473 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
474 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
475 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
476 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
477 
478 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
479 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
480 
481 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
482 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;
483 
484 /* Used by <mach/arm/vm_param.h> */
485 SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
486 SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
487 SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
488 SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
489 SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
490 
491 /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
492  * all kexts before the kernel.  This is only for arm64 devices and looks
493  * something like the following:
494  * -- vmaddr order --
495  * 0xffffff8004004000 __PRELINK_TEXT
496  * 0xffffff8007004000 __TEXT (xnu)
497  * 0xffffff80075ec000 __DATA (xnu)
498  * 0xffffff80076dc000 __KLD (xnu)
499  * 0xffffff80076e0000 __LAST (xnu)
500  * 0xffffff80076e4000 __LINKEDIT (xnu)
501  * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
502  * 0xffffff800782c000 __PRELINK_INFO
503  * 0xffffff80078e4000 -- End of kernelcache
504  */
505 
506 /* 24921709 - make XNU ready for KTRR
507  *
508  * Two possible kernel cache layouts, depending on which kcgen is being used.
509  * VAs increasing downwards.
510  * Old KCGEN:
511  *
512  * __PRELINK_TEXT
513  * __TEXT
514  * __DATA_CONST
515  * __TEXT_EXEC
516  * __KLD
517  * __LAST
518  * __DATA
519  * __PRELINK_DATA (expected empty)
520  * __LINKEDIT
521  * __PRELINK_INFO
522  *
523  * New kcgen:
524  *
525  * __PRELINK_TEXT    <--- First KTRR (ReadOnly) segment
526  * __PLK_DATA_CONST
527  * __PLK_TEXT_EXEC
528  * __TEXT
529  * __DATA_CONST
530  * __TEXT_EXEC
531  * __KLD
532  * __LAST            <--- Last KTRR (ReadOnly) segment
533  * __DATA
534  * __BOOTDATA (if present)
535  * __LINKEDIT
536  * __PRELINK_DATA (expected populated now)
537  * __PLK_LINKEDIT
538  * __PRELINK_INFO
539  *
540  */
541 
542 vm_offset_t mem_size;                             /* Size of actual physical memory present
543                                                    * minus any performance buffer and possibly
544                                                    * limited by mem_limit in bytes */
545 uint64_t    mem_actual;                           /* The "One True" physical memory size
546                                                    * (actually, it's the highest
547                                                    * physical address + 1) */
548 uint64_t    max_mem;                              /* Size of physical memory (bytes), adjusted
549                                                    * by maxmem */
550 uint64_t    max_mem_actual;                       /* Actual size of physical memory (bytes),
551                                                    * adjusted by the maxmem boot-arg */
552 uint64_t    sane_size;                            /* Memory size to use for defaults
553                                                    * calculations */
554 /* This no longer appears to be used; kill it? */
555 addr64_t    vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
556                                                    * virtual address known
557                                                    * to the VM system */
558 
559 SECURITY_READ_ONLY_LATE(vm_offset_t)              segEXTRADATA;
560 SECURITY_READ_ONLY_LATE(unsigned long)            segSizeEXTRADATA;
561 
562 /* Trust cache portion of EXTRADATA (if within it) */
563 SECURITY_READ_ONLY_LATE(vm_offset_t)              segTRUSTCACHE;
564 SECURITY_READ_ONLY_LATE(unsigned long)            segSizeTRUSTCACHE;
565 
566 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTTEXT;
567 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWEST;
568 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTRO;
569 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTRO;
570 
571 /* Only set when booted from MH_FILESET kernel collections */
572 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTKC;
573 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTKC;
574 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTROKC;
575 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTROKC;
576 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTAuxKC;
577 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTAuxKC;
578 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTROAuxKC;
579 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTROAuxKC;
580 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLOWESTRXAuxKC;
581 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTRXAuxKC;
582 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIGHESTNLEAuxKC;
583 
584 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segTEXTB;
585 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
586 
587 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segDATACONSTB;
588 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
589 
590 SECURITY_READ_ONLY_LATE(vm_offset_t)   segTEXTEXECB;
591 SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;
592 
593 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segDATAB;
594 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
595 
596 SECURITY_READ_ONLY_LATE(vm_offset_t)          segBOOTDATAB;
597 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeBOOTDATA;
598 extern vm_offset_t                            intstack_low_guard;
599 extern vm_offset_t                            intstack_high_guard;
600 extern vm_offset_t                            excepstack_high_guard;
601 
602 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLINKB;
603 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
604 
605 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKLDB;
606 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeKLD;
607 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKLDDATAB;
608 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
609 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLASTB;
610 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeLAST;
611 SECURITY_READ_ONLY_LATE(vm_offset_t)          segLASTDATACONSTB;
612 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeLASTDATACONST;
613 
614 SECURITY_READ_ONLY_LATE(vm_offset_t)          sectHIBTEXTB;
615 SECURITY_READ_ONLY_LATE(unsigned long)        sectSizeHIBTEXT;
616 SECURITY_READ_ONLY_LATE(vm_offset_t)          segHIBDATAB;
617 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeHIBDATA;
618 SECURITY_READ_ONLY_LATE(vm_offset_t)          sectHIBDATACONSTB;
619 SECURITY_READ_ONLY_LATE(unsigned long)        sectSizeHIBDATACONST;
620 
621 SECURITY_READ_ONLY_LATE(vm_offset_t)          segPRELINKTEXTB;
622 SECURITY_READ_ONLY_LATE(unsigned long)        segSizePRELINKTEXT;
623 
624 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKTEXTEXECB;
625 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
626 
627 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKDATACONSTB;
628 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
629 
630 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPRELINKDATAB;
631 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
632 
633 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKLLVMCOVB = 0;
634 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
635 
636 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPLKLINKEDITB;
637 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
638 
639 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segPRELINKINFOB;
640 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
641 
642 /* Only set when booted from MH_FILESET primary kernel collection */
643 SECURITY_READ_ONLY_LATE(vm_offset_t)          segKCTEXTEXECB;
644 SECURITY_READ_ONLY_LATE(unsigned long)        segSizeKCTEXTEXEC;
645 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKCDATACONSTB;
646 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
647 SECURITY_READ_ONLY_LATE(static vm_offset_t)   segKCDATAB;
648 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;
649 
650 SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
651 
652 SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;
653 
654 SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
655 SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
656 SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
657 SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
658 
659 SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base;
660 
661 pmap_paddr_t alloc_ptpage(sptm_pt_level_t level, bool map_static);
662 SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
663 extern int dtrace_keep_kernel_symbols(void);
664 
665 /*
666  * Bootstrap the system enough to run with virtual memory.
667  * Map the kernel's code and data, and allocate the system page table.
668  * Page_size must already be set.
669  *
670  * Parameters:
671  * first_avail: first available physical page -
672  *              after kernel page tables
673  * avail_start: PA of first physical page
674  * avail_end:   PA of last physical page
675  */
676 SECURITY_READ_ONLY_LATE(vm_offset_t)     first_avail;
677 SECURITY_READ_ONLY_LATE(vm_offset_t)     static_memory_end;
678 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    avail_start;
679 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    avail_end;
680 SECURITY_READ_ONLY_LATE(pmap_paddr_t)    real_avail_end;
681 SECURITY_READ_ONLY_LATE(unsigned long)   real_phys_size;
682 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
683 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;
684 
685 typedef struct {
686 	pmap_paddr_t pa;
687 	vm_map_address_t va;
688 	vm_size_t len;
689 } ptov_table_entry;
690 
691 SECURITY_READ_ONLY_LATE(static boolean_t)               kva_active = FALSE;
692 
693 #if HAS_ARM_FEAT_SME
694 static SECURITY_READ_ONLY_LATE(bool) enable_sme = true;
695 #endif
696 
697 /**
698  * sptm_supports_local_coredump is set in start_sptm.s when SPTM dispatch logic
699  * calls into XNU to handle a panic from SPTM/TXM/cL4. If this variable is set
700  * to false then osfmk/kern/debug.c:debugger_collect_diagnostic() will skip
701  * taking a local core dump. This defaults to true since as long as the panic
702  * doesn't occur within the SPTM, then the SPTM will support making calls during
703  * the panic path to save the coredump. Only when the panic occurs from within
704  * guarded mode do we let SPTM decide whether it supports local coredumps.
705  */
706 bool sptm_supports_local_coredump = true;
707 
708 #if KASAN
709 /* Prototypes for KASAN functions */
710 void kasan_bootstrap(boot_args *, vm_offset_t pgtable, sptm_bootstrap_args_xnu_t *sptm_boot_args);
711 
712 /**
713  * Entry point for systems that support an SPTM and are booting a KASAN kernel.
714  * This is required because KASAN kernels need to set up the shadow map before
715  * arm_init() can even run.
716  */
717 void
718 arm_init_kasan(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_boot_args)
719 {
720 	/* Initialize SPTM helper library. */
721 	uint8_t ret = libsptm_init(&sptm_boot_args->libsptm_state);
722 	if (ret != LIBSPTM_SUCCESS) {
723 		panic("%s: libsptm_init failed: %u", __func__, ret);
724 	}
725 
726 	memSize = args->memSize;
727 	kasan_bootstrap(args, phystokv(sptm_boot_args->libsptm_state.root_table_paddr), sptm_boot_args);
728 
729 	arm_init(args, sptm_boot_args);
730 }
731 #endif /* KASAN */
732 
733 /**
734  * Entry point for systems that support an SPTM - except on KASAN kernels,
735  * see above. Bootstrap stacks have been set up by the SPTM by this point,
736  * and XNU is responsible for rebasing and signing absolute addresses.
737  */
738 void
739 arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_boot_args)
740 {
741 	unsigned int maxmem;
742 	uint32_t memsize;
743 	uint64_t xmaxmem;
744 	thread_t thread;
745 	DTEntry chosen;
746 	unsigned int dt_entry_size;
747 
748 	extern void xnu_return_to_gl2(void);
749 	const sptm_vaddr_t handler_addr = (sptm_vaddr_t) ptrauth_strip((void *)xnu_return_to_gl2, ptrauth_key_function_pointer);
750 	sptm_register_xnu_exc_return(handler_addr);
751 
752 #if defined(HAS_APPLE_PAC)
753 	kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
754 	OSRuntimeSignStructorsInFileset(kc_mh);
755 #endif /* defined(HAS_APPLE_PAC) */
756 
757 	/* If kernel integrity is supported, use a constant copy of the boot args. */
758 	const_boot_args = *args;
759 	BootArgs = args = &const_boot_args;
760 	const_sptm_args = *sptm_boot_args;
761 	SPTMArgs = sptm_boot_args = &const_sptm_args;
762 	sptm_xnu_triggered_panic_ptr = sptm_boot_args->xnu_triggered_panic;
763 	/*
764 	 * Initialize first_avail_phys from what the SPTM tells us.
765 	 * We're not using iBoot's topOfKernelData, as SPTM and other
766 	 * components have consumed pages themselves.
767 	 */
768 	first_avail_phys = sptm_boot_args->first_avail_phys;
769 
770 #if APPLEVIRTUALPLATFORM
771 	reset_vector_vaddr = (vm_offset_t) sptm_boot_args->sptm_reset_vector_vaddr;
772 #endif /* APPLEVIRTUALPLATFORM */
773 
774 	cpu_data_init(&BootCpuData);
775 #if defined(HAS_APPLE_PAC)
776 	/* The bootstrap CPU's process-dependent kernel key has been loaded by start.s. */
777 	BootCpuData.rop_key = ml_default_rop_pid();
778 	BootCpuData.jop_key = ml_default_jop_pid();
779 #endif /* defined(HAS_APPLE_PAC) */
780 
781 	PE_init_platform(FALSE, args); /* Get platform expert set up */
782 
783 #if !KASAN
784 	memSize = args->memSize;
785 
786 	/* Initialize SPTM helper library. */
787 	uint8_t ret = libsptm_init(&const_sptm_args.libsptm_state);
788 	if (ret != LIBSPTM_SUCCESS) {
789 		panic("%s: libsptm_init failed: %u", __func__, ret);
790 	}
791 #endif
792 
793 #if __arm64__
794 	configure_timer_apple_regs();
795 	wfe_timeout_configure();
796 	wfe_timeout_init();
797 
798 	configure_misc_apple_boot_args();
799 	configure_misc_apple_regs(true);
800 
801 #if HAS_UPSI_FAILURE_INJECTION
802 	/* UPSI (Universal Panic and Stall Injection) Logic
803 	 * iBoot/XNU are both configured for failure injection at specific stages
803 	 * The injected failure and stage are populated through EDT properties by iBoot
805 	 *
806 	 * iBoot populates the EDT properties for XNU based upon PMU scratch bits
807 	 * This is done because the EDT is available sooner in XNU than the PMU Kext
808 	 */
809 	uint64_t const *upsi_info = NULL;
810 
811 	/* TUNABLE is not usable here because TUNABLEs are parsed at a later point. */
812 	if (SecureDTLookupEntry(NULL, "/chosen", &chosen) != kSuccess) {
813 		panic("%s: Unable to find 'chosen' DT node", __FUNCTION__);
814 	}
815 
816 	/* Check if there is a requested injection stage */
817 	if (SecureDTGetProperty(chosen, "injection_stage", (void const **)&upsi_info,
818 	    &dt_entry_size) == kSuccess) {
819 		assert3u(dt_entry_size, ==, 8);
820 		xnu_upsi_injection_stage = *upsi_info;
821 	}
822 
823 	/* Check if there is a requested injection action */
824 	if (SecureDTGetProperty(chosen, "injection_action", (void const **)&upsi_info,
825 	    &dt_entry_size) == kSuccess) {
826 		assert3u(dt_entry_size, ==, 8);
827 		xnu_upsi_injection_action = *upsi_info;
828 	}
829 
830 	check_for_failure_injection(XNU_STAGE_ARM_INIT);
831 
832 	chosen = NULL; // Force a re-lookup later on since VM addresses are not final at this point
833 	dt_entry_size = 0;
834 #endif // HAS_UPSI_FAILURE_INJECTION
835 
836 #if HAS_ARM_FEAT_SME
837 	(void)PE_parse_boot_argn("enable_sme", &enable_sme, sizeof(enable_sme));
838 	if (enable_sme) {
839 		arm_sme_init(true);
840 	}
841 #endif
842 
843 
844 	{
845 		/*
846 		 * Select the advertised kernel page size.
847 		 */
848 		if (memSize > 1ULL * 1024 * 1024 * 1024) {
849 			/*
850 			 * arm64 device with > 1GB of RAM:
851 			 * kernel uses 16KB pages.
852 			 */
853 			PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
854 		} else {
855 			/*
856 			 * arm64 device with <= 1GB of RAM:
857 			 * kernel uses hardware page size
858 			 * (4KB for H6/H7, 16KB for H8+).
859 			 */
860 			PAGE_SHIFT_CONST = ARM_PGSHIFT;
861 		}
862 
863 		/* 32-bit apps always see 16KB page size */
864 		page_shift_user32 = PAGE_MAX_SHIFT;
865 #ifdef  APPLETYPHOON
866 		if (PE_parse_boot_argn("cpus_defeatures", &cpus_defeatures, sizeof(cpus_defeatures))) {
867 			if ((cpus_defeatures & 0xF) != 0) {
868 				cpu_defeatures_set(cpus_defeatures & 0xF);
869 			}
870 		}
871 #endif
872 	}
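	/*
	 * Worked example: PAGE_MAX_SHIFT is 14 on arm64, so a device with
	 * more than 1GB of RAM advertises 1 << 14 = 16KB pages, and 32-bit
	 * tasks always see 16KB regardless of the kernel page size.
	 */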
873 #endif
874 
875 	/* Enable SPTM stability hacks if requested */
876 	PE_parse_boot_argn("sptm_stability_hacks", &sptm_stability_hacks, sizeof(sptm_stability_hacks));
877 
878 	ml_parse_cpu_topology();
879 
880 	siq_init();
881 
882 	master_cpu = ml_get_boot_cpu_number();
883 	assert(master_cpu >= 0 && master_cpu <= ml_get_max_cpu_number());
884 
885 	BootCpuData.cpu_number = (unsigned short)master_cpu;
886 	BootCpuData.intstack_top = (vm_offset_t) &intstack_top;
887 	BootCpuData.istackptr = &intstack_top;
888 	BootCpuData.excepstack_top = (vm_offset_t) &excepstack_top;
889 	BootCpuData.excepstackptr = &excepstack_top;
890 	CpuDataEntries[master_cpu].cpu_data_vaddr = &BootCpuData;
891 	CpuDataEntries[master_cpu].cpu_data_paddr = (void *)((uintptr_t)(args->physBase)
892 	    + ((uintptr_t)&BootCpuData
893 	    - (uintptr_t)(args->virtBase)));
894 
895 	thread = thread_bootstrap();
896 	thread->machine.CpuDatap = &BootCpuData;
897 	thread->machine.pcpu_data_base_and_cpu_number =
898 	    ml_make_pcpu_base_and_cpu_number(0, BootCpuData.cpu_number);
899 	machine_set_current_thread(thread);
900 
901 	/*
902 	 * Preemption is enabled for this thread so that it can lock mutexes without
903 	 * tripping the preemption check. In reality scheduling is not enabled until
904 	 * this thread completes, and there are no other threads to switch to, so
905 	 * preemption level is not really meaningful for the bootstrap thread.
906 	 */
907 	thread->machine.preemption_count = 0;
908 	cpu_bootstrap();
909 
910 	rtclock_early_init();
911 
912 	kernel_debug_string_early("kernel_startup_bootstrap");
913 	kernel_startup_bootstrap();
914 
915 	/*
916 	 * Initialize the timer callout world
917 	 */
918 	timer_call_init();
919 
920 	cpu_init();
921 
922 	processor_bootstrap();
923 
924 	if (PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
925 		xmaxmem = (uint64_t) maxmem * (1024 * 1024);
926 	} else if (PE_get_default("hw.memsize", &memsize, sizeof(memsize))) {
927 		xmaxmem = (uint64_t) memsize;
928 	} else {
929 		xmaxmem = 0;
930 	}
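	/*
	 * e.g. booting with maxmem=1024 yields xmaxmem = 1024 * 1MB = 1GB;
	 * xmaxmem == 0 means "no limit".
	 */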
931 
932 #if SCHED_HYGIENE_DEBUG
933 	{
934 		int wdt_boot_arg = 0;
935 		bool const wdt_disabled = (PE_parse_boot_argn("wdt", &wdt_boot_arg, sizeof(wdt_boot_arg)) && (wdt_boot_arg == -1));
936 
937 		/* Disable if WDT is disabled */
938 		if (wdt_disabled || kern_feature_override(KF_INTERRUPT_MASKED_DEBUG_OVRD)) {
939 			interrupt_masked_debug_mode = SCHED_HYGIENE_MODE_OFF;
940 		}
941 		if (wdt_disabled || kern_feature_override(KF_PREEMPTION_DISABLED_DEBUG_OVRD)) {
942 			sched_preemption_disable_debug_mode = SCHED_HYGIENE_MODE_OFF;
943 		}
944 	}
945 #endif /* SCHED_HYGIENE_DEBUG */
946 
947 	nanoseconds_to_absolutetime(XCALL_ACK_TIMEOUT_NS, &xcall_ack_timeout_abstime);
948 
949 #if HAS_BP_RET
950 	PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
951 	set_bp_ret(); // Apply branch predictor retention settings to boot CPU
952 #endif
953 
954 	PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt));
955 
956 #if __ARM_PAN_AVAILABLE__
957 	__builtin_arm_wsr("pan", 1);
958 #endif  /* __ARM_PAN_AVAILABLE__ */
959 
960 #if HAS_MTE
961 	arm_mte_tag_generator_init(true);
962 #endif
963 
964 	/**
965 	 * Check SPTM feature flag for ARM_LARGE_MEMORY irrespective of XNU
966 	 * definition to detect mismatch in cases where ARM_LARGE_MEMORY is
967 	 * defined in SPTM but not in XNU and vice versa.
968 	 */
969 	const uint64_t sptm_is_large_memory = SPTMArgs->feature_flags & SPTM_FEATURE_LARGE_MEMORY;
970 	const uint64_t sptm_is_large_memory_kernonly = SPTMArgs->feature_flags & SPTM_FEATURE_LARGE_MEMORY_KERNONLY;
971 #if ARM_LARGE_MEMORY
972 	const uint64_t xnu_is_large_memory = SPTM_FEATURE_LARGE_MEMORY;
973 #if ARM_LARGE_MEMORY_KERNONLY
974 	const uint64_t xnu_is_large_memory_kernonly = SPTM_FEATURE_LARGE_MEMORY_KERNONLY;
975 #else /* ARM_LARGE_MEMORY_KERNONLY */
976 	const uint64_t xnu_is_large_memory_kernonly = 0;
977 #endif /* ARM_LARGE_MEMORY_KERNONLY */
978 #else /* ARM_LARGE_MEMORY */
979 	const uint64_t xnu_is_large_memory = 0;
980 	const uint64_t xnu_is_large_memory_kernonly = 0;
981 #endif /* ARM_LARGE_MEMORY */
982 
983 	if (sptm_is_large_memory != xnu_is_large_memory) {
984 		panic("Mismatch of ARM_LARGE_MEMORY in SPTM (%#llx)/XNU (%#llx)", sptm_is_large_memory, xnu_is_large_memory);
985 	}
986 
987 	if (sptm_is_large_memory_kernonly != xnu_is_large_memory_kernonly) {
988 		panic("Mismatch of ARM_LARGE_MEMORY_KERNONLY in SPTM (%#llx)/XNU (%#llx)",
989 		    sptm_is_large_memory_kernonly, xnu_is_large_memory_kernonly);
990 	}
991 
992 	/*
993 	 * gPhysBase/Size only represent kernel-managed memory. These globals represent
994 	 * the actual DRAM base address and size as reported by iBoot through the
995 	 * device tree.
996 	 */
997 	unsigned long const *dram_base;
998 	unsigned long const *dram_size;
999 	if (SecureDTLookupEntry(NULL, "/chosen", &chosen) != kSuccess) {
1000 		panic("%s: Unable to find 'chosen' DT node", __FUNCTION__);
1001 	}
1002 
1003 	if (SecureDTGetProperty(chosen, "dram-base", (void const **)&dram_base, &dt_entry_size) != kSuccess) {
1004 		panic("%s: Unable to find 'dram-base' entry in the 'chosen' DT node", __FUNCTION__);
1005 	}
1006 
1007 	if (SecureDTGetProperty(chosen, "dram-size", (void const **)&dram_size, &dt_entry_size) != kSuccess) {
1008 		panic("%s: Unable to find 'dram-size' entry in the 'chosen' DT node", __FUNCTION__);
1009 	}
1010 
1011 	gDramBase = *dram_base;
1012 	gDramSize = *dram_size;
1013 	pmap_first_pnum = (ppnum_t)atop(gDramBase);
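	/*
	 * e.g. with gDramBase = 0x800000000 and 16KB pages (illustrative),
	 * pmap_first_pnum = 0x800000000 >> 14 = 0x200000.
	 */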
1014 
1015 	arm_vm_init(xmaxmem, args);
1016 
1017 	if (debug_boot_arg) {
1018 		patch_low_glo();
1019 	}
1020 
1021 #if __arm64__ && WITH_CLASSIC_S2R
1022 	sleep_token_buffer_init();
1023 #endif
1024 
1025 	PE_consistent_debug_inherit();
1026 
1027 	/* Setup debugging output. */
1028 	const unsigned int serial_exists = serial_init();
1029 	kernel_startup_initialize_upto(STARTUP_SUB_KPRINTF);
1030 	kprintf("kprintf initialized\n");
1031 
1032 	/**
1033 	 * Disable SPTM serial output just after XNU serial initialization
1034 	 * since serial_init() can itself panic in various cases. The most
1035 	 * commonly seen hard-to-debug issue is user error in serial boot-args
1036 	 * such as serial-device/serial-device-name.
1037 	 */
1038 	sptm_serial_disable();
1039 
1040 	serialmode = 0;
1041 	if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
1042 		/* Do we want a serial keyboard and/or console? */
1043 		kprintf("Serial mode specified: %08X\n", serialmode);
1044 		disable_iolog_serial_output = (serialmode & SERIALMODE_NO_IOLOG) != 0;
1045 		enable_dklog_serial_output = restore_boot || (serialmode & SERIALMODE_DKLOG) != 0;
1046 		int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
1047 		if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
1048 			if (force_sync) {
1049 				serialmode |= SERIALMODE_SYNCDRAIN;
1050 				kprintf(
1051 					"WARNING: Forcing uart driver to output synchronously."
1052 					"printf()s/IOLogs will impact kernel performance.\n"
1053 					"You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
1054 			}
1055 		}
1056 	}
1057 	if (kern_feature_override(KF_SERIAL_OVRD)) {
1058 		serialmode = 0;
1059 	}
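	/*
	 * At this point serialmode reflects the "serial" boot-arg, possibly
	 * vetoed by kern_feature_override(); the console is switched to
	 * serial below only if serial_init() actually probed a UART.
	 */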
1060 
1061 	/* Start serial if requested and a serial device was enumerated in serial_init(). */
1062 	if ((serialmode & SERIALMODE_OUTPUT) && serial_exists) {
1063 		serial_console_enabled = true;
1064 		(void)switch_to_serial_console(); /* Switch into serial mode from video console */
1065 		disableConsoleOutput = FALSE;     /* Allow printfs to happen */
1066 	}
1067 	PE_create_console();
1068 
1069 	/* setup console output */
1070 	PE_init_printf(FALSE);
1071 
1072 #if __arm64__
1073 #if DEBUG
1074 	dump_kva_space();
1075 #endif
1076 #endif
1077 
1078 	cpu_machine_idle_init(TRUE);
1079 
1080 	PE_init_platform(TRUE, &BootCpuData);
1081 
1082 	/* Initialize the debug infrastructure system-wide and on the local core. */
1083 	pe_arm_debug_init_early(&BootCpuData);
1084 
1085 #if RELEASE
1086 	/* Validate SPTM variant. */
1087 	if (const_sptm_args.sptm_variant != SPTM_VARIANT_RELEASE) {
1088 		panic("arm_init: Development SPTM / Release XNU is not a supported configuration.");
1089 	}
1090 #endif /* RELEASE */
1091 
1092 #if __arm64__
1093 	extern bool cpu_config_correct;
1094 	if (!cpu_config_correct) {
1095 		panic("The cpumask=N boot arg cannot be used together with cpus=N, and the boot CPU must be enabled");
1096 	}
1097 
1098 	ml_map_cpu_pio();
1099 #endif
1100 
1101 	cpu_timebase_init(TRUE);
1102 
1103 #if KPERF
1104 	/* kptimer_curcpu_up() must be called after cpu_timebase_init */
1105 	kptimer_curcpu_up();
1106 #endif /* KPERF */
1107 
1108 	PE_init_cpu();
1109 #if __arm64__
1110 	apt_msg_init();
1111 	apt_msg_init_cpu();
1112 #endif
1113 	fiq_context_init(TRUE);
1114 
1115 
1116 #if HIBERNATION
1117 	pal_hib_init();
1118 #endif /* HIBERNATION */
1119 
1120 	/*
1121 	 * Initialize the stack protector for all future calls
1122 	 * to C code. Since kernel_bootstrap() eventually
1123 	 * switches stack context without returning through this
1124 	 * function, we do not risk failing the check even though
1125 	 * we mutate the guard word during execution.
1126 	 */
1127 	__stack_chk_guard = (unsigned long)early_random();
1128 	/* Zero a byte of the protector to guard
1129 	 * against string vulnerabilities
1130 	 */
1131 	__stack_chk_guard &= ~(0xFFULL << 8);
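	/*
	 * e.g. a random guard of 0x1122334455667788 becomes
	 * 0x1122334455660088 (value illustrative): the embedded NUL byte
	 * terminates str* copies before an overflow can reproduce the
	 * whole guard.
	 */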
1132 	machine_startup(args);
1133 }
1134 
1135 /*
1136  * Routine:        arm_init_cpu
1137  * Function:
1138  *    Runs on S2R resume (all CPUs) and SMP boot (non-boot CPUs only).
1139  */
1140 
1141 void
1142 arm_init_cpu(
1143 	cpu_data_t       *cpu_data_ptr,
1144 	__unused uint64_t hibernation_args)
1145 {
1146 #if HIBERNATION
1147 	sptm_hibernation_args_xnu_t *hibargs = (sptm_hibernation_args_xnu_t *)hibernation_args;
1148 
1149 	if ((hibargs != 0) && (hibargs->hib_header_phys != 0) && (hibargs->handoff_page_count > 0)) {
1150 		/*
1151 		 * We must copy the handoff region before anything else because the physical pages
1152 		 * holding the handoff region are not tracked by xnu as in-use.
1153 		 */
1154 		HibernationCopyHandoffRegionFromPageArray(&hibargs->handoff_pages[0], hibargs->handoff_page_count);
1155 	}
1156 #endif /* HIBERNATION */
1157 
1158 #if __ARM_PAN_AVAILABLE__
1159 	__builtin_arm_wsr("pan", 1);
1160 #endif
1161 
1162 #ifdef __arm64__
1163 	configure_timer_apple_regs();
1164 	configure_misc_apple_regs(false);
1165 #endif
1166 #if HAS_ARM_FEAT_SME
1167 	if (enable_sme) {
1168 		arm_sme_init(false);
1169 	}
1170 #endif
1171 
1172 	os_atomic_andnot(&cpu_data_ptr->cpu_flags, SleepState, relaxed);
1173 
1174 	siq_cpu_init();
1175 
1176 	machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
1177 
1178 #if HAS_MTE
1179 	arm_mte_tag_generator_init(false);
1180 #endif
1181 
1182 #if HIBERNATION
1183 	if (hibargs != 0 && hibargs->hib_header_phys != 0) {
1184 		gIOHibernateState = kIOHibernateStateWakingFromHibernate;
1185 		uart_hibernation = true;
1186 
1187 #if HAS_MTE
1188 		/*
1189 		 * On hibernation exit, the hibtext had copied the hibernation
1190 		 * header into a "borrowed" free physical page, by simply
1191 		 * picking a physical page that was not covered by the
1192 		 * hibernation image (meaning that xnu does not care about its
1193 		 * content). This was done to make sure the hibernation header
1194 		 * itself would not be overwritten by hibernation restore.
1195 		 *
1196 		 * MTE however keeps some nominally "free" pages in so-called
1197 		 * "freepage queues". Just like regular free pages, their content
1198 		 * does not matter and they are not hibernated, but they are kept
1199 		 * for easier MTE page hand-out, and as such have MAIR=0x4 set.
1200 		 * I.e., they are effectively MTE-tagged.
1201 		 *
1202 		 * If the hibtext, which has no idea what MAIR a "free" page
1203 		 * has, happens to pick such a page, then the code below will
1204 		 * effectively try to access an MTE tagged page using an
1205 		 * untagged physical aperture pointer, originally resulting in
1206 		 * a tag check exception.
1207 		 *
1208 		 * At this still early point in hibernation, this is easily
1209 		 * circumvented by temporarily turning off MTE tag checking
1210 		 * altogether.
1211 		 */
1212 		vm_memtag_disable_checking();
1213 #endif /* HAS_MTE */
1214 
1215 		__nosan_memcpy(gIOHibernateCurrentHeader, (void*)phystokv(hibargs->hib_header_phys), sizeof(IOHibernateImageHeader));
1216 
1217 #if HAS_MTE
1218 		vm_memtag_enable_checking();
1219 #endif /* HAS_MTE */
1220 	}
1221 	if ((cpu_data_ptr == &BootCpuData) && (gIOHibernateState == kIOHibernateStateWakingFromHibernate) && ml_is_quiescing()) {
1222 		// the "normal" S2R code captures wake_abstime too early, so on a hibernation resume we fix it up here
1223 		extern uint64_t wake_abstime;
1224 		wake_abstime = gIOHibernateCurrentHeader->lastHibAbsTime;
1225 
1226 		// since the hw clock stops ticking across hibernation, we need to apply an offset;
1227 		// iBoot computes this offset for us and passes it via the hibernation header
1228 		extern uint64_t hwclock_conttime_offset;
1229 		hwclock_conttime_offset = gIOHibernateCurrentHeader->hwClockOffset;
1230 
1231 		// during hibernation, we captured the idle thread's state from inside the PPL context, so we have to
1232 		// fix up its preemption count
1233 		unsigned int expected_preemption_count = (gEnforcePlatformActionSafety ? 2 : 1);
1234 		if (get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread) !=
1235 		    expected_preemption_count) {
1236 			panic("unexpected preemption count %u on boot cpu thread (should be %u)",
1237 			    get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread),
1238 			    expected_preemption_count);
1239 		}
1240 		cpu_data_ptr->cpu_active_thread->machine.preemption_count--;
1241 	}
1242 #endif /* HIBERNATION */
1243 
1244 #if __arm64__
1245 	wfe_timeout_init();
1246 	flush_mmu_tlb();
1247 #endif
1248 
1249 	cpu_machine_idle_init(FALSE);
1250 
1251 	cpu_init();
1252 
1253 #ifdef  APPLETYPHOON
1254 	if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
1255 		cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
1256 	}
1257 #endif
1258 	/* Initialize the timebase before serial_init, as some serial
1259 	 * drivers use mach_absolute_time() to implement rate control
1260 	 */
1261 	cpu_timebase_init(FALSE);
1262 
1263 #if KPERF
1264 	/* kptimer_curcpu_up() must be called after cpu_timebase_init */
1265 	kptimer_curcpu_up();
1266 #endif /* KPERF */
1267 
1268 	if (cpu_data_ptr == &BootCpuData && ml_is_quiescing()) {
1269 #if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
1270 		/*
1271 		 * Prevent CPUs from going into deep sleep until all
1272 		 * CPUs are ready to do so.
1273 		 */
1274 		arm64_stall_sleep = TRUE;
1275 #endif
1276 		serial_init();
1277 		PE_init_platform(TRUE, NULL);
1278 		commpage_update_timebase();
1279 
1280 		exclaves_update_timebase(EXCLAVES_CLOCK_ABSOLUTE,
1281 		    rtclock_base_abstime);
1282 #if HIBERNATION
1283 		if (gIOHibernateState == kIOHibernateStateWakingFromHibernate) {
1284 			exclaves_update_timebase(EXCLAVES_CLOCK_CONTINUOUS,
1285 			    hwclock_conttime_offset);
1286 		}
1287 #endif /* HIBERNATION */
1288 	}
1289 	PE_init_cpu();
1290 #if __arm64__
1291 	apt_msg_init_cpu();
1292 #endif
1293 
1294 	fiq_context_init(TRUE);
1295 	cpu_data_ptr->rtcPop = EndOfAllTime;
1296 	timer_resync_deadlines();
1297 
1298 	/* Start tracing (secondary CPU). */
1299 #if DEVELOPMENT || DEBUG
1300 	PE_arm_debug_enable_trace(true);
1301 #endif /* DEVELOPMENT || DEBUG */
1302 
1303 	kprintf("arm_cpu_init(): cpu %d online\n", cpu_data_ptr->cpu_number);
1304 
1305 	if (cpu_data_ptr == &BootCpuData && ml_is_quiescing()) {
1306 		if (kdebug_enable == 0) {
1307 			__kdebug_only uint64_t elapsed = kdebug_wake();
1308 			KDBG(IOKDBG_CODE(DBG_HIBERNATE, 15), mach_absolute_time() - elapsed);
1309 		}
1310 
1311 #if CONFIG_TELEMETRY
1312 		bootprofile_wake_from_sleep();
1313 #endif /* CONFIG_TELEMETRY */
1314 	}
1315 #if CONFIG_CPU_COUNTERS
1316 	mt_wake_per_core();
1317 #endif /* CONFIG_CPU_COUNTERS */
1318 
1319 #if defined(KERNEL_INTEGRITY_CTRR)
1320 	if (ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] != CTRR_LOCKED) {
1321 		lck_spin_lock(&ctrr_cpu_start_lck);
1322 		ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] = CTRR_LOCKED;
1323 		thread_wakeup(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id]);
1324 		lck_spin_unlock(&ctrr_cpu_start_lck);
1325 	}
1326 #endif
1327 
1328 
1329 	secondary_cpu_main(NULL);
1330 }
1331 
1332 /*
1333  * Routine:		arm_init_idle_cpu
1334  * Function:	Resume from non-retention WFI.  Called from the reset vector.
1335  */
1336 void __attribute__((noreturn))
1337 arm_init_idle_cpu(
1338 	cpu_data_t      *cpu_data_ptr)
1339 {
1340 #if __ARM_PAN_AVAILABLE__
1341 	__builtin_arm_wsr("pan", 1);
1342 #endif
1343 
1344 	machine_set_current_thread(cpu_data_ptr->cpu_active_thread);
1345 
1346 #if __arm64__
1347 	wfe_timeout_init();
1348 #endif
1349 
1350 #ifdef  APPLETYPHOON
1351 	if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
1352 		cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
1353 	}
1354 #endif
1355 
1356 	/*
1357 	 * Update the active debug object to reflect that debug registers have been reset.
1358 	 * This will force any thread with active debug state to resync the debug registers
1359 	 * if it returns to userspace on this CPU.
1360 	 */
1361 	if (cpu_data_ptr->cpu_user_debug != NULL) {
1362 		arm_debug_set(NULL);
1363 	}
1364 
1365 	fiq_context_init(FALSE);
1366 
1367 	cpu_idle_exit(TRUE);
1368 }
1369 
1370 vm_map_address_t
1371 phystokv(pmap_paddr_t pa)
1372 {
1373 	sptm_papt_t va;
1374 	if (sptm_phystokv(pa, &va) != LIBSPTM_SUCCESS) {
1375 		return 0;
1376 	}
1377 	return (vm_map_address_t)va;
1378 }
1379 
1380 vm_map_address_t
1381 phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
1382 {
1383 
1384 	vm_size_t len;
1385 
1386 	len = PAGE_SIZE - (pa & PAGE_MASK);
1387 	if (*max_len > len) {
1388 		*max_len = len;
1389 	}
1390 
1391 	return phystokv((sptm_paddr_t)pa);
1392 }
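/*
 * Usage sketch (illustrative): callers walking an arbitrary physical range
 * let phystokv_range() clamp each step to a page boundary:
 *
 *	while (len > 0) {
 *		vm_size_t chunk = len;
 *		vm_map_address_t va = phystokv_range(pa, &chunk);
 *		// ... operate on [va, va + chunk) ...
 *		pa += chunk;
 *		len -= chunk;
 *	}
 */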
1393 
1394 vm_offset_t
1395 ml_static_vtop(vm_offset_t va)
1396 {
1397 	return (vm_offset_t)kvtophys_nofail((sptm_papt_t)va);
1398 }
1399 
1400 #define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
1401 #define ARM64_GRANULE_ALLOW_HINT (1 << 1)
1402 
1403 // Populate seg...AuxKC and fixup AuxKC permissions
1404 static bool
arm_vm_auxkc_init(void)1405 arm_vm_auxkc_init(void)
1406 {
1407 	if (auxkc_mh == 0 || auxkc_base == 0) {
1408 		return false; // no auxKC.
1409 	}
1410 
1411 	/* Fixup AuxKC and populate seg*AuxKC globals used below */
1412 	arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1413 
1414 	/*
1415 	 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1416 	 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1417 	 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1418 	 * its RO region before extending it.
1419 	 */
1420 	assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1421 	assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1422 	if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1423 		segHIGHESTROAuxKC = segHIGHESTAuxKC;
1424 	}
1425 
1426 	/*
1427 	 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1428 	 * by CTRR, and the AuxKC RX region must be within the RO region.
1429 	 */
1430 	assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1431 	assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1432 	assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1433 	assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
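	/*
	 * i.e. the AuxKC regions nest, low VA to high:
	 * segLOWESTAuxKC <= segLOWESTROAuxKC <= segLOWESTRXAuxKC
	 *   <= segHIGHESTRXAuxKC <= segHIGHESTROAuxKC.
	 */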
1434 
1435 	return true;
1436 }
1437 
1438 /*
1439  * Looks up the set of properties that describe the physical load addresses and sizes of the boot
1440  * kernelcache's loaded segments in the device tree and returns (1) the number of segments found
1441  * in *arm_vm_kernelcache_numrangesp and (2) their starting/ending addresses as an array of type
1442  * arm_physrange_t in *arm_vm_kernelcache_rangesp.
1443  * The function returns the total number of pages across all loaded boot kernelcache segments.
1444  * If there is a problem looking up the /chosen/memory-map node in the DT, all arguments are
1445  * zeroed and the function returns 0.
1446  */
1447 static unsigned int
1448 arm_get_bootkc_ranges_from_DT(const arm_physrange_t **arm_vm_kernelcache_rangesp, int *arm_vm_kernelcache_numrangesp)
1449 {
1450 	DTEntry memory_map;
1451 	int err;
1452 	DTMemoryMapRange const *range;
1453 	unsigned int rangeSize;
1454 #define NUM_BOOTKC_RANGES 5
1455 	static arm_physrange_t bootkc_physranges[NUM_BOOTKC_RANGES] = { {0, } };
1456 	static int bootkc_numranges = 0;
1457 	static unsigned int bootkc_total_pages = 0;
1458 
1459 	assert(arm_vm_kernelcache_rangesp != NULL);
1460 	assert(arm_vm_kernelcache_numrangesp != NULL);
1461 
1462 	/* return cached values if previously computed */
	if (bootkc_numranges == 0) {
		err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
		if (err != kSuccess) {
			*arm_vm_kernelcache_numrangesp = 0;
			*arm_vm_kernelcache_rangesp = NULL;
			return 0;
		}

		/* We're looking for 5 ranges: BootKC-ro, BootKC-rx, BootKC-bx, BootKC-rw, and BootKC-le */
		const char *BootKC_Properties[NUM_BOOTKC_RANGES] = {
			"BootKC-ro", "BootKC-rx", "BootKC-bx", "BootKC-rw", "BootKC-le"
		};

		for (int i = 0; i < NUM_BOOTKC_RANGES; i++) {
			err = SecureDTGetProperty(memory_map, BootKC_Properties[i], (void const **)&range, &rangeSize);
			if (err == kSuccess && rangeSize == sizeof(DTMemoryMapRange)) {
				bootkc_physranges[i].start_phys = range->paddr;
				bootkc_physranges[i].end_phys = range->paddr + range->length;
				assert((bootkc_physranges[i].end_phys & PAGE_MASK) == 0);
				bootkc_numranges++;
				bootkc_total_pages += (unsigned int) atop_64(bootkc_physranges[i].end_phys - bootkc_physranges[i].start_phys);
			}
		}
	}

	*arm_vm_kernelcache_numrangesp = bootkc_numranges;
	*arm_vm_kernelcache_rangesp = &bootkc_physranges[0];
	return bootkc_total_pages;
}

void
arm_vm_prot_init(__unused boot_args * args)
{
	segLOWESTTEXT = UINT64_MAX;
	if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
		segLOWESTTEXT = segPRELINKTEXTB;
	}
	assert(segSizeTEXT);
	if (segTEXTB < segLOWESTTEXT) {
		segLOWESTTEXT = segTEXTB;
	}
	assert(segLOWESTTEXT < UINT64_MAX);

	segEXTRADATA = 0;
	segSizeEXTRADATA = 0;
	segTRUSTCACHE = 0;
	segSizeTRUSTCACHE = 0;

	segLOWEST = segLOWESTTEXT;
	segLOWESTRO = segLOWESTTEXT;

	if (segLOWESTKC && segLOWESTKC < segLOWEST) {
		/*
		 * kernel collections have segments below the kernel. In particular the collection mach header
		 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
		 */
		segLOWEST = segLOWESTKC;
		if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
			segLOWESTRO = segLOWESTROKC;
		}
		if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
			segHIGHESTRO = segHIGHESTROKC;
		}
	}

	DTEntry memory_map;
	int err;

	// Device Tree portion of EXTRADATA
	if (SecureDTIsLockedDown()) {
		segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
		segSizeEXTRADATA = PE_state.deviceTreeSize;
	}

	// Trust Caches portion of EXTRADATA
	{
		DTMemoryMapRange const *trustCacheRange;
		unsigned int trustCacheRangeSize;

		err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
		assert(err == kSuccess);

		err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
		if (err == kSuccess) {
			if (trustCacheRangeSize != sizeof(DTMemoryMapRange)) {
				panic("Unexpected /chosen/memory-map/TrustCache property size %u != %zu", trustCacheRangeSize, sizeof(DTMemoryMapRange));
			}

			vm_offset_t const trustCacheRegion = phystokv(trustCacheRange->paddr);
			if (trustCacheRegion < segLOWEST) {
				if (segEXTRADATA != 0) {
					if (trustCacheRegion != segEXTRADATA + segSizeEXTRADATA) {
						panic("Unexpected location of TrustCache region: %#lx != %#lx",
						    trustCacheRegion, segEXTRADATA + segSizeEXTRADATA);
					}
					segSizeEXTRADATA += trustCacheRange->length;
				} else {
					// Not all devices support CTRR device trees.
					segEXTRADATA = trustCacheRegion;
					segSizeEXTRADATA = trustCacheRange->length;
				}
			}
			segTRUSTCACHE = trustCacheRegion;
			segSizeTRUSTCACHE = trustCacheRange->length;
		}
	}

	if (segSizeEXTRADATA != 0) {
		if (segEXTRADATA <= segLOWEST) {
			segLOWEST = segEXTRADATA;
			if (segEXTRADATA <= segLOWESTRO) {
				segLOWESTRO = segEXTRADATA;
			}
		} else {
			panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
		}
	}

	/* Record the bounds of the kernelcache. */
	vm_kernelcache_base = segLOWEST;

	auxkc_mh = SPTMArgs->auxkc_mh;
	auxkc_base = SPTMArgs->auxkc_base;
	end_kern = SPTMArgs->auxkc_end;

	vm_kernelcache_top = end_kern;
}

static void
arm_vm_slide_region(vm_offset_t phys_start, size_t size)
{
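	/* The SPTM call takes a page count, so convert the region size in bytes to pages. */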
	sptm_slide_region(phys_start, (unsigned int)(size >> PAGE_SHIFT));
}

/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;

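/* Round an address up to the next L1 / twig-level translation table boundary. */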
#define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
#define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))

void
arm_vm_prot_finalize(boot_args * args __unused)
{
	/*
	 * At this point, we are far enough along in the boot process that it will be
	 * safe to free up all of the memory preceding the kernel.  It may in fact
	 * be safe to do this earlier.
	 *
	 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
	 * as usable.
	 */
	/* Slide KLDDATA */
	arm_vm_slide_region(segKLDDATAB, segSizeKLDDATA);

	/*
	 * Replace the boot CPU's stacks with properly-guarded dynamically allocated stacks.
	 * This must happen prior to sliding segBOOTDATAB, which will effectively remove
	 * the existing boot stacks.
	 */
	cpu_stack_alloc(&BootCpuData);
	arm64_replace_bootstack(&BootCpuData);

	/* Slide early-boot data */
	arm_vm_slide_region(segBOOTDATAB, segSizeBOOTDATA);

	/* Slide linkedit, unless otherwise requested */
	bool keep_linkedit = false;
	PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
#if CONFIG_DTRACE
	if (dtrace_keep_kernel_symbols()) {
		keep_linkedit = true;
	}
#endif /* CONFIG_DTRACE */
#if KASAN_DYNAMIC_DENYLIST
	/* KASAN's dynamic denylist needs to query the LINKEDIT segment at runtime.  As such, the
	 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
	keep_linkedit = true;
#endif /* KASAN_DYNAMIC_DENYLIST */

	if (!keep_linkedit) {
		arm_vm_slide_region(segLINKB, segSizeLINK);
		if (segSizePLKLINKEDIT) {
			/* Prelinked kernel LINKEDIT */
			arm_vm_slide_region(segPLKLINKEDITB, segSizePLKLINKEDIT);
		}
	}

	/* Slide prelinked kernel plists */
	arm_vm_slide_region(segPRELINKINFOB, segSizePRELINKINFO);

	/*
	 * Free the portion of memory that precedes the first usable region, known
	 * as the physical slide.
	 */
	ml_static_mfree(SPTMArgs->phys_slide_papt, SPTMArgs->phys_slide_size);

	/*
	 * KTRR support means we will be mucking with these pages and trying to
	 * protect them; we cannot free the pages to the VM if we do this.
	 */
	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
		/* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
		ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
	}

	ml_static_mfree(segBOOTDATAB, segSizeBOOTDATA);

#if __ARM_KERNEL_PROTECT__
	arm_vm_populate_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */
}

/* allocate a page for a page table: we support static and dynamic mappings.
 *
 * returns a physical address for the allocated page
 *
 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end - 1,
 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
 *
 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
 */
pmap_paddr_t
alloc_ptpage(sptm_pt_level_t level, bool map_static)
{
	pmap_paddr_t paddr = 0;

#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) || defined(KERNEL_INTEGRITY_PV_CTRR))
	map_static = FALSE;
#endif
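	/*
	 * Without KTRR/CTRR, there is no protected read-only page-table region to
	 * allocate from, so every allocation is forced down the dynamic (RW) path below.
	 */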

	/* Set the next free ropage if this is the first call to this function */
	if (!ropage_next) {
		ropage_next = (vm_offset_t)&ropagetable_begin;
	}

	if (map_static) {
		/* This is a RO allocation. Make sure we have room in the ropagetable area */
		assert(ropage_next < (vm_offset_t)&ropagetable_end);

		/* Obtain physical address and increment the index into the ropagetable area */
		paddr = (pmap_paddr_t)kvtophys((sptm_papt_t)ropage_next);
		ropage_next += ARM_PGBYTES;
	} else {
		/* This is a RW allocation. Simply grab a page from [avail_start] */
		paddr = avail_start;
		avail_start += ARM_PGBYTES;
	}

	/* Retype the page to XNU_PAGE_TABLE, with the desired level */
	sptm_retype_params_t retype_params;
	retype_params.level = level;
	sptm_retype(paddr, XNU_DEFAULT, XNU_PAGE_TABLE, retype_params);

	return paddr;
}

/**
 * Initialize a vm_image_offsets structure with information obtained from a
 * Mach-O header for the wanted image.
 *
 * @param debug_header_entry The entry in the debug header images list to obtain
 *                           a pointer to the Mach-O header from. This must be
 *                           either the SPTM or TXM debug header entry.
 * @param offsets Output pointer of the vm_image_offsets structure to fill in.
 */
static void
init_image_offsets(size_t debug_header_entry, vm_image_offsets *offsets)
{
	assert(offsets != NULL);
	assert((debug_header_entry == DEBUG_HEADER_ENTRY_SPTM) ||
	    (debug_header_entry == DEBUG_HEADER_ENTRY_TXM));

	offsets->slid_base = (vm_offset_t)SPTMArgs->debug_header->image[debug_header_entry];
	kernel_mach_header_t *macho = (kernel_mach_header_t*)offsets->slid_base;
	offsets->unslid_base = (vm_offset_t)getsegbynamefromheader(macho, "__TEXT")->vmaddr;
	assert((offsets->slid_base != 0) && (offsets->unslid_base != 0));
	offsets->slide = offsets->slid_base - offsets->unslid_base;
	offsets->unslid_top = getlastaddr(macho);
	offsets->slid_top = offsets->unslid_top + offsets->slide;
}

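/*
 * Range and mask for sliding the physmap; the mask extracts an offset within
 * the 1 GB range (assumed to be consumed by the physmap randomization logic
 * elsewhere).
 */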
#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
#define ARM64_PHYSMAP_SLIDE_MASK  (ARM64_PHYSMAP_SLIDE_RANGE - 1)

void
arm_vm_init(uint64_t memory_size_override, boot_args * args)
{
	vm_map_address_t va_l1, va_l1_end;
	tt_entry_t       *cpu_l1_tte;
	tt_entry_t       *cpu_l2_tte;
	vm_map_address_t va_l2, va_l2_end;
	vm_map_address_t dynamic_memory_begin;
	uint64_t         mem_segments;

	/* Get the virtual and physical kernel-managed memory base from boot_args */
	gVirtBase = args->virtBase;
	gPhysBase = args->physBase;

	/* Get the memory size */
#if KASAN
	real_phys_size = memSize + (shadow_ptop - shadow_pbase);
#else
	real_phys_size = memSize;
#endif

	/**
	 * Ensure the physical region we specify for the VM to manage ends on a
	 * software page boundary.  Note that the software page size (PAGE_SIZE)
	 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
	 * We must round the reported memory size down to the nearest PAGE_SIZE
	 * boundary to ensure the VM does not try to manage a page it does not
	 * completely own.  The KASAN shadow region, if present, is managed entirely
	 * in units of the hardware page size and should not need similar treatment.
	 */
	gPhysSize = mem_size = ((gPhysBase + memSize) & ~PAGE_MASK) - gPhysBase;

#if HAS_MTE
	boolean_t disable_mte = FALSE;
	PE_parse_boot_argn("-disable_mte", &disable_mte, sizeof(disable_mte));
	is_mte_enabled = !disable_mte;

	/*
	 * As described above, TCFs taken while the kernel was accessing IOMD memory are attributed to the userspace task
	 * that provided the memory that the IOMD was materialized from. This results in the user task being killed.
	 * To aid debugging, enabling this boot arg will cause the kernel to instead immediately panic when it encounters
	 * a TCF under these circumstances.
	 */
	PE_parse_boot_argn("panic_on_iomd_tagged_access", &panic_on_user_induced_iomd_kernel_faults, sizeof(panic_on_user_induced_iomd_kernel_faults));
#endif /* HAS_MTE */

#if HAS_MTE && KASAN
	/* Our current KASAN implementations don't work with MTE.
	 * Therefore, when running under KASAN, disable MTE outright. */
	is_mte_enabled = FALSE;
#endif /* HAS_MTE && KASAN */

	/* Obtain total memory size, including non-managed memory */
	mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
	if ((memory_size_override != 0) && (mem_size > memory_size_override)) {
#if HAS_MTE
		/*
		 * When MTE is enabled, we cannot simply override the memory size,
		 * because the tag storage region usually sits at the end of memory
		 * and tag storage pages need to be in the VM array. Instead,
		 * initialize_ram_ranges will adjust the number of memory and tag
		 * storage pages made available to the VM.
		 */
		if (!is_mte_enabled)
#endif /* HAS_MTE */
		{
			mem_size = memory_size_override;
		}
		max_mem_actual = memory_size_override;
	} else {
		max_mem_actual = mem_actual;
	}

#if !defined(ARM_LARGE_MEMORY)
	/* Make sure the system does not have more physical memory than what can be mapped */
	if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
		panic("Unsupported memory configuration %lx", mem_size);
	}
#endif /* !defined(ARM_LARGE_MEMORY) */

	physmap_base = SPTMArgs->physmap_base;
	physmap_end = static_memory_end = SPTMArgs->physmap_end;

#if KASAN && !defined(ARM_LARGE_MEMORY) && !defined(CONFIG_SPTM)
	/* add the KASAN stolen memory to the physmap */
	dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
#else
	dynamic_memory_begin = static_memory_end;
#endif

	if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
		panic("Unsupported memory configuration %lx", mem_size);
	}

	/*
	 * TODO: Free bootstrap table memory back to the allocator.
	 * On large memory systems, bootstrap tables could be quite large.
	 * After bootstrap completes, xnu can warm start with a single 16KB page mapping
	 * to trampoline to KVA. This requires only 3 pages to stay resident.
	 */
	avail_start = first_avail_phys;

	/*
	 * Initialize l1 page table page.
	 *
	 * SPTM TODO: Have a separate root_table_paddr field in the sptm_args
	 *            instead of snooping the libsptm_state (XNU should not be
	 *            snooping the libsptm_state directly in general).
	 */
	cpu_ttep = (pmap_paddr_t)const_sptm_args.libsptm_state.root_table_paddr;
	cpu_tte = (tt_entry_t *)phystokv(cpu_ttep);
	avail_end = gPhysBase + mem_size;
	assert(!(avail_end & PAGE_MASK));

	/* These need to be set early so pa_valid() works */
	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);

#if KASAN
	real_avail_end = gPhysBase + real_phys_size;
#else
	real_avail_end = avail_end;
#endif

	/*
	 * Now retrieve addresses for various segments from kernel mach-o header
	 */
	segPRELINKTEXTB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
	segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
	segPLKTEXTEXECB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
	segTEXTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
	segDATACONSTB    = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
	segTEXTEXECB     = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
	segDATAB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);

	segBOOTDATAB     = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
	segLINKB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
	segKLDB          = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
	segKLDDATAB      = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
	segPRELINKDATAB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
	segPRELINKINFOB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
	segPLKLLVMCOVB   = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
	segPLKLINKEDITB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
	segLASTB         = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
	segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);

	sectHIBTEXTB     = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
	sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
	segHIBDATAB      = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);

	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
		kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);

		// fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
		segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
		assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC);                        // kernel PLK_TEXT_EXEC must be empty

		assert(segLASTB);                                                      // kernel LAST can be empty, but it must have
		                                                                       // a valid address for computations below.

		assert(segKCTEXTEXECB <= segLASTB);                                    // KC TEXT_EXEC must contain kernel LAST
		assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
		segPLKTEXTEXECB = segLASTB + segSizeLAST;
		segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);

		// fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
		segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
		assert(segPLKDATACONSTB && !segSizePLKDATACONST);                      // kernel PLK_DATA_CONST must be empty
		assert(segLASTDATACONSTB && segSizeLASTDATACONST);                     // kernel LASTDATA_CONST must be non-empty
		assert(segKCDATACONSTB <= segLASTDATACONSTB);                          // KC DATA_CONST must contain kernel LASTDATA_CONST
		assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
		segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
		segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);

		// fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
		segKCDATAB      = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
		assert(segPRELINKDATAB && !segSizePRELINKDATA);                        // kernel PRELINK_DATA must be empty
		assert(segKCDATAB <= segPRELINKDATAB);                                 // KC DATA must contain kernel PRELINK_DATA
		assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
		segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);

		// fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
		assert(segPRELINKTEXTB && !segSizePRELINKTEXT);                        // kernel PRELINK_TEXT must be empty
		segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
		assert(segPRELINKINFOB && !segSizePRELINKINFO);                        // kernel PRELINK_INFO must be empty
		segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
		segLINKB        = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
	}

	/* if one of the new segments is present, the other one better be as well */
	if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
		assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
	}

	etext = (vm_offset_t) segTEXTB + segSizeTEXT;
	sdata = (vm_offset_t) segDATAB;
	edata = (vm_offset_t) segDATAB + segSizeDATA;
	end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastkerneladdr()); /* Force end to next page */

	vm_set_page_size();

	vm_kernel_base = segTEXTB;
	vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
	vm_kext_base = segPRELINKTEXTB;
	vm_kext_top = vm_kext_base + segSizePRELINKTEXT;

	vm_prelink_stext = segPRELINKTEXTB;
	if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
	} else {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
	}
	vm_prelink_sinfo = segPRELINKINFOB;
	vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
	vm_slinkedit = segLINKB;
	vm_elinkedit = segLINKB + segSizeLINK;

	vm_prelink_sdata = segPRELINKDATAB;
	vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;

	arm_vm_prot_init(args);

	/**
	 * Count the number of pages the boot kernelcache occupies.  Additionally,
	 * ml_static_mfree() uses the BootKC ranges from the DT to account for freed kernelcache pages.
	 */
	vm_page_kernelcache_count = arm_get_bootkc_ranges_from_DT(&arm_vm_kernelcache_ranges, &arm_vm_kernelcache_numranges);

	assert(vm_page_kernelcache_count > 0);

#if KASAN
	/* record the extent of the physmap */
	physmap_vbase = physmap_base;
	physmap_vtop = physmap_end;
	kasan_init();
#endif /* KASAN */

#if CONFIG_CPU_COUNTERS
	mt_early_init();
#endif /* CONFIG_CPU_COUNTERS */

	kva_active = TRUE;

	if (arm_vm_auxkc_init()) {
		if (segLOWESTROAuxKC < segLOWESTRO) {
			segLOWESTRO = segLOWESTROAuxKC;
		}
		if (segHIGHESTROAuxKC > segHIGHESTRO) {
			segHIGHESTRO = segHIGHESTROAuxKC;
		}
		if (segLOWESTRXAuxKC < segLOWESTTEXT) {
			segLOWESTTEXT = segLOWESTRXAuxKC;
		}

#if XNU_TARGET_OS_OSX
		/**
		 * If we are on macOS with 3P kexts, we disable
		 * XNU_KERNEL_RESTRICTED for now.
		 */
		use_xnu_restricted = false;

#endif /* XNU_TARGET_OS_OSX */
	}

	if (memory_size_override && memory_size_override < mem_size) {
		max_mem = memory_size_override;
		sane_size = memory_size_override - (avail_start - gPhysBase);
	} else {
		max_mem = mem_size;
		sane_size = mem_size - (avail_start - gPhysBase);
	}
	// vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
	vm_kernel_slid_base = segLOWESTTEXT;
	vm_kernel_stext = segTEXTB;

	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
		vm_kernel_etext = segTEXTEXECB + segSizeTEXTEXEC;
		vm_kernel_slid_top = vm_slinkedit;
	} else {
		assert(segDATACONSTB == segTEXTB + segSizeTEXT);
		assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
		vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
		vm_kernel_slid_top = vm_prelink_einfo;
	}

	/**
	 * Calculate the address ranges used to determine whether an address is an
	 * SPTM or TXM address, as well as the slides used to slide/unslide those
	 * addresses.
	 *
	 * The debug header contains pointers to the beginning of the images loaded
	 * up by iBoot (which always start with the Mach-O header). The __TEXT
	 * segment should be the first (and lowest) segment in both of these
	 * binaries (the addresses in the Mach-O header are all unslid).
	 */
	init_image_offsets(DEBUG_HEADER_ENTRY_SPTM, &vm_sptm_offsets);
	init_image_offsets(DEBUG_HEADER_ENTRY_TXM, &vm_txm_offsets);

	dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);

	/* TODO: CONFIG_XNUPOST CTRR test */

	pmap_bootstrap(dynamic_memory_begin);

	disable_preemption();

	/*
	 * Initialize L3 page table pages:
	 *   cover this address range:
	 *    2MB + FrameBuffer size + 10MB for each 256MB segment
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all static
	 *       memory are allocated and inserted into the hierarchy by the SPTM beforehand.
	 *       Instead, this code simply walks the page tables to find those pre-allocated
	 *       tables and allocates PTD objects for them.
	 */

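	/* Count of 256MB (1 << 28 byte) segments of managed memory, rounded up. */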
	mem_segments = (mem_size + 0x0FFFFFFF) >> 28;

	va_l1 = dynamic_memory_begin;
	va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
	va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
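	/* Round the end of the range up to an 8MB boundary (clear the low 23 bits). */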
	va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;

	cpu_l1_tte = cpu_tte + L1_TABLE_T1_INDEX(va_l1, TCR_EL1_BOOT);

	while (va_l1 < va_l1_end) {
		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
		}

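		/* Locate the L2 TTE for va_l2: take the L2 table's physical address from
		 * the L1 entry, convert it to a KVA, and index by va_l2's L2 bits. */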
		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);

		while (va_l2 < va_l2_end) {
			/* Obtain pre-allocated page and setup L3 Table TTE in L2 */
			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		}

		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}

	/*
	 * Initialize L3 page table pages:
	 *   cover this address range:
	 *   ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all static
	 *       memory are allocated and inserted into the hierarchy by the SPTM beforehand.
	 *       Instead, this code simply walks the page tables to find those pre-allocated
	 *       tables and allocates PTD objects for them.
	 */
	va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
	va_l1_end = VM_MAX_KERNEL_ADDRESS;

	cpu_l1_tte = cpu_tte + L1_TABLE_T1_INDEX(va_l1, TCR_EL1_BOOT);

	while (va_l1 < va_l1_end) {
		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
			/* If this is the last L1 entry, it must cover the last mapping. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
		}

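		/* As above, locate the L2 TTE for va_l2 within the table referenced by
		 * the current L1 entry. */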
		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);

		while (va_l2 < va_l2_end) {
			/* Obtain pre-allocated page and setup L3 Table TTE in L2 */
			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		}

		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}

	/*
	 * Adjust avail_start so that the range that the VM owns
	 * starts on a PAGE_SIZE aligned boundary.
	 */
	avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;

	/* TODO pmap_static_allocations_done() */

	first_avail = avail_start;
	patch_low_glo_static_region(first_avail_phys, avail_start - first_avail_phys);
	enable_preemption();
}

/*
 * Returns true if the address lies within the __TEXT, __TEXT_EXEC, or __DATA_CONST
 * segment ranges. This is what [vm_kernel_stext, vm_kernel_etext) used to cover.
 * The segments together may no longer form a contiguous address range, so the
 * individual intervals are inspected.
 */
bool
kernel_text_contains(vm_offset_t addr)
{
	if (segTEXTB <= addr && addr < (segTEXTB + segSizeTEXT)) {
		return true;
	}
	if (segTEXTEXECB <= addr && addr < (segTEXTEXECB + segSizeTEXTEXEC)) {
		return true;
	}
	return segDATACONSTB <= addr && addr < (segDATACONSTB + segSizeDATACONST);
}