/**
 * Copyright (c) 2022-2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm64/lowglobals.h>
#include <kern/ecc.h>
#include <kern/timer_queue.h>
#include <kern/monotonic.h>
#include <machine/commpage.h>
#include <pexpert/device_tree.h>
#include <arm/cpu_internal.h>
#include <arm/misc_protos.h>
#include <arm/machine_cpu.h>
#include <arm/rtclock.h>
#include <vm/vm_map.h>
#include <mach/exclaves.h>
#include <mach/vm_param.h>
#include <libkern/stack_protector.h>
#include <console/serial_protos.h>
#include <arm64/sptm/pmap/pmap_pt_geometry.h>
#include <arm64/sptm/sptm.h>
#include <sptm/sptm_common.h>
#include <vm/vm_page_internal.h>

#if CONFIG_TELEMETRY
#include <kern/telemetry.h>
#endif /* CONFIG_TELEMETRY */

#if KPERF
#include <kperf/kptimer.h>
#endif /* KPERF */

#if HIBERNATION
#include <IOKit/IOPlatformExpert.h>
#include <machine/pal_hibernate.h>
#endif /* HIBERNATION */

/**
 * Functions defined elsewhere that are required by this source file.
 */
extern void patch_low_glo(void);
extern int serial_init(void);
extern void sleep_token_buffer_init(void);

/**
 * Bootstrap stacks. Used on the cold boot path to set up the boot CPU's
 * per-CPU data structure.
 */
extern vm_offset_t intstack_top;
extern vm_offset_t excepstack_top;

/* First (inclusive) and last (exclusive) physical addresses */
extern pmap_paddr_t vm_first_phys;
extern pmap_paddr_t vm_last_phys;

/* UART hibernation flag - import so we can set it ASAP on resume. */
extern MARK_AS_HIBERNATE_DATA bool uart_hibernation;

/* Used to cache memSize, as passed by iBoot */
SECURITY_READ_ONLY_LATE(uint64_t) memSize = 0;

int debug_task;

/**
 * Set according to what serial-related boot-args have been passed to XNU.
 */
extern int disableConsoleOutput;

#if XNU_TARGET_OS_OSX
/**
 * Extern the PMAP boot-arg to enable/disable XNU_KERNEL_RESTRICTED.
 * We need it here because if we detect an auxKC, we disable the mitigation.
 */
extern bool use_xnu_restricted;
#endif /* XNU_TARGET_OS_OSX */

/**
 * SPTM devices do not support static kernelcaches, but the rest of XNU
 * expects this variable to be defined. Set it to false at build time.
 */
SECURITY_READ_ONLY_LATE(bool) static_kernelcache = false;

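/* Set when the "-restore" boot-arg is present; defaults to false. */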
TUNABLE(bool, restore_boot, "-restore", false);

/**
 * First physical address freely available to xnu.
 */
SECURITY_READ_ONLY_LATE(addr64_t) first_avail_phys = 0;

#if HAS_BP_RET
/* Enable both branch target retention (0x2) and branch direction retention (0x1) across sleep */
uint32_t bp_ret = 3;
extern void set_bp_ret(void);
#endif

#if SCHED_HYGIENE_DEBUG
boolean_t sched_hygiene_debug_pmc = 1;
#endif

#if SCHED_HYGIENE_DEBUG

#if XNU_PLATFORM_iPhoneOS
#define DEFAULT_INTERRUPT_MASKED_TIMEOUT 48000   /* 2ms */
#elif XNU_PLATFORM_XROS
#define DEFAULT_INTERRUPT_MASKED_TIMEOUT 12000   /* 500us */
#else
#define DEFAULT_INTERRUPT_MASKED_TIMEOUT 0xd0000 /* 35.499ms */
#endif /* XNU_PLATFORM_iPhoneOS */

TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, interrupt_masked_debug_mode,
    "machine-timeouts", "interrupt-masked-debug-mode",
    "interrupt-masked-debug-mode",
    SCHED_HYGIENE_MODE_PANIC,
    TUNABLE_DT_CHECK_CHOSEN);

MACHINE_TIMEOUT_DEV_WRITEABLE(interrupt_masked_timeout, "interrupt-masked",
    DEFAULT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
    NULL);
#if __arm64__
#define SSHOT_INTERRUPT_MASKED_TIMEOUT 0xf9999 /* 64-bit: 42.599ms */
#endif
MACHINE_TIMEOUT_DEV_WRITEABLE(stackshot_interrupt_masked_timeout, "sshot-interrupt-masked",
    SSHOT_INTERRUPT_MASKED_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE,
    NULL);
#undef SSHOT_INTERRUPT_MASKED_TIMEOUT
#endif

/*
 * A 6-second timeout will give the watchdog code a chance to run
 * before a panic is triggered by the xcall routine.
 */
#define XCALL_ACK_TIMEOUT_NS ((uint64_t) 6000000000)
uint64_t xcall_ack_timeout_abstime;

boot_args const_boot_args __attribute__((section("__DATA, __const")));
boot_args *BootArgs __attribute__((section("__DATA, __const")));

/**
 * The SPTM provides a second set of boot arguments, on top of those
 * provided by iBoot.
 */
SECURITY_READ_ONLY_LATE(sptm_bootstrap_args_xnu_t) const_sptm_args;
SECURITY_READ_ONLY_LATE(const sptm_bootstrap_args_xnu_t *) SPTMArgs;
SECURITY_READ_ONLY_LATE(const bool *) sptm_xnu_triggered_panic_ptr;

extern char osbuild_config[];

TUNABLE(uint32_t, arm_diag, "diag", 0);
#ifdef APPLETYPHOON
static unsigned cpus_defeatures = 0x0;
extern void cpu_defeatures_set(unsigned int);
#endif

#if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
extern volatile boolean_t arm64_stall_sleep;
#endif

extern boolean_t force_immediate_debug_halt;

#if HAS_APPLE_PAC
SECURITY_READ_ONLY_LATE(boolean_t) diversify_user_jop = TRUE;
#endif


SECURITY_READ_ONLY_LATE(uint64_t) gDramBase;
SECURITY_READ_ONLY_LATE(uint64_t) gDramSize;

SECURITY_READ_ONLY_LATE(bool) serial_console_enabled = false;

/**
 * SPTM TODO: The following flag is set based on the presence and
 *            configuration of the 'sptm_stability_hacks' boot-arg. It is
 *            consulted by certain codepaths that do not yet function
 *            properly on SPTM systems, to keep the system stable and fully
 *            able to boot to user space.
 */
SECURITY_READ_ONLY_LATE(bool) sptm_stability_hacks = false;

#if APPLEVIRTUALPLATFORM
SECURITY_READ_ONLY_LATE(vm_offset_t) reset_vector_vaddr = 0;
#endif /* APPLEVIRTUALPLATFORM */

/*
 * Forward definition
 */
void arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_args);
#if KASAN
void arm_init_kasan(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_args);
#endif /* KASAN */

#if __arm64__
unsigned int page_shift_user32; /* for page_size as seen by a 32-bit task */

extern void configure_misc_apple_boot_args(void);
extern void configure_misc_apple_regs(bool is_boot_cpu);
extern void configure_timer_apple_regs(void);
#endif /* __arm64__ */


/*
 * JOP rebasing
 */

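/* The dyld fixup helpers log through dyldLogFunc(); define it empty so the include below stays silent in the kernel. */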
#define dyldLogFunc(msg, ...)
#include <mach/dyld_kernel_fixups.h>

extern uint32_t __thread_starts_sect_start[] __asm("section$start$__TEXT$__thread_starts");
extern uint32_t __thread_starts_sect_end[] __asm("section$end$__TEXT$__thread_starts");
#if defined(HAS_APPLE_PAC)
extern void OSRuntimeSignStructors(kernel_mach_header_t * header);
extern void OSRuntimeSignStructorsInFileset(kernel_mach_header_t * header);
#endif /* defined(HAS_APPLE_PAC) */

extern vm_offset_t vm_kernel_slide;
extern vm_offset_t segLOWESTKC, segHIGHESTKC, segLOWESTROKC, segHIGHESTROKC;
extern vm_offset_t segLOWESTAuxKC, segHIGHESTAuxKC, segLOWESTROAuxKC, segHIGHESTROAuxKC;
extern vm_offset_t segLOWESTRXAuxKC, segHIGHESTRXAuxKC, segHIGHESTNLEAuxKC;

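/**
 * Slide and rebase the primary (boot) kernel collection in place, signing its
 * pointers where the hardware supports it. This runs very early on the cold
 * boot path; see the comments below for the restrictions that implies.
 */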
void arm_slide_rebase_and_sign_image(void);
MARK_AS_FIXUP_TEXT void
arm_slide_rebase_and_sign_image(void)
{
    kernel_mach_header_t *k_mh, *kc_mh = NULL;
    kernel_segment_command_t *seg;
    uintptr_t slide;

    /*
     * The kernel is part of a MH_FILESET kernel collection, determine slide
     * based on first segment's mach-o vmaddr (requires first kernel load
     * command to be LC_SEGMENT_64 of the __TEXT segment)
     */
    k_mh = &_mh_execute_header;
    seg = (kernel_segment_command_t *)((uintptr_t)k_mh + sizeof(*k_mh));
    assert(seg->cmd == LC_SEGMENT_KERNEL);
    slide = (uintptr_t)k_mh - seg->vmaddr;

    /*
     * The kernel collection linker guarantees that the boot collection mach
     * header vmaddr is the hardcoded kernel link address (as specified to
     * ld64 when linking the kernel).
     */
    kc_mh = (kernel_mach_header_t*)(VM_KERNEL_LINK_ADDRESS + slide);
    assert(kc_mh->filetype == MH_FILESET);

    /*
     * rebase and sign jops
     * Note that we can't call any functions before this point, so
     * we have to hard-code the knowledge that the base of the KC
     * is the KC's mach-o header. This would change if any
     * segment's VA started *before* the text segment
     * (as the HIB segment does on x86).
     */
    const void *collection_base_pointers[KCNumKinds] = {[0] = kc_mh, };
    kernel_collection_slide((struct mach_header_64 *)kc_mh, collection_base_pointers);
    PE_set_kc_header(KCKindPrimary, kc_mh, slide);

    /*
     * iBoot doesn't slide load command vmaddrs in an MH_FILESET kernel
     * collection, so adjust them now, and determine the vmaddr range
     * covered by read-only segments for the CTRR rorgn.
     */
    kernel_collection_adjust_mh_addrs((struct mach_header_64 *)kc_mh, slide, false,
        (uintptr_t *)&segLOWESTKC, (uintptr_t *)&segHIGHESTKC,
        (uintptr_t *)&segLOWESTROKC, (uintptr_t *)&segHIGHESTROKC,
        NULL, NULL, NULL);

    /*
     * Initialize slide global here to avoid duplicating this logic in
     * arm_vm_init()
     */
    vm_kernel_slide = slide;
}

void
arm_auxkc_init(void *mh, void *base)
{
    /*
     * The kernel collection linker guarantees that the lowest vmaddr in an
     * AuxKC collection is 0 (but note that the mach header is higher up since
     * RW segments precede RO segments in the AuxKC).
     */
    uintptr_t slide = (uintptr_t)base;
    kernel_mach_header_t *akc_mh = (kernel_mach_header_t*)mh;

    assert(akc_mh->filetype == MH_FILESET);
    PE_set_kc_header_and_base(KCKindAuxiliary, akc_mh, base, slide);

    /* rebase and sign jops */
    const void *collection_base_pointers[KCNumKinds];
    memcpy(collection_base_pointers, PE_get_kc_base_pointers(), sizeof(collection_base_pointers));
    kernel_collection_slide((struct mach_header_64 *)akc_mh, collection_base_pointers);

    kernel_collection_adjust_mh_addrs((struct mach_header_64 *)akc_mh, slide, false,
        (uintptr_t *)&segLOWESTAuxKC, (uintptr_t *)&segHIGHESTAuxKC, (uintptr_t *)&segLOWESTROAuxKC,
        (uintptr_t *)&segHIGHESTROAuxKC, (uintptr_t *)&segLOWESTRXAuxKC, (uintptr_t *)&segHIGHESTRXAuxKC,
        (uintptr_t *)&segHIGHESTNLEAuxKC);
#if defined(HAS_APPLE_PAC)
    OSRuntimeSignStructorsInFileset(akc_mh);
#endif /* defined(HAS_APPLE_PAC) */
}

/*
 * boot kernelcache ranges; used for accounting.
 */
SECURITY_READ_ONLY_LATE(const arm_physrange_t *) arm_vm_kernelcache_ranges;
SECURITY_READ_ONLY_LATE(int) arm_vm_kernelcache_numranges;

#if __ARM_KERNEL_PROTECT__
/*
 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
 * mappable space preceding the kernel (as we unmap the kernel by cutting the
 * range covered by TTBR1 in half). This must also cover the exception vectors.
 */
static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);

/* The exception vectors and the kernel cannot share root TTEs. */
static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);

/*
 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
 * the exception vectors.
 */
static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
#endif /* __ARM_KERNEL_PROTECT__ */

#define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)

#if KASAN
extern vm_offset_t shadow_pbase;
extern vm_offset_t shadow_ptop;
extern vm_offset_t physmap_vbase;
extern vm_offset_t physmap_vtop;
#endif

/*
 * We explicitly place this in const, as it is not const from a language
 * perspective, but it is only modified before we actually switch away from
 * the bootstrap page tables.
 */
SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));

/*
 * Denotes the end of xnu.
 */
extern void *last_kernel_symbol;

extern void arm64_replace_bootstack(cpu_data_t*);
extern void PE_slide_devicetree(vm_offset_t);

/*
 * KASLR parameters
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;

SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_sptm_offsets;
SECURITY_READ_ONLY_LATE(vm_image_offsets) vm_txm_offsets;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;

/* Used by <mach/arm/vm_param.h> */
SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;

/* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
 * all kexts before the kernel. This is only for arm64 devices and looks
 * something like the following:
 * -- vmaddr order --
 * 0xffffff8004004000 __PRELINK_TEXT
 * 0xffffff8007004000 __TEXT (xnu)
 * 0xffffff80075ec000 __DATA (xnu)
 * 0xffffff80076dc000 __KLD (xnu)
 * 0xffffff80076e0000 __LAST (xnu)
 * 0xffffff80076e4000 __LINKEDIT (xnu)
 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
 * 0xffffff800782c000 __PRELINK_INFO
 * 0xffffff80078e4000 -- End of kernelcache
 */

/* 24921709 - make XNU ready for KTRR
 *
 * Two possible kernel cache layouts, depending on which kcgen is being used.
 * VAs increasing downwards.
 * Old KCGEN:
 *
 * __PRELINK_TEXT
 * __TEXT
 * __DATA_CONST
 * __TEXT_EXEC
 * __KLD
 * __LAST
 * __DATA
 * __PRELINK_DATA (expected empty)
 * __LINKEDIT
 * __PRELINK_INFO
 *
 * New kcgen:
 *
 * __PRELINK_TEXT       <--- First KTRR (ReadOnly) segment
 * __PLK_DATA_CONST
 * __PLK_TEXT_EXEC
 * __TEXT
 * __DATA_CONST
 * __TEXT_EXEC
 * __KLD
 * __LAST               <--- Last KTRR (ReadOnly) segment
 * __DATA
 * __BOOTDATA (if present)
 * __LINKEDIT
 * __PRELINK_DATA (expected populated now)
 * __PLK_LINKEDIT
 * __PRELINK_INFO
 *
 */

vm_offset_t mem_size;     /* Size of actual physical memory present
                           * minus any performance buffer and possibly
                           * limited by mem_limit in bytes */
uint64_t mem_actual;      /* The "One True" physical memory size
                           * actually, it's the highest physical
                           * address + 1 */
uint64_t max_mem;         /* Size of physical memory (bytes), adjusted
                           * by maxmem */
uint64_t max_mem_actual;  /* Actual size of physical memory (bytes),
                           * adjusted by the maxmem boot-arg */
uint64_t sane_size;       /* Memory size to use for defaults
                           * calculations */
/* This no longer appears to be used; kill it? */
addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
                                                * virtual address known
                                                * to the VM system */

SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;

/* Trust cache portion of EXTRADATA (if within it) */
SECURITY_READ_ONLY_LATE(vm_offset_t) segTRUSTCACHE;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeTRUSTCACHE;

SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRO;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRO;

/* Only set when booted from MH_FILESET kernel collections */
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRXAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRXAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTNLEAuxKC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) segTEXTEXECB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;

SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
extern vm_offset_t intstack_low_guard;
extern vm_offset_t intstack_high_guard;
extern vm_offset_t excepstack_high_guard;

SECURITY_READ_ONLY_LATE(vm_offset_t) segLINKB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeKLD;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeLASTDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBTEXT;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIBDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeHIBDATA;
SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBDATACONSTB;
SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;

/* Only set when booted from MH_FILESET primary kernel collection */
SECURITY_READ_ONLY_LATE(vm_offset_t) segKCTEXTEXECB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeKCTEXTEXEC;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;

SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;

SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) edata;

SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base;

pmap_paddr_t alloc_ptpage(sptm_pt_level_t level, bool map_static);
SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
extern int dtrace_keep_kernel_symbols(void);

/*
 * Bootstrap the system enough to run with virtual memory.
 * Map the kernel's code and data, and allocate the system page table.
 * Page_size must already be set.
 *
 * Parameters:
 * first_avail: first available physical page -
 *              after kernel page tables
 * avail_start: PA of first physical page
 * avail_end:   PA of last physical page
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
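/* Bounds of the physmap, the VA window through which physical memory is mapped; provided by the SPTM (see arm_vm_init()). */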
SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;

typedef struct {
    pmap_paddr_t pa;
    vm_map_address_t va;
    vm_size_t len;
} ptov_table_entry;

SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;

#if HAS_ARM_FEAT_SME
static SECURITY_READ_ONLY_LATE(bool) enable_sme = true;
#endif

/**
 * sptm_supports_local_coredump is set in start_sptm.s when the SPTM dispatch
 * logic calls into XNU to handle a panic from SPTM/TXM/cL4. If this variable
 * is set to false, osfmk/kern/debug.c:debugger_collect_diagnostic() will skip
 * taking a local core dump. It defaults to true because, as long as the panic
 * doesn't occur within the SPTM itself, the SPTM will support making calls
 * during the panic path to save the coredump. Only when the panic occurs from
 * within guarded mode do we let the SPTM decide whether it supports local
 * coredumps.
 */
bool sptm_supports_local_coredump = true;

#if KASAN
/* Prototypes for KASAN functions */
void kasan_bootstrap(boot_args *, vm_offset_t pgtable, sptm_bootstrap_args_xnu_t *sptm_boot_args);

/**
 * Entry point for systems that support an SPTM and are booting a KASAN kernel.
 * This is required because KASAN kernels need to set up the shadow map before
 * arm_init() can even run.
 */
void
arm_init_kasan(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_boot_args)
{
    /* Initialize SPTM helper library. */
    libsptm_init(&sptm_boot_args->libsptm_state);

    memSize = args->memSize;
    kasan_bootstrap(args, phystokv(sptm_boot_args->libsptm_state.root_table_paddr), sptm_boot_args);

    arm_init(args, sptm_boot_args);
}
#endif /* KASAN */

/**
 * Entry point for systems that support an SPTM - except on KASAN kernels,
 * see above. Bootstrap stacks have been set up by the SPTM by this point,
 * and XNU is responsible for rebasing and signing absolute addresses.
 */
void
arm_init(boot_args *args, sptm_bootstrap_args_xnu_t *sptm_boot_args)
{
    unsigned int maxmem;
    uint32_t memsize;
    uint64_t xmaxmem;
    thread_t thread;

    extern void xnu_return_to_gl2(void);
    const sptm_vaddr_t handler_addr = (sptm_vaddr_t) ptrauth_strip((void *)xnu_return_to_gl2, ptrauth_key_function_pointer);
    sptm_register_xnu_exc_return(handler_addr);

#if defined(HAS_APPLE_PAC)
    kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
    OSRuntimeSignStructorsInFileset(kc_mh);
#endif /* defined(HAS_APPLE_PAC) */

    /* If kernel integrity is supported, use a constant copy of the boot args. */
    const_boot_args = *args;
    BootArgs = args = &const_boot_args;
    const_sptm_args = *sptm_boot_args;
    SPTMArgs = sptm_boot_args = &const_sptm_args;
    sptm_xnu_triggered_panic_ptr = sptm_boot_args->xnu_triggered_panic;
    /*
     * Initialize first_avail_phys from what the SPTM tells us.
     * We're not using iBoot's topOfKernelData, as SPTM and other
     * components have consumed pages themselves.
     */
    first_avail_phys = sptm_boot_args->first_avail_phys;

#if APPLEVIRTUALPLATFORM
    reset_vector_vaddr = (vm_offset_t) sptm_boot_args->sptm_reset_vector_vaddr;
#endif /* APPLEVIRTUALPLATFORM */

    cpu_data_init(&BootCpuData);
#if defined(HAS_APPLE_PAC)
    /* bootstrap cpu process dependent key for kernel has been loaded by start.s */
    BootCpuData.rop_key = ml_default_rop_pid();
    BootCpuData.jop_key = ml_default_jop_pid();
#endif /* defined(HAS_APPLE_PAC) */

    PE_init_platform(FALSE, args); /* Get platform expert set up */

#if !KASAN
    memSize = args->memSize;

    /* Initialize SPTM helper library. */
    libsptm_init(&const_sptm_args.libsptm_state);
#endif

#if __arm64__
    configure_timer_apple_regs();
    wfe_timeout_configure();
    wfe_timeout_init();

    configure_misc_apple_boot_args();
    configure_misc_apple_regs(true);
#if HAS_ARM_FEAT_SME
    (void)PE_parse_boot_argn("enable_sme", &enable_sme, sizeof(enable_sme));
    if (enable_sme) {
        arm_sme_init(true);
    }
#endif


    {
        /*
         * Select the advertised kernel page size.
         */
        if (memSize > 1ULL * 1024 * 1024 * 1024) {
            /*
             * arm64 device with > 1GB of RAM:
             * kernel uses 16KB pages.
             */
            PAGE_SHIFT_CONST = PAGE_MAX_SHIFT;
        } else {
            /*
             * arm64 device with <= 1GB of RAM:
             * kernel uses hardware page size
             * (4KB for H6/H7, 16KB for H8+).
             */
            PAGE_SHIFT_CONST = ARM_PGSHIFT;
        }

        /* 32-bit apps always see 16KB page size */
        page_shift_user32 = PAGE_MAX_SHIFT;
#ifdef APPLETYPHOON
        if (PE_parse_boot_argn("cpus_defeatures", &cpus_defeatures, sizeof(cpus_defeatures))) {
            if ((cpus_defeatures & 0xF) != 0) {
                cpu_defeatures_set(cpus_defeatures & 0xF);
            }
        }
#endif
    }
#endif

    /* Enable SPTM stability hacks if requested */
    PE_parse_boot_argn("sptm_stability_hacks", &sptm_stability_hacks, sizeof(sptm_stability_hacks));

    ml_parse_cpu_topology();


    master_cpu = ml_get_boot_cpu_number();
    assert(master_cpu >= 0 && master_cpu <= ml_get_max_cpu_number());

    BootCpuData.cpu_number = (unsigned short)master_cpu;
    BootCpuData.intstack_top = (vm_offset_t) &intstack_top;
    BootCpuData.istackptr = &intstack_top;
    BootCpuData.excepstack_top = (vm_offset_t) &excepstack_top;
    BootCpuData.excepstackptr = &excepstack_top;
    CpuDataEntries[master_cpu].cpu_data_vaddr = &BootCpuData;
    CpuDataEntries[master_cpu].cpu_data_paddr = (void *)((uintptr_t)(args->physBase)
        + ((uintptr_t)&BootCpuData
        - (uintptr_t)(args->virtBase)));

    thread = thread_bootstrap();
    thread->machine.CpuDatap = &BootCpuData;
    thread->machine.pcpu_data_base_and_cpu_number =
        ml_make_pcpu_base_and_cpu_number(0, BootCpuData.cpu_number);
    machine_set_current_thread(thread);

    /*
     * Preemption is enabled for this thread so that it can lock mutexes without
     * tripping the preemption check. In reality scheduling is not enabled until
     * this thread completes, and there are no other threads to switch to, so
     * preemption level is not really meaningful for the bootstrap thread.
     */
    thread->machine.preemption_count = 0;
    cpu_bootstrap();

    rtclock_early_init();

    kernel_debug_string_early("kernel_startup_bootstrap");
    kernel_startup_bootstrap();

    /*
     * Initialize the timer callout world
     */
    timer_call_init();

    cpu_init();

    processor_bootstrap();

    if (PE_parse_boot_argn("maxmem", &maxmem, sizeof(maxmem))) {
        xmaxmem = (uint64_t) maxmem * (1024 * 1024);
    } else if (PE_get_default("hw.memsize", &memsize, sizeof(memsize))) {
        xmaxmem = (uint64_t) memsize;
    } else {
        xmaxmem = 0;
    }

#if SCHED_HYGIENE_DEBUG
    {
        int wdt_boot_arg = 0;
        bool const wdt_disabled = (PE_parse_boot_argn("wdt", &wdt_boot_arg, sizeof(wdt_boot_arg)) && (wdt_boot_arg == -1));

        /* Disable if WDT is disabled */
        if (wdt_disabled || kern_feature_override(KF_INTERRUPT_MASKED_DEBUG_OVRD)) {
            interrupt_masked_debug_mode = SCHED_HYGIENE_MODE_OFF;
        } else if (kern_feature_override(KF_SCHED_HYGIENE_DEBUG_PMC_OVRD)) {
            /*
             * The sched hygiene facility can, in addition to checking time, capture
             * metrics provided by the cycle and instruction counters available on some
             * systems. Check if we should enable this feature based on the validation
             * overrides.
             */
            sched_hygiene_debug_pmc = 0;
        }

        if (wdt_disabled || kern_feature_override(KF_PREEMPTION_DISABLED_DEBUG_OVRD)) {
            sched_preemption_disable_debug_mode = SCHED_HYGIENE_MODE_OFF;
        }
    }
#endif /* SCHED_HYGIENE_DEBUG */

    nanoseconds_to_absolutetime(XCALL_ACK_TIMEOUT_NS, &xcall_ack_timeout_abstime);

#if HAS_BP_RET
    PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
    set_bp_ret(); // Apply branch predictor retention settings to boot CPU
#endif

    PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt));

#if __ARM_PAN_AVAILABLE__
    __builtin_arm_wsr("pan", 1);
#endif /* __ARM_PAN_AVAILABLE__ */


    arm_vm_init(xmaxmem, args);

    if (debug_boot_arg) {
        patch_low_glo();
    }

#if __arm64__ && WITH_CLASSIC_S2R
    sleep_token_buffer_init();
#endif

    PE_consistent_debug_inherit();

    /* Setup debugging output. */
    const unsigned int serial_exists = serial_init();
    kernel_startup_initialize_upto(STARTUP_SUB_KPRINTF);
    kprintf("kprintf initialized\n");

    serialmode = 0;
    if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
        /* Do we want a serial keyboard and/or console? */
        kprintf("Serial mode specified: %08X\n", serialmode);
        disable_iolog_serial_output = (serialmode & SERIALMODE_NO_IOLOG) != 0;
        enable_dklog_serial_output = restore_boot || (serialmode & SERIALMODE_DKLOG) != 0;
        int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
        if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
            if (force_sync) {
                serialmode |= SERIALMODE_SYNCDRAIN;
                kprintf(
                    "WARNING: Forcing uart driver to output synchronously. "
                    "printf()s/IOLogs will impact kernel performance.\n"
                    "You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
            }
        }
    }
    if (kern_feature_override(KF_SERIAL_OVRD)) {
        serialmode = 0;
    }

    /* Start serial if requested and a serial device was enumerated in serial_init(). */
    if ((serialmode & SERIALMODE_OUTPUT) && serial_exists) {
        serial_console_enabled = true;
        (void)switch_to_serial_console(); /* Switch into serial mode from video console */
        disableConsoleOutput = FALSE;     /* Allow printfs to happen */
    }
    PE_create_console();

    /* setup console output */
    PE_init_printf(FALSE);

#if __arm64__
#if DEBUG
    dump_kva_space();
#endif
#endif

    cpu_machine_idle_init(TRUE);

    PE_init_platform(TRUE, &BootCpuData);

#if RELEASE
    /* Validate SPTM variant. */
    if (const_sptm_args.sptm_variant != SPTM_VARIANT_RELEASE) {
        panic("arm_init: Development SPTM / Release XNU is not a supported configuration.");
    }
#endif /* RELEASE */

#if __arm64__
    extern bool cpu_config_correct;
    if (!cpu_config_correct) {
        panic("The cpumask=N boot arg cannot be used together with cpus=N, and the boot CPU must be enabled");
    }

    ml_map_cpu_pio();
#endif

    cpu_timebase_init(TRUE);

#if KPERF
    /* kptimer_curcpu_up() must be called after cpu_timebase_init */
    kptimer_curcpu_up();
#endif /* KPERF */

    PE_init_cpu();
    fiq_context_init(TRUE);


#if HIBERNATION
    pal_hib_init();
#endif /* HIBERNATION */

    /*
     * gPhysBase/Size only represent kernel-managed memory. These globals represent
     * the actual DRAM base address and size as reported by iBoot through the
     * device tree.
     */
    DTEntry chosen;
    unsigned int dt_entry_size;
    unsigned long const *dram_base;
    unsigned long const *dram_size;
    if (SecureDTLookupEntry(NULL, "/chosen", &chosen) != kSuccess) {
        panic("%s: Unable to find 'chosen' DT node", __FUNCTION__);
    }

    if (SecureDTGetProperty(chosen, "dram-base", (void const **)&dram_base, &dt_entry_size) != kSuccess) {
        panic("%s: Unable to find 'dram-base' entry in the 'chosen' DT node", __FUNCTION__);
    }

    if (SecureDTGetProperty(chosen, "dram-size", (void const **)&dram_size, &dt_entry_size) != kSuccess) {
        panic("%s: Unable to find 'dram-size' entry in the 'chosen' DT node", __FUNCTION__);
    }

    gDramBase = *dram_base;
    gDramSize = *dram_size;

    /*
     * Initialize the stack protector for all future calls
     * to C code. Since kernel_bootstrap() eventually
     * switches stack context without returning through this
     * function, we do not risk failing the check even though
     * we mutate the guard word during execution.
     */
    __stack_chk_guard = (unsigned long)early_random();
    /*
     * Zero one byte of the guard: an embedded NUL terminates C-string
     * operations, so string-based overflows cannot reproduce the guard value.
     */
    __stack_chk_guard &= ~(0xFFULL << 8);
    machine_startup(args);
}

/*
 * Routine: arm_init_cpu
 * Function:
 *     Runs on S2R resume (all CPUs) and SMP boot (non-boot CPUs only).
 */

void
arm_init_cpu(
    cpu_data_t *cpu_data_ptr,
    __unused uint64_t hibernation_args)
{
#if HIBERNATION
    sptm_hibernation_args_xnu_t *hibargs = (sptm_hibernation_args_xnu_t *)hibernation_args;

    if ((hibargs != 0) && (hibargs->hib_header_phys != 0) && (hibargs->handoff_page_count > 0)) {
        /*
         * We must copy the handoff region before anything else because the physical pages
         * holding the handoff region are not tracked by xnu as in-use.
         */
        HibernationCopyHandoffRegionFromPageArray(&hibargs->handoff_pages[0], hibargs->handoff_page_count);
    }
#endif /* HIBERNATION */

#if __ARM_PAN_AVAILABLE__
    __builtin_arm_wsr("pan", 1);
#endif

#ifdef __arm64__
    configure_timer_apple_regs();
    configure_misc_apple_regs(false);
#endif
#if HAS_ARM_FEAT_SME
    if (enable_sme) {
        arm_sme_init(false);
    }
#endif

    os_atomic_andnot(&cpu_data_ptr->cpu_flags, SleepState, relaxed);


    machine_set_current_thread(cpu_data_ptr->cpu_active_thread);


#if HIBERNATION
    if (hibargs != 0 && hibargs->hib_header_phys != 0) {
        gIOHibernateState = kIOHibernateStateWakingFromHibernate;
        uart_hibernation = true;
        __nosan_memcpy(gIOHibernateCurrentHeader, (void*)phystokv(hibargs->hib_header_phys), sizeof(IOHibernateImageHeader));
    }
    if ((cpu_data_ptr == &BootCpuData) && (gIOHibernateState == kIOHibernateStateWakingFromHibernate) && ml_is_quiescing()) {
        // the "normal" S2R code captures wake_abstime too early, so on a hibernation resume we fix it up here
        extern uint64_t wake_abstime;
        wake_abstime = gIOHibernateCurrentHeader->lastHibAbsTime;

        // since the hw clock stops ticking across hibernation, we need to apply an offset;
        // iBoot computes this offset for us and passes it via the hibernation header
        extern uint64_t hwclock_conttime_offset;
        hwclock_conttime_offset = gIOHibernateCurrentHeader->hwClockOffset;

        // during hibernation, we captured the idle thread's state from inside the PPL context, so we have to
        // fix up its preemption count
        unsigned int expected_preemption_count = (gEnforcePlatformActionSafety ? 2 : 1);
        if (get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread) !=
            expected_preemption_count) {
            panic("unexpected preemption count %u on boot cpu thread (should be %u)",
                get_preemption_level_for_thread(cpu_data_ptr->cpu_active_thread),
                expected_preemption_count);
        }
        cpu_data_ptr->cpu_active_thread->machine.preemption_count--;
    }
#endif /* HIBERNATION */

#if __arm64__
    wfe_timeout_init();
    flush_mmu_tlb();
#endif

    cpu_machine_idle_init(FALSE);

    cpu_init();

#ifdef APPLETYPHOON
    if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
        cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
    }
#endif
    /*
     * Initialize the timebase before serial_init, as some serial
     * drivers use mach_absolute_time() to implement rate control
     */
    cpu_timebase_init(FALSE);

#if KPERF
    /* kptimer_curcpu_up() must be called after cpu_timebase_init */
    kptimer_curcpu_up();
#endif /* KPERF */

    if (cpu_data_ptr == &BootCpuData && ml_is_quiescing()) {
#if __arm64__ && __ARM_GLOBAL_SLEEP_BIT__
        /*
         * Prevent CPUs from going into deep sleep until all
         * CPUs are ready to do so.
         */
        arm64_stall_sleep = TRUE;
#endif
        serial_init();
        PE_init_platform(TRUE, NULL);
        commpage_update_timebase();

        exclaves_update_timebase(EXCLAVES_CLOCK_ABSOLUTE,
            rtclock_base_abstime);
#if HIBERNATION
        if (gIOHibernateState == kIOHibernateStateWakingFromHibernate) {
            exclaves_update_timebase(EXCLAVES_CLOCK_CONTINUOUS,
                hwclock_conttime_offset);
        }
#endif /* HIBERNATION */
    }
    PE_init_cpu();

    fiq_context_init(TRUE);
    cpu_data_ptr->rtcPop = EndOfAllTime;
    timer_resync_deadlines();

#if DEVELOPMENT || DEBUG
    PE_arm_debug_enable_trace(true);
#endif /* DEVELOPMENT || DEBUG */

    kprintf("arm_init_cpu(): cpu %d online\n", cpu_data_ptr->cpu_number);

    if (cpu_data_ptr == &BootCpuData && ml_is_quiescing()) {
        if (kdebug_enable == 0) {
            __kdebug_only uint64_t elapsed = kdebug_wake();
            KDBG(IOKDBG_CODE(DBG_HIBERNATE, 15), mach_absolute_time() - elapsed);
        }

#if CONFIG_TELEMETRY
        bootprofile_wake_from_sleep();
#endif /* CONFIG_TELEMETRY */
    }
#if CONFIG_CPU_COUNTERS
    mt_wake_per_core();
#endif /* CONFIG_CPU_COUNTERS */

#if defined(KERNEL_INTEGRITY_CTRR)
    if (ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] != CTRR_LOCKED) {
        lck_spin_lock(&ctrr_cpu_start_lck);
        ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] = CTRR_LOCKED;
        thread_wakeup(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id]);
        lck_spin_unlock(&ctrr_cpu_start_lck);
    }
#endif


    secondary_cpu_main(NULL);
}

/*
 * Routine: arm_init_idle_cpu
 * Function: Resume from non-retention WFI. Called from the reset vector.
 */
void __attribute__((noreturn))
arm_init_idle_cpu(
    cpu_data_t *cpu_data_ptr)
{
#if __ARM_PAN_AVAILABLE__
    __builtin_arm_wsr("pan", 1);
#endif

    machine_set_current_thread(cpu_data_ptr->cpu_active_thread);

#if __arm64__
    wfe_timeout_init();
#endif

#ifdef APPLETYPHOON
    if ((cpus_defeatures & (0xF << 4 * cpu_data_ptr->cpu_number)) != 0) {
        cpu_defeatures_set((cpus_defeatures >> 4 * cpu_data_ptr->cpu_number) & 0xF);
    }
#endif

    /*
     * Update the active debug object to reflect that debug registers have been reset.
     * This will force any thread with active debug state to resync the debug registers
     * if it returns to userspace on this CPU.
     */
    if (cpu_data_ptr->cpu_user_debug != NULL) {
        arm_debug_set(NULL);
    }

    fiq_context_init(FALSE);

    cpu_idle_exit(TRUE);
}

vm_map_address_t
phystokv(pmap_paddr_t pa)
{
    sptm_papt_t va;
    if (sptm_phystokv(pa, &va) != LIBSPTM_SUCCESS) {
        return 0;
    }
    return (vm_map_address_t)va;
}

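/**
 * Variant of phystokv() that also reports how many bytes may be safely
 * accessed through the returned mapping: *max_len is clamped so that
 * [pa, pa + *max_len) does not cross a page boundary.
 */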
vm_map_address_t
phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
{
    vm_size_t len;

    len = PAGE_SIZE - (pa & PAGE_MASK);
    if (*max_len > len) {
        *max_len = len;
    }

    return phystokv((sptm_paddr_t)pa);
}

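/* Translate a statically-mapped kernel VA back to a physical address; the translation is expected to always succeed. */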
vm_offset_t
ml_static_vtop(vm_offset_t va)
{
    return (vm_offset_t)kvtophys_nofail((sptm_papt_t)va);
}

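/* Granule flags: permit block mappings (ALLOW_BLOCK) and the contiguous hint (ALLOW_HINT) when mapping a region. */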
#define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
#define ARM64_GRANULE_ALLOW_HINT (1 << 1)

// Populate seg...AuxKC and fixup AuxKC permissions
static bool
arm_vm_auxkc_init(void)
{
    if (auxkc_mh == 0 || auxkc_base == 0) {
        return false; // no auxKC.
    }

    /* Fixup AuxKC and populate seg*AuxKC globals used below */
    arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);

    /*
     * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
     * from the RO address range returned by kernel_collection_adjust_mh_addrs().
     * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
     * its RO region before extending it.
     */
    assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
    assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
    if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
        segHIGHESTROAuxKC = segHIGHESTAuxKC;
    }

    /*
     * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
     * by CTRR, and the AuxKC RX region must be within the RO region.
     */
    assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
    assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
    assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
    assert(segLOWESTAuxKC <= segLOWESTROAuxKC);

    return true;
}

/*
 * Looks up the set of properties that describe the physical load addresses and sizes of the boot
 * kernelcache's loaded segments in the device tree and returns (1) the number of segments found
 * in *arm_vm_kernelcache_numrangesp and (2) their starting/ending addresses as an array of type
 * arm_physrange_t in *arm_vm_kernelcache_rangesp.
 * The function returns the total number of pages across all loaded boot kernelcache segments.
 * If there is a problem looking up the /chosen/memory-map node in the DT, all arguments are
 * zeroed and the function returns 0.
 */
static unsigned int
arm_get_bootkc_ranges_from_DT(const arm_physrange_t **arm_vm_kernelcache_rangesp, int *arm_vm_kernelcache_numrangesp)
{
    DTEntry memory_map;
    int err;
    DTMemoryMapRange const *range;
    unsigned int rangeSize;
#define NUM_BOOTKC_RANGES 5
    static arm_physrange_t bootkc_physranges[NUM_BOOTKC_RANGES] = { {0, } };
    static int bootkc_numranges = 0;
    static unsigned int bootkc_total_pages = 0;

    assert(arm_vm_kernelcache_rangesp != NULL);
    assert(arm_vm_kernelcache_numrangesp != NULL);

    /* return cached values if previously computed */
    if (bootkc_numranges == 0) {
        err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
        if (err != kSuccess) {
            *arm_vm_kernelcache_numrangesp = 0;
            *arm_vm_kernelcache_rangesp = NULL;
            return 0;
        }

        /* We're looking for 5 ranges: BootKC-ro, BootKC-rx, BootKC-bx, BootKC-rw, and BootKC-le */
        const char *BootKC_Properties[NUM_BOOTKC_RANGES] = {
            "BootKC-ro", "BootKC-rx", "BootKC-bx", "BootKC-rw", "BootKC-le"
        };

        for (int i = 0; i < NUM_BOOTKC_RANGES; i++) {
            err = SecureDTGetProperty(memory_map, BootKC_Properties[i], (void const **)&range, &rangeSize);
            if (err == kSuccess && rangeSize == sizeof(DTMemoryMapRange)) {
                bootkc_physranges[i].start_phys = range->paddr;
                bootkc_physranges[i].end_phys = range->paddr + range->length;
                assert((bootkc_physranges[i].end_phys & PAGE_MASK) == 0);
                bootkc_numranges++;
                bootkc_total_pages += (unsigned int) atop_64(bootkc_physranges[i].end_phys - bootkc_physranges[i].start_phys);
            }
        }
    }

    *arm_vm_kernelcache_numrangesp = bootkc_numranges;
    *arm_vm_kernelcache_rangesp = &bootkc_physranges[0];
    return bootkc_total_pages;
}

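/*
 * Compute the lowest/highest addresses of the kernelcache, folding in any
 * EXTRADATA (locked-down device tree, trust cache) that precedes it, and
 * record the auxKC and overall kernelcache bounds.
 */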
void
arm_vm_prot_init(__unused boot_args * args)
{
    segLOWESTTEXT = UINT64_MAX;
    if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
        segLOWESTTEXT = segPRELINKTEXTB;
    }
    assert(segSizeTEXT);
    if (segTEXTB < segLOWESTTEXT) {
        segLOWESTTEXT = segTEXTB;
    }
    assert(segLOWESTTEXT < UINT64_MAX);

    segEXTRADATA = 0;
    segSizeEXTRADATA = 0;
    segTRUSTCACHE = 0;
    segSizeTRUSTCACHE = 0;

    segLOWEST = segLOWESTTEXT;
    segLOWESTRO = segLOWESTTEXT;

    if (segLOWESTKC && segLOWESTKC < segLOWEST) {
        /*
         * kernel collections have segments below the kernel. In particular the collection mach header
         * is below PRELINK_TEXT and is not covered by any other segments already tracked.
         */
        segLOWEST = segLOWESTKC;
        if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
            segLOWESTRO = segLOWESTROKC;
        }
        if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
            segHIGHESTRO = segHIGHESTROKC;
        }
    }

    DTEntry memory_map;
    int err;

    // Device Tree portion of EXTRADATA
    if (SecureDTIsLockedDown()) {
        segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
        segSizeEXTRADATA = PE_state.deviceTreeSize;
    }

    // Trust Caches portion of EXTRADATA
    {
        DTMemoryMapRange const *trustCacheRange;
        unsigned int trustCacheRangeSize;

        err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
        assert(err == kSuccess);

        err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
        if (err == kSuccess) {
            if (trustCacheRangeSize != sizeof(DTMemoryMapRange)) {
                panic("Unexpected /chosen/memory-map/TrustCache property size %u != %zu", trustCacheRangeSize, sizeof(DTMemoryMapRange));
            }

            vm_offset_t const trustCacheRegion = phystokv(trustCacheRange->paddr);
            if (trustCacheRegion < segLOWEST) {
                if (segEXTRADATA != 0) {
                    if (trustCacheRegion != segEXTRADATA + segSizeEXTRADATA) {
                        panic("Unexpected location of TrustCache region: %#lx != %#lx",
                            trustCacheRegion, segEXTRADATA + segSizeEXTRADATA);
                    }
                    segSizeEXTRADATA += trustCacheRange->length;
                } else {
                    // Not all devices support CTRR device trees.
                    segEXTRADATA = trustCacheRegion;
                    segSizeEXTRADATA = trustCacheRange->length;
                }
            }
            segTRUSTCACHE = trustCacheRegion;
            segSizeTRUSTCACHE = trustCacheRange->length;
        }
    }

    if (segSizeEXTRADATA != 0) {
        if (segEXTRADATA <= segLOWEST) {
            segLOWEST = segEXTRADATA;
            if (segEXTRADATA <= segLOWESTRO) {
                segLOWESTRO = segEXTRADATA;
            }
        } else {
            panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
        }
    }

    /* Record the bounds of the kernelcache. */
    vm_kernelcache_base = segLOWEST;

    auxkc_mh = SPTMArgs->auxkc_mh;
    auxkc_base = SPTMArgs->auxkc_base;
    end_kern = SPTMArgs->auxkc_end;

    vm_kernelcache_top = end_kern;
}

/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;

#define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
#define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))

void
arm_vm_prot_finalize(boot_args * args __unused)
{
    /*
     * At this point, we are far enough along in the boot process that it will be
     * safe to free up all of the memory preceding the kernel. It may in fact
     * be safe to do this earlier.
     *
     * This keeps the memory in the V-to-P mapping, but advertises it to the VM
     * as usable.
     */

    /* Slide KLDDATA */
    sptm_slide_region(segKLDDATAB, (unsigned int)(segSizeKLDDATA >> PAGE_SHIFT));

    /*
     * Replace the boot CPU's stacks with properly-guarded dynamically allocated stacks.
     * This must happen prior to sliding segBOOTDATAB, which will effectively remove
     * the existing boot stacks.
     */
    cpu_stack_alloc(&BootCpuData);
    arm64_replace_bootstack(&BootCpuData);

    /* Slide early-boot data */
    sptm_slide_region(segBOOTDATAB, (unsigned int)(segSizeBOOTDATA >> PAGE_SHIFT));

    /* Slide linkedit, unless otherwise requested */
    bool keep_linkedit = false;
    PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
#if CONFIG_DTRACE
    if (dtrace_keep_kernel_symbols()) {
        keep_linkedit = true;
    }
#endif /* CONFIG_DTRACE */
#if KASAN_DYNAMIC_BLACKLIST
    /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
     * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
    keep_linkedit = true;
#endif

    if (!keep_linkedit) {
        sptm_slide_region(segLINKB, (unsigned int)(segSizeLINK >> PAGE_SHIFT));
        if (segSizePLKLINKEDIT) {
            /* Prelinked kernel LINKEDIT */
            sptm_slide_region(segPLKLINKEDITB, (unsigned int)(segSizePLKLINKEDIT >> PAGE_SHIFT));
        }
    }

    /* Slide prelinked kernel plists */
    sptm_slide_region(segPRELINKINFOB, (unsigned int)(segSizePRELINKINFO >> PAGE_SHIFT));

    /*
     * Free the portion of memory that precedes the first usable region, known
     * as the physical slide.
     */
    ml_static_mfree(SPTMArgs->phys_slide_papt, SPTMArgs->phys_slide_size);

    /*
     * KTRR support means we will be mucking with these pages and trying to
     * protect them; we cannot free the pages to the VM if we do this.
     */
    if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
        /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
        ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
    }

    ml_static_mfree(segBOOTDATAB, segSizeBOOTDATA);

#if __ARM_KERNEL_PROTECT__
    arm_vm_populate_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */
}

/* allocate a page for a page table: we support static and dynamic mappings.
 *
 * returns a physical address for the allocated page
 *
 * for static mappings, we allocate from the region ropagetable_begin to ro_pagetable_end-1,
 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
 *
 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
 */
pmap_paddr_t
alloc_ptpage(sptm_pt_level_t level, bool map_static)
{
    pmap_paddr_t paddr = 0;

#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
    map_static = FALSE;
#endif

    /* Set the next free ropage if this is the first call to this function */
    if (!ropage_next) {
        ropage_next = (vm_offset_t)&ropagetable_begin;
    }

    if (map_static) {
        /* This is a RO allocation. Make sure we have room in the ropagetable area */
        assert(ropage_next < (vm_offset_t)&ropagetable_end);

        /* Obtain physical address and increment the index into the ropagetable area */
        paddr = (pmap_paddr_t)kvtophys((sptm_papt_t)ropage_next);
        ropage_next += ARM_PGBYTES;
    } else {
        /* This is a RW allocation. Simply grab a page from [avail_start] */
        paddr = avail_start;
        avail_start += ARM_PGBYTES;
    }

    /* Retype the page to XNU_PAGE_TABLE, with the desired level */
    sptm_retype_params_t retype_params;
    retype_params.level = level;
    sptm_retype(paddr, XNU_DEFAULT, XNU_PAGE_TABLE, retype_params);

    return paddr;
}

/**
 * Initialize a vm_image_offsets structure with information obtained from a
 * Mach-O header for the wanted image.
 *
 * @param debug_header_entry The entry in the debug header images list to obtain
 *                           a pointer to the Mach-O header from. This must be
 *                           either the SPTM or TXM debug header entry.
 * @param offsets Output pointer of the vm_image_offsets structure to fill in.
 */
static void
init_image_offsets(size_t debug_header_entry, vm_image_offsets *offsets)
{
    assert(offsets != NULL);
    assert((debug_header_entry == DEBUG_HEADER_ENTRY_SPTM) ||
        (debug_header_entry == DEBUG_HEADER_ENTRY_TXM));

    offsets->slid_base = (vm_offset_t)SPTMArgs->debug_header->image[debug_header_entry];
    kernel_mach_header_t *macho = (kernel_mach_header_t*)offsets->slid_base;
    offsets->unslid_base = (vm_offset_t)getsegbynamefromheader(macho, "__TEXT")->vmaddr;
    assert((offsets->slid_base != 0) && (offsets->unslid_base != 0));
    offsets->slide = offsets->slid_base - offsets->unslid_base;
    offsets->unslid_top = getlastaddr(macho);
    offsets->slid_top = offsets->unslid_top + offsets->slide;
}

#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
#define ARM64_PHYSMAP_SLIDE_MASK  (ARM64_PHYSMAP_SLIDE_RANGE - 1)

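/*
 * Set up the kernel's view of physical memory: initialize the memory-size
 * globals from boot_args and the SPTM boot arguments, adopt the SPTM-provided
 * physmap and root page table, and retrieve segment addresses from the kernel
 * mach-o header.
 */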
1550 void
arm_vm_init(uint64_t memory_size,boot_args * args)1551 arm_vm_init(uint64_t memory_size, boot_args * args)
1552 {
1553 vm_map_address_t va_l1, va_l1_end;
1554 tt_entry_t *cpu_l1_tte;
1555 tt_entry_t *cpu_l2_tte;
1556 vm_map_address_t va_l2, va_l2_end;
1557 vm_map_address_t dynamic_memory_begin;
1558 uint64_t mem_segments;
1559
1560 /* Get the virtual and physical kernel-managed memory base from boot_args */
1561 gVirtBase = args->virtBase;
1562 gPhysBase = args->physBase;
1563
1564 /* Get the memory size */
1565 #if KASAN
1566 real_phys_size = memSize + (shadow_ptop - shadow_pbase);
1567 #else
1568 real_phys_size = memSize;
1569 #endif
1570
1571 /**
1572 * Ensure the physical region we specify for the VM to manage ends on a
1573 * software page boundary. Note that the software page size (PAGE_SIZE)
1574 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1575 * We must round the reported memory size down to the nearest PAGE_SIZE
1576 * boundary to ensure the VM does not try to manage a page it does not
1577 * completely own. The KASAN shadow region, if present, is managed entirely
1578 * in units of the hardware page size and should not need similar treatment.
1579 */
1580 gPhysSize = mem_size = ((gPhysBase + memSize) & ~PAGE_MASK) - gPhysBase;
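	/*
	 * Worked example, assuming a 16KB PAGE_SIZE over 4KB hardware pages: if
	 * iBoot reports a memSize whose end falls 8KB into a 16KB software page,
	 * the mask above rounds the end down by 8KB so that the VM never manages
	 * a software page whose tail lies outside kernel-managed memory.
	 */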

	/* Obtain total memory size, including non-managed memory */
	mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;

	if ((memory_size != 0) && (mem_size > memory_size)) {
		mem_size = memory_size;
		max_mem_actual = memory_size;
	} else {
		max_mem_actual = mem_actual;
	}

	/* Make sure the system does not have more physical memory than what can be mapped */
	if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
		panic("Unsupported memory configuration %lx", mem_size);
	}

	physmap_base = SPTMArgs->physmap_base;
	physmap_end = static_memory_end = SPTMArgs->physmap_end;

#if KASAN && !defined(ARM_LARGE_MEMORY) && !defined(CONFIG_SPTM)
	/* add the KASAN stolen memory to the physmap */
	dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
#else
	dynamic_memory_begin = static_memory_end;
#endif

	if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
		panic("Unsupported memory configuration %lx", mem_size);
	}

	/*
	 * TODO: Free bootstrap table memory back to the allocator.
	 * On large-memory systems the bootstrap tables could be quite large.
	 * After bootstrap completes, XNU can warm-start with a single 16KB page
	 * mapping to trampoline to KVA; this requires only 3 pages to stay
	 * resident.
	 */
	avail_start = first_avail_phys;

	/*
	 * Initialize the L1 page table page.
	 *
	 * SPTM TODO: Have a separate root_table_paddr field in the sptm_args
	 *            instead of snooping the libsptm_state (XNU should not be
	 *            snooping the libsptm_state directly in general).
	 */
	cpu_ttep = (pmap_paddr_t)const_sptm_args.libsptm_state.root_table_paddr;
	cpu_tte = (tt_entry_t *)phystokv(cpu_ttep);
	avail_end = gPhysBase + mem_size;
	assert(!(avail_end & PAGE_MASK));

	/* These need to be set early so pa_valid() works */
	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);
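	/*
	 * pa_valid() consults these bounds to decide whether a physical address
	 * belongs to VM-managed DRAM (roughly, vm_first_phys <= pa < vm_last_phys).
	 */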

#if KASAN
	real_avail_end = gPhysBase + real_phys_size;
#else
	real_avail_end = avail_end;
#endif

	/*
	 * Now retrieve addresses for various segments from the kernel Mach-O header
	 */
	segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
	segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
	segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
	segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
	segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
	segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
	segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);

	segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
	segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
	segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
	segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
	segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
	segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
	segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
	segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
	segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
	segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);

	sectHIBTEXTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
	sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
	segHIBDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);

	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
		kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);

		// fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
		segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
		assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty

		assert(segLASTB);                               // kernel LAST can be empty, but it must have
		                                                // a valid address for computations below.

		assert(segKCTEXTEXECB <= segLASTB);             // KC TEXT_EXEC must contain kernel LAST
		assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
		segPLKTEXTEXECB = segLASTB + segSizeLAST;
		segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);

		// fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
		segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
		assert(segPLKDATACONSTB && !segSizePLKDATACONST); // kernel PLK_DATA_CONST must be empty
		assert(segLASTDATACONSTB && segSizeLASTDATACONST); // kernel LASTDATA_CONST must be non-empty
		assert(segKCDATACONSTB <= segLASTDATACONSTB); // KC DATA_CONST must contain kernel LASTDATA_CONST
		assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
		segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
		segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);

		// fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
		segKCDATAB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
		assert(segPRELINKDATAB && !segSizePRELINKDATA); // kernel PRELINK_DATA must be empty
		assert(segKCDATAB <= segPRELINKDATAB); // KC DATA must contain kernel PRELINK_DATA
		assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
		segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);

		// fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
		assert(segPRELINKTEXTB && !segSizePRELINKTEXT); // kernel PRELINK_TEXT must be empty
		segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
		assert(segPRELINKINFOB && !segSizePRELINKINFO); // kernel PRELINK_INFO must be empty
		segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
		segLINKB = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
	}

	/* if one of the new segments is present, the other one better be as well */
	if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
		assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
	}

	etext = (vm_offset_t) segTEXTB + segSizeTEXT;
	sdata = (vm_offset_t) segDATAB;
	edata = (vm_offset_t) segDATAB + segSizeDATA;
	end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastkerneladdr()); /* Force end to next page */

	vm_set_page_size();

	vm_kernel_base = segTEXTB;
	vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
	vm_kext_base = segPRELINKTEXTB;
	vm_kext_top = vm_kext_base + segSizePRELINKTEXT;

	vm_prelink_stext = segPRELINKTEXTB;
	if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
	} else {
		vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
	}
	vm_prelink_sinfo = segPRELINKINFOB;
	vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
	vm_slinkedit = segLINKB;
	vm_elinkedit = segLINKB + segSizeLINK;

	vm_prelink_sdata = segPRELINKDATAB;
	vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;

	arm_vm_prot_init(args);

	/**
	 * Count the number of pages the boot kernelcache occupies. Additionally,
	 * ml_static_mfree() uses the BootKC ranges from the DT to account for freed kernelcache pages.
	 */
	vm_page_kernelcache_count = arm_get_bootkc_ranges_from_DT(&arm_vm_kernelcache_ranges, &arm_vm_kernelcache_numranges);

	assert(vm_page_kernelcache_count > 0);

#if KASAN
	/* record the extent of the physmap */
	physmap_vbase = physmap_base;
	physmap_vtop = physmap_end;
	kasan_init();
#endif /* KASAN */

#if CONFIG_CPU_COUNTERS
	mt_early_init();
#endif /* CONFIG_CPU_COUNTERS */

	kva_active = TRUE;

	if (arm_vm_auxkc_init()) {
		if (segLOWESTROAuxKC < segLOWESTRO) {
			segLOWESTRO = segLOWESTROAuxKC;
		}
		if (segHIGHESTROAuxKC > segHIGHESTRO) {
			segHIGHESTRO = segHIGHESTROAuxKC;
		}
		if (segLOWESTRXAuxKC < segLOWESTTEXT) {
			segLOWESTTEXT = segLOWESTRXAuxKC;
		}

#if XNU_TARGET_OS_OSX
		/**
		 * If we are on macOS with third-party kexts (an auxKC is present),
		 * disable XNU_KERNEL_RESTRICTED for now.
		 */
		use_xnu_restricted = false;
#endif /* XNU_TARGET_OS_OSX */
	}

	sane_size = mem_size - (avail_start - gPhysBase);
	max_mem = mem_size;
	// vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
	vm_kernel_slid_base = segLOWESTTEXT;
	vm_kernel_stext = segTEXTB;

	if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
		vm_kernel_etext = segTEXTEXECB + segSizeTEXTEXEC;
		vm_kernel_slid_top = vm_slinkedit;
	} else {
		assert(segDATACONSTB == segTEXTB + segSizeTEXT);
		assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
		vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
		vm_kernel_slid_top = vm_prelink_einfo;
	}

	/**
	 * Calculate the address ranges used to determine whether an address is an
	 * SPTM or TXM address, as well as the slides used to slide/unslide those
	 * addresses.
	 *
	 * The debug header contains pointers to the beginning of the images loaded
	 * up by iBoot (which always start with the Mach-O header). The __TEXT
	 * segment should be the first (and lowest) segment in both of these
	 * binaries (the addresses in the Mach-O header are all unslid).
	 */
	init_image_offsets(DEBUG_HEADER_ENTRY_SPTM, &vm_sptm_offsets);
	init_image_offsets(DEBUG_HEADER_ENTRY_TXM, &vm_txm_offsets);
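
	/*
	 * Illustrative example (not from this file): with vm_sptm_offsets
	 * populated, a handler can classify a slid address as an SPTM image
	 * address with a simple range check:
	 *
	 *   bool is_sptm = (addr >= vm_sptm_offsets.slid_base) &&
	 *       (addr < vm_sptm_offsets.slid_top);
	 */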

	dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);

	/* TODO: CONFIG_XNUPOST CTRR test */

	pmap_bootstrap(dynamic_memory_begin);

	disable_preemption();

	/*
	 * Initialize L3 page table pages covering this address range:
	 * 2MB + framebuffer size + 10MB for each 256MB segment
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all
	 * static memory are allocated and inserted into the hierarchy by the SPTM
	 * beforehand. Instead, this code simply walks the page tables to find
	 * those pre-allocated tables and allocates PTD objects for them.
	 */

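	/*
	 * Round mem_size up to a whole number of 256MB (1 << 28 byte) segments so
	 * that a partial final segment still receives its 10MB budget.
	 */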
	mem_segments = (mem_size + 0x0FFFFFFF) >> 28;

	va_l1 = dynamic_memory_begin;
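	/*
	 * Size the window as 2MB plus 10MB per 256MB segment, add room for the
	 * framebuffer, then round the end up to an 8MB boundary (the 0x7FFFFF
	 * mask below).
	 */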
	va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
	va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
	va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;

	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {
		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
			/* The address computation overflowed: va_l1 lies in the last L1
			 * entry, which must cover the remainder of the range. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
		}

		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);

		while (va_l2 < va_l2_end) {
			/* Obtain the pre-allocated page and set up the L3 table TTE in the L2 entry */
			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		}

		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}

	/*
	 * Initialize L3 page table pages covering this address range:
	 * ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
	 *
	 * Note: This does not allocate L3 page tables, since page tables for all
	 * static memory are allocated and inserted into the hierarchy by the SPTM
	 * beforehand. Instead, this code simply walks the page tables to find
	 * those pre-allocated tables and allocates PTD objects for them.
	 */
	va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
	va_l1_end = VM_MAX_KERNEL_ADDRESS;

	cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);

	while (va_l1 < va_l1_end) {
		va_l2 = va_l1;

		if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
			/* The address computation overflowed: va_l1 lies in the last L1
			 * entry, which must cover the remainder of the range. */
			va_l2_end = va_l1_end;
		} else {
			va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
		}

		cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);

		while (va_l2 < va_l2_end) {
			/* Obtain the pre-allocated page and set up the L3 table TTE in the L2 entry */
			tt_entry_t *ttp = pmap_tt2e(kernel_pmap, va_l2);
			pt_entry_t *ptp = (pt_entry_t *)phystokv(tte_to_pa(*ttp));
			pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);

			va_l2 += ARM_TT_L2_SIZE;
			cpu_l2_tte++;
		}

		va_l1 = va_l2_end;
		cpu_l1_tte++;
	}

	/*
	 * Adjust avail_start so that the range that the VM owns
	 * starts on a PAGE_SIZE aligned boundary.
	 */
	avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;

	/* TODO pmap_static_allocations_done() */

	first_avail = avail_start;
	patch_low_glo_static_region(first_avail_phys, avail_start - first_avail_phys);
	enable_preemption();
}

/*
 * Returns true if the address lies within the __TEXT, __TEXT_EXEC, or
 * __DATA_CONST segment ranges. This is what [vm_kernel_stext, vm_kernel_etext)
 * used to cover. The segments together may no longer form a contiguous address
 * range, so the individual intervals are inspected.
 */
bool
kernel_text_contains(vm_offset_t addr)
{
	if (segTEXTB <= addr && addr < (segTEXTB + segSizeTEXT)) {
		return true;
	}
	if (segTEXTEXECB <= addr && addr < (segTEXTEXECB + segSizeTEXTEXEC)) {
		return true;
	}
	return segDATACONSTB <= addr && addr < (segDATACONSTB + segSizeDATACONST);
}
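
/*
 * Hypothetical usage sketch (not from this file): a consumer validating a
 * program counter before treating it as kernel code might write:
 *
 *   if (kernel_text_contains(pc)) {
 *       ... pc lies in __TEXT, __TEXT_EXEC, or __DATA_CONST ...
 *   }
 */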