/*
 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_kdp.h>
#include <debug.h>

#include <kern/assert.h>
#include <kern/misc_protos.h>
#include <kern/monotonic.h>
#include <mach/vm_types.h>
#include <mach/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

#include <machine/atomic.h>
#include <arm64/proc_reg.h>
#include <arm64/lowglobals.h>
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <pexpert/arm64/boot.h>
#include <pexpert/device_tree.h>

#include <libkern/kernel_mach_header.h>
#include <libkern/section_keywords.h>

#include <san/kasan.h>

#if __ARM_KERNEL_PROTECT__
/*
 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
 * mappable space preceding the kernel (as we unmap the kernel by cutting the
 * range covered by TTBR1 in half). This must also cover the exception vectors.
 */
static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);

/* The exception vectors and the kernel cannot share root TTEs. */
static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);

/*
 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
 * the exception vectors.
 */
static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
#endif /* __ARM_KERNEL_PROTECT__ */
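
/*
 * Illustrative arithmetic for the assert above (not from the original source;
 * the concrete value of ARM_KERNEL_PROTECT_EXCEPTION_START is
 * configuration-dependent). (~START + 1) is the distance from START to the
 * top of the address space, i.e. the span the vectors may occupy. If START
 * were 0xFFFFFFFFFFFE0000, that span is 0x20000 (128KB); the EL1 copy of the
 * vectors lives in the upper half of the TTBR1 range and the EL0 alias at the
 * same offset within the lower half, so twice that span (0x40000) must fit
 * within the total range TTBR1 covers.
 */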

#define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)

#if KASAN
extern vm_offset_t shadow_pbase;
extern vm_offset_t shadow_ptop;
extern vm_offset_t physmap_vbase;
extern vm_offset_t physmap_vtop;
#endif

/*
 * We explicitly place this in const, as it is not const from a language
 * perspective, but it is only modified before we actually switch away from
 * the bootstrap page tables.
 */
SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));

/*
 * Denotes the end of xnu.
 */
extern void *last_kernel_symbol;

extern void arm64_replace_bootstack(cpu_data_t*);
extern void PE_slide_devicetree(vm_offset_t);

/*
 * KASLR parameters
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;

SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;

/* Used by <mach/arm/vm_param.h> */
SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;

/* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
 * all kexts before the kernel. This is only for arm64 devices and looks
 * something like the following:
 * -- vmaddr order --
 * 0xffffff8004004000 __PRELINK_TEXT
 * 0xffffff8007004000 __TEXT (xnu)
 * 0xffffff80075ec000 __DATA (xnu)
 * 0xffffff80076dc000 __KLD (xnu)
 * 0xffffff80076e0000 __LAST (xnu)
 * 0xffffff80076e4000 __LINKEDIT (xnu)
 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
 * 0xffffff800782c000 __PRELINK_INFO
 * 0xffffff80078e4000 -- End of kernelcache
 */

/* 24921709 - make XNU ready for KTRR
 *
 * Two possible kernel cache layouts, depending on which kcgen is being used.
 * VAs increasing downwards.
 * Old KCGEN:
 *
 * __PRELINK_TEXT
 * __TEXT
 * __DATA_CONST
 * __TEXT_EXEC
 * __KLD
 * __LAST
 * __DATA
 * __PRELINK_DATA (expected empty)
 * __LINKEDIT
 * __PRELINK_INFO
 *
 * New kcgen:
 *
 * __PRELINK_TEXT        <--- First KTRR (ReadOnly) segment
 * __PLK_DATA_CONST
 * __PLK_TEXT_EXEC
 * __TEXT
 * __DATA_CONST
 * __TEXT_EXEC
 * __KLD
 * __LAST                <--- Last KTRR (ReadOnly) segment
 * __DATA
 * __BOOTDATA (if present)
 * __LINKEDIT
 * __PRELINK_DATA (expected populated now)
 * __PLK_LINKEDIT
 * __PRELINK_INFO
 *
 */

vm_offset_t mem_size;                             /* Size of actual physical memory present
                                                   * minus any performance buffer and possibly
                                                   * limited by mem_limit in bytes */
uint64_t    mem_actual;                           /* The "One True" physical memory size;
                                                   * in practice, the highest physical
                                                   * address + 1 */
uint64_t    max_mem;                              /* Size of physical memory (bytes), adjusted
                                                   * by maxmem */
uint64_t    max_mem_actual;                       /* Actual size of physical memory (bytes),
                                                   * adjusted by the maxmem boot-arg */
uint64_t    sane_size;                            /* Memory size to use for defaults
                                                   * calculations */
/* This no longer appears to be used; kill it? */
addr64_t    vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
                                                   * virtual address known
                                                   * to the VM system */

SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;

/* Trust cache portion of EXTRADATA (if within it) */
SECURITY_READ_ONLY_LATE(vm_offset_t) segTRUSTCACHE;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeTRUSTCACHE;

SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRO;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRO;

/* Only set when booted from MH_FILESET kernel collections */
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRXAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRXAuxKC;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTNLEAuxKC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;

#if XNU_MONITOR
SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTEXT;

SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTRAMPB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTRAMP;

SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATACONSTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATACONST;
SECURITY_READ_ONLY_LATE(void *) pmap_stacks_start = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_stacks_end = NULL;
#if HAS_GUARDED_IO_FILTER
SECURITY_READ_ONLY_LATE(void *) iofilter_stacks_start = NULL;
SECURITY_READ_ONLY_LATE(void *) iofilter_stacks_end = NULL;
#endif
#endif

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) segTEXTEXECB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;

#if XNU_MONITOR
SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATA;
#endif

SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
extern vm_offset_t intstack_low_guard;
extern vm_offset_t intstack_high_guard;
extern vm_offset_t excepstack_high_guard;

SECURITY_READ_ONLY_LATE(vm_offset_t) segLINKB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeKLD;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeLASTDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBTEXT;
SECURITY_READ_ONLY_LATE(vm_offset_t) segHIBDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeHIBDATA;
SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBDATACONSTB;
SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBDATACONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;

SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;

/* Only set when booted from MH_FILESET primary kernel collection */
SECURITY_READ_ONLY_LATE(vm_offset_t) segKCTEXTEXECB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeKCTEXTEXEC;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;

SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;

SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;

SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
SECURITY_READ_ONLY_LATE(vm_offset_t) edata;

SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base, auxkc_right_above;

vm_offset_t alloc_ptpage(boolean_t map_static);
SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
extern int dtrace_keep_kernel_symbols(void);

/*
 * Bootstrap the system enough to run with virtual memory.
 * Map the kernel's code and data, and allocate the system page table.
 * Page_size must already be set.
 *
 * Parameters:
 * first_avail: first available physical page -
 *              after kernel page tables
 * avail_start: PA of first physical page
 * avail_end:   PA of last physical page
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;

/*
 * Bounds of the kernelcache; used for accounting.
 */
SECURITY_READ_ONLY_LATE(vm_offset_t) arm_vm_kernelcache_phys_start;
SECURITY_READ_ONLY_LATE(vm_offset_t) arm_vm_kernelcache_phys_end;

#if __ARM_KERNEL_PROTECT__
extern void ExceptionVectorsBase;
extern void ExceptionVectorsEnd;
#endif /* __ARM_KERNEL_PROTECT__ */

typedef struct {
    pmap_paddr_t pa;
    vm_map_address_t va;
    vm_size_t len;
} ptov_table_entry;

#define PTOV_TABLE_SIZE 8
SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;


vm_map_address_t
phystokv(pmap_paddr_t pa)
{
    for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
        if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
            return pa - ptov_table[i].pa + ptov_table[i].va;
        }
    }
    if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
        panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
            (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
    }
    return pa - gPhysBase + gVirtBase;
}
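
/*
 * Illustrative example of a phystokv() lookup (not from the original source;
 * all values are hypothetical). Suppose gPhysBase = 0x800000000,
 * gVirtBase = 0xFFFFFFF007004000, and
 *   ptov_table[0] = { .pa = 0x810000000, .va = 0xFFFFFFF100000000, .len = 0x200000 }.
 *
 * phystokv(0x810001000) hits ptov_table[0]:
 *   0x810001000 - 0x810000000 + 0xFFFFFFF100000000 = 0xFFFFFFF100001000
 * phystokv(0x800000000) misses the table and falls back to the default slide:
 *   0x800000000 - gPhysBase + gVirtBase = 0xFFFFFFF007004000
 */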

vm_map_address_t
phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
{
    vm_size_t len;
    for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
        if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
            len = ptov_table[i].len - (pa - ptov_table[i].pa);
            if (*max_len > len) {
                *max_len = len;
            }
            return pa - ptov_table[i].pa + ptov_table[i].va;
        }
    }
    len = PAGE_SIZE - (pa & PAGE_MASK);
    if (*max_len > len) {
        *max_len = len;
    }
    if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
        panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
            (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
    }
    return pa - gPhysBase + gVirtBase;
}

vm_offset_t
ml_static_vtop(vm_offset_t va)
{
    for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
        if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len))) {
            return va - ptov_table[i].va + ptov_table[i].pa;
        }
    }
    if (__improbable((va < gVirtBase) || (((vm_address_t)(va) - gVirtBase) >= gPhysSize))) {
        panic("%s: illegal VA: %p; virt base 0x%llx, size 0x%llx", __func__,
            (void*)va, (unsigned long long)gVirtBase, (unsigned long long)gPhysSize);
    }
    return (vm_address_t)(va) - gVirtBase + gPhysBase;
}

/*
 * This rounds the given address up to the nearest boundary for a PTE contiguous
 * hint.
 */
static vm_offset_t
round_up_pte_hint_address(vm_offset_t address)
{
    vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
    return (address + (hint_size - 1)) & ~(hint_size - 1);
}
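
/*
 * Worked example (illustrative; the real hint_size depends on ARM_PTE_SIZE
 * and ARM_PTE_HINT_ENTRIES_SHIFT for the current page configuration):
 * if hint_size were 0x20000 (128KB), then
 *   round_up_pte_hint_address(0xFFFFFFF007010000)
 *     = (0xFFFFFFF007010000 + 0x1FFFF) & ~0x1FFFF
 *     = 0xFFFFFFF007020000
 * i.e. the classic (addr + (align - 1)) & ~(align - 1) power-of-two round-up.
 */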

/* allocate a page for a page table: we support static and dynamic mappings.
 *
 * returns a virtual address for the allocated page
 *
 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
 *
 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
 */

vm_offset_t
alloc_ptpage(boolean_t map_static)
{
    vm_offset_t vaddr;

#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
    map_static = FALSE;
#endif

    if (!ropage_next) {
        ropage_next = (vm_offset_t)&ropagetable_begin;
    }

    if (map_static) {
        assert(ropage_next < (vm_offset_t)&ropagetable_end);

        vaddr = ropage_next;
        ropage_next += ARM_PGBYTES;

        return vaddr;
    } else {
        vaddr = phystokv(avail_start);
        avail_start += ARM_PGBYTES;

        return vaddr;
    }
}

#if DEBUG

void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);

void
dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out)
{
    unsigned int i;
    boolean_t cur_ro, prev_ro = 0;
    int start_entry = -1;
    tt_entry_t cur, prev = 0;
    pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
    pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
    boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
        kvtophys((vm_offset_t)tt) < roend;

    for (i = 0; i < TTE_PGENTRIES; i++) {
        int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
        cur = tt[i] & ARM_TTE_TABLE_MASK;

        if (tt_static) {
            /* addresses mapped by this entry are static if it is a block mapping,
             * or the table was allocated from the RO page table region */
            cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
        } else {
            cur_ro = 0;
        }

        if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
            uintptr_t start, end, sz;

            start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
            start += tt_base;
            end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
            end += tt_base;

            sz = end - start + 1;
            printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
                indent * 4, "",
                (uint32_t)(start >> 32), (uint32_t)start,
                (uint32_t)(end >> 32), (uint32_t)end,
                prev_ro ? "Static " : "Dynamic",
                (sz >> 20));

            if (prev_ro) {
                *rosz_out += sz;
            } else {
                *rwsz_out += sz;
            }
        }

        if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
            start_entry = i;
        }

        prev = cur;
        prev_ro = cur_ro;
    }
}

void
dump_kva_space()
{
    uint64_t tot_rosz = 0, tot_rwsz = 0;
    int ro_ptpages, rw_ptpages;
    pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
    pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
    boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
        kvtophys((vm_offset_t)cpu_tte) < roend;
    uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);

    printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");

    for (unsigned int i = 0; i < TTE_PGENTRIES; i++) {
        pmap_paddr_t cur;
        boolean_t cur_ro;
        uintptr_t start, end;
        uint64_t rosz = 0, rwsz = 0;

        if ((cpu_tte[i] & ARM_TTE_VALID) == 0) {
            continue;
        }

        cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
        start = (uint64_t)i << ARM_TT_L1_SHIFT;
        start = start + kva_base;
        end = start + (ARM_TT_L1_SIZE - 1);
        cur_ro = cur >= robegin && cur < roend;

        printf("0x%08x_%08x-0x%08x_%08x %s\n",
            (uint32_t)(start >> 32), (uint32_t)start,
            (uint32_t)(end >> 32), (uint32_t)end,
            cur_ro ? "Static " : "Dynamic");

        dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
        tot_rosz += rosz;
        tot_rwsz += rwsz;
    }

    printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
        tot_rosz >> 20,
        tot_rwsz >> 20,
        (tot_rosz >> 20) + (tot_rwsz >> 20));

    ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
    rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
    printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
}

#endif /* DEBUG */

#if __ARM_KERNEL_PROTECT__ || XNU_MONITOR
/*
 * arm_vm_map:
 *   root_ttp: The kernel virtual address for the root of the target page tables
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr in root_ttp. Any page table pages needed
 * to install pte will be allocated by this function.
 */
static void
arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
{
    vm_offset_t ptpage = 0;
    tt_entry_t * ttp = root_ttp;

    tt_entry_t * l1_ttep = NULL;
    tt_entry_t l1_tte = 0;

    tt_entry_t * l2_ttep = NULL;
    tt_entry_t l2_tte = 0;
    pt_entry_t * ptep = NULL;
    pt_entry_t cpte = 0;

    /*
     * Walk the target page table to find the PTE for the given virtual
     * address. Allocate any page table pages needed to do this.
     */
    l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
    l1_tte = *l1_ttep;

    if (l1_tte == ARM_TTE_EMPTY) {
        ptpage = alloc_ptpage(TRUE);
        bzero((void *)ptpage, ARM_PGBYTES);
        l1_tte = kvtophys(ptpage);
        l1_tte &= ARM_TTE_TABLE_MASK;
        l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA);
        *l1_ttep = l1_tte;
        ptpage = 0;
    }

    ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);

    l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
    l2_tte = *l2_ttep;

    if (l2_tte == ARM_TTE_EMPTY) {
        ptpage = alloc_ptpage(TRUE);
        bzero((void *)ptpage, ARM_PGBYTES);
        l2_tte = kvtophys(ptpage);
        l2_tte &= ARM_TTE_TABLE_MASK;
        l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
        *l2_ttep = l2_tte;
        ptpage = 0;
    }

    ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);

    ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
    cpte = *ptep;

    /*
     * If the existing PTE is not empty, then we are replacing a valid
     * mapping.
     */
    if (cpte != ARM_PTE_EMPTY) {
        panic("%s: cpte=%#llx is not empty, "
            "vaddr=%#lx, pte=%#llx",
            __FUNCTION__, cpte,
            vaddr, pte);
    }

    *ptep = pte;
}

#endif // __ARM_KERNEL_PROTECT || XNU_MONITOR

#if __ARM_KERNEL_PROTECT__

/*
 * arm_vm_kernel_el0_map:
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr for the EL0 kernel mappings.
 */
static void
arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
{
    /* Calculate where vaddr will be in the EL1 kernel page tables. */
    vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
    arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
}
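
/*
 * Worked example of the offset above (illustrative; the actual constants are
 * configuration-dependent). (ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) is
 * the total span covered by TTBR1, so the subtraction moves an address down
 * by half that span. If the span were 512GB (0x8000000000), an EL0 alias of
 * the vector page at EL1 VA 0xFFFFFFFFFFFE0000 would be installed at
 *   0xFFFFFFFFFFFE0000 - 0x4000000000 = 0xFFFFFFBFFFFE0000
 * i.e. the same offset within the lower half of the TTBR1 range.
 */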

/*
 * arm_vm_kernel_el1_map:
 *   vaddr: The target virtual address
 *   pte: A page table entry value (may be ARM_PTE_EMPTY)
 *
 * This function installs pte at vaddr for the EL1 kernel mappings.
 */
static void
arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte)
{
    arm_vm_map(cpu_tte, vaddr, pte);
}

/*
 * arm_vm_kernel_pte:
 *   vaddr: The target virtual address
 *
 * This function returns the PTE value for the given vaddr from the kernel page
 * tables. If the region has been block mapped, we return what an equivalent
 * PTE value would be (as regards permissions and flags). We also remove the
 * HINT bit (as we are not necessarily creating contiguous mappings).
 */
static pt_entry_t
arm_vm_kernel_pte(vm_offset_t vaddr)
{
    tt_entry_t * ttp = cpu_tte;
    tt_entry_t * ttep = NULL;
    tt_entry_t tte = 0;
    pt_entry_t * ptep = NULL;
    pt_entry_t pte = 0;

    ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
    tte = *ttep;

    assert(tte & ARM_TTE_VALID);

    if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
        /* This is a block mapping; return the equivalent PTE value. */
        pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
        pte |= ARM_PTE_TYPE_VALID;
        pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
        pte &= ~ARM_PTE_HINT_MASK;
        return pte;
    }

    ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
    ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
    tte = *ttep;

    assert(tte & ARM_TTE_VALID);

    if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
        /* This is a block mapping; return the equivalent PTE value. */
        pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
        pte |= ARM_PTE_TYPE_VALID;
        pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
        pte &= ~ARM_PTE_HINT_MASK;
        return pte;
    }

    ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);

    ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
    pte = *ptep;
    pte &= ~ARM_PTE_HINT_MASK;
    return pte;
}

/*
 * arm_vm_prepare_kernel_el0_mappings:
 *   alloc_only: Indicates if PTE values should be copied from the EL1 kernel
 *     mappings.
 *
 * This function expands the kernel page tables to support the EL0 kernel
 * mappings, and conditionally installs the PTE values for the EL0 kernel
 * mappings (if alloc_only is false).
 */
static void
arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
{
    pt_entry_t pte = 0;
    vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
    vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
    vm_offset_t cur = 0;
    vm_offset_t cur_fixed = 0;

    /* Expand for/map the exception vectors in the EL0 kernel mappings. */
    for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
        /*
         * We map the exception vectors at a different address than that
         * of the kernelcache to avoid sharing page table pages with the
         * kernelcache (as this may cause issues with TLB caching of
         * page table pages).
         */
        if (!alloc_only) {
            pte = arm_vm_kernel_pte(cur);
        }

        arm_vm_kernel_el1_map(cur_fixed, pte);
        arm_vm_kernel_el0_map(cur_fixed, pte);
    }

    __builtin_arm_dmb(DMB_ISH);
    __builtin_arm_isb(ISB_SY);

    if (!alloc_only) {
        /*
         * If we have created the alternate exception vector mappings,
         * the boot CPU may now switch over to them.
         */
        set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
        __builtin_arm_isb(ISB_SY);
    }
}

/*
 * arm_vm_populate_kernel_el0_mappings:
 *
 * This function adds all required mappings to the EL0 kernel mappings.
 */
static void
arm_vm_populate_kernel_el0_mappings(void)
{
    arm_vm_prepare_kernel_el0_mappings(FALSE);
}

/*
 * arm_vm_expand_kernel_el0_mappings:
 *
 * This function expands the kernel page tables to accommodate the EL0 kernel
 * mappings.
 */
static void
arm_vm_expand_kernel_el0_mappings(void)
{
    arm_vm_prepare_kernel_el0_mappings(TRUE);
}
#endif /* __ARM_KERNEL_PROTECT__ */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
extern void bootstrap_instructions;

/*
 * arm_replace_identity_map takes the V=P map that we construct in start.s
 * and repurposes it in order to have it map only the page we need in order
 * to turn on the MMU. This prevents us from running into issues where
 * KTRR will cause us to fault on executable block mappings that cross the
 * KTRR boundary.
 */
static void
arm_replace_identity_map(void)
{
    vm_offset_t addr;
    pmap_paddr_t paddr;

    pmap_paddr_t l1_ptp_phys = 0;
    tt_entry_t *l1_ptp_virt = NULL;
    tt_entry_t *tte1 = NULL;
    pmap_paddr_t l2_ptp_phys = 0;
    tt_entry_t *l2_ptp_virt = NULL;
    tt_entry_t *tte2 = NULL;
    pmap_paddr_t l3_ptp_phys = 0;
    pt_entry_t *l3_ptp_virt = NULL;
    pt_entry_t *ptep = NULL;

    addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
    paddr = kvtophys(addr);

    /*
     * Grab references to the V=P page tables, and allocate an L3 page.
     */
    l1_ptp_phys = kvtophys((vm_offset_t)&bootstrap_pagetables);
    l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
    tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];

    l2_ptp_virt = L2_TABLE_VA(tte1);
    l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
    tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];

    l3_ptp_virt = (pt_entry_t *)alloc_ptpage(TRUE);
    l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
    ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];

    /*
     * Replace the large V=P mapping with a mapping that provides only the
     * mappings needed to turn on the MMU.
     */

    bzero(l1_ptp_virt, ARM_PGBYTES);
    *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);

    bzero(l2_ptp_virt, ARM_PGBYTES);
    *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);

    *ptep = (paddr & ARM_PTE_MASK) |
        ARM_PTE_TYPE_VALID |
        ARM_PTE_SH(SH_OUTER_MEMORY) |
        ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
        ARM_PTE_AF |
        ARM_PTE_AP(AP_RONA) |
        ARM_PTE_NX;
}
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */

tt_entry_t *arm_kva_to_tte(vm_offset_t);

tt_entry_t *
arm_kva_to_tte(vm_offset_t va)
{
    tt_entry_t *tte1, *tte2;
    tte1 = cpu_tte + L1_TABLE_INDEX(va);
    tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);

    return tte2;
}
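
/*
 * Illustrative usage sketch (not compiled and not part of the original
 * source; the helper name is hypothetical): inspecting the L2 TTE that
 * covers a kernel virtual address.
 */
#if 0
static boolean_t
arm_kva_is_block_mapped(vm_offset_t va)
{
    /* Walk L1 -> L2 for this VA and read the twig-level entry. */
    tt_entry_t tte = *arm_kva_to_tte(va);
    return (tte & ARM_TTE_VALID) &&
           ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK);
}
#endif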

#if XNU_MONITOR

static inline pt_entry_t *
arm_kva_to_pte(vm_offset_t va)
{
    tt_entry_t *tte2 = arm_kva_to_tte(va);
    return L3_TABLE_VA(tte2) + L3_TABLE_INDEX(va);
}

#endif

#define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
#define ARM64_GRANULE_ALLOW_HINT (1 << 1)

/**
 * Updates a translation table entry (TTE) with the supplied value, unless doing so might render
 * the pagetable region read-only before subsequent updates have finished. In that case, the TTE
 * value will be saved off for deferred processing.
 *
 * @param ttep address of the TTE to update
 * @param entry the value to store in ttep
 * @param pa the base physical address mapped by the TTE
 * @param ttebase L3-page- or L2-block-aligned base virtual address of the pagetable region
 * @param granule mask indicating whether L2 block or L3 hint mappings are allowed for this segment
 * @param deferred_ttep_pair 2-element array of addresses of deferred TTEs
 * @param deferred_tte_pair 2-element array containing TTE values for deferred assignment to
 *        corresponding elements of deferred_ttep_pair
 */
static void
update_or_defer_tte(tt_entry_t *ttep, tt_entry_t entry, pmap_paddr_t pa, vm_map_address_t ttebase,
    unsigned granule __unused, tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
{
    /*
     * If we're trying to assign an entry that maps the current TTE region (identified by ttebase),
     * and the pagetable is already live (indicated by kva_active), defer assignment of the current
     * entry and possibly the entry after it until all other mappings in the segment have been
     * updated. Otherwise we may end up immediately marking the pagetable region read-only,
     * leading to a fault on a later assignment if we manage to outrun the TLB. This can
     * happen on KTRR/CTRR-enabled devices when marking segDATACONST read-only, as the pagetables
     * that map that segment must come from the segment itself. We therefore store the initial
     * recursive TTE in deferred_ttep_pair[0] and its value in deferred_tte_pair[0]. We may also
     * defer assignment of the TTE following that recursive TTE and store its value in
     * deferred_tte_pair[1], because the TTE region following the current one may also contain
     * pagetables and we must avoid marking that region read-only before updating those tables.
     *
     * We require that such recursive mappings must exist in regions that can be mapped with L2
     * block entries if they are sufficiently large. This is what allows us to assume that no
     * more than 2 deferred TTEs will be required, because:
     * --If more than 2 adjacent L3 PTEs were required to map our pagetables, that would mean
     *   we would have at least one full L3 pagetable page and would instead use an L2 block.
     * --If more than 2 adjacent L2 blocks were required to map our pagetables, that would
     *   mean we would have at least one full L2-block-sized region of TTEs and something
     *   is very wrong because no segment should be that large.
     */
    if ((deferred_ttep_pair != NULL) && (deferred_ttep_pair[0] != NULL) && (ttep == (deferred_ttep_pair[0] + 1))) {
        assert(deferred_tte_pair[1] == 0);
        deferred_ttep_pair[1] = ttep;
        deferred_tte_pair[1] = entry;
    } else if (kva_active && (phystokv(pa) == ttebase)) {
        assert(deferred_ttep_pair != NULL);
        assert(granule & ARM64_GRANULE_ALLOW_BLOCK);
        if (deferred_ttep_pair[0] == NULL) {
            deferred_ttep_pair[0] = ttep;
            deferred_tte_pair[0] = entry;
        } else {
            assert(deferred_ttep_pair[1] == NULL);
            deferred_ttep_pair[1] = ttep;
            deferred_tte_pair[1] = entry;
        }
    } else {
        *ttep = entry;
    }
}
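
/*
 * Illustrative sketch of the deferred-assignment pattern (not compiled; the
 * real driver is arm_vm_page_granular_prot() below). Entries that would make
 * their own pagetable region read-only are captured during the walk and
 * stored only after every other update has completed.
 */
#if 0
tt_entry_t *deferred_ttep_pair[2] = {NULL};
tt_entry_t deferred_tte_pair[2] = {0};

/* ... call update_or_defer_tte() for every TTE in the target range ... */

if (deferred_ttep_pair[0] != NULL) {
    if (deferred_tte_pair[1] != 0) {
        os_atomic_store(deferred_ttep_pair[1], deferred_tte_pair[1], release);
    }
    /* The recursive entry goes last so its region stays writable until now. */
    os_atomic_store(deferred_ttep_pair[0], deferred_tte_pair[0], release);
}
#endif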


/*
 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
 * necessary) allocate a page for the L3 table and update the corresponding L2
 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
 * This expects to be invoked at L2-entry or sub-L2-entry granularity, so it should
 * not be invoked from a context that does not do L2 iteration separately (basically,
 * don't call this except from arm_vm_page_granular_prot).
 *
 * unsigned granule: 0 => force to page granule, or a combination of
 * ARM64_GRANULE_* flags declared above.
 */

static void
arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
    int pte_prot_APX, int pte_prot_XN, unsigned granule,
    tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
{
    if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
        tt_entry_t *tte2;
        tt_entry_t tmplate;
        pmap_paddr_t pa;
        pt_entry_t *ppte, ptmp;
        addr64_t ppte_phys;
        unsigned i;

        va &= ~ARM_TT_L2_OFFMASK;
        pa = va - gVirtBase + gPhysBase - pa_offset;

        if (pa >= real_avail_end) {
            return;
        }

        tte2 = arm_kva_to_tte(va);

        assert(_end >= va);
        tmplate = *tte2;

        if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
            /* pick up the existing page table. */
            ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
        } else {
            // TTE must be reincarnated with page level mappings.

            // ... but we don't want to break up blocks on live
            // translation tables.
            assert(!kva_active);

            ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
            bzero(ppte, ARM_PGBYTES);
            ppte_phys = kvtophys((vm_offset_t)ppte);

            *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
        }

        vm_offset_t len = _end - va;
        if ((pa + len) > real_avail_end) {
            _end -= (pa + len - real_avail_end);
        }
        assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);

        /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
         * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
         * a ragged non-PAGE_SIZE-aligned edge. */
        vm_offset_t rounded_end = round_page(_end);
        /* Apply the desired protections to the specified page range */
        for (i = 0; i <= (ARM_TT_L3_INDEX_MASK >> ARM_TT_L3_SHIFT); i++) {
            if ((start <= va) && (va < rounded_end)) {
                ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
                ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
                ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
                ptmp = ptmp | ARM_PTE_NX;
#if __ARM_KERNEL_PROTECT__
                ptmp = ptmp | ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

                if (pte_prot_XN) {
                    ptmp = ptmp | ARM_PTE_PNX;
                }

                /*
                 * If we can, apply the contiguous hint to this range. The hint is
                 * applicable if the current address falls within a hint-sized range that will
                 * be fully covered by this mapping request.
                 */
                if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
                    (granule & ARM64_GRANULE_ALLOW_HINT) && use_contiguous_hint) {
                    assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
                    ptmp |= ARM_PTE_HINT;
                    /* Do not attempt to reapply the hint bit to an already-active mapping.
                     * This very likely means we're attempting to change attributes on an already-active mapping,
                     * which violates the requirement of the hint bit. */
                    assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT));
                }
                /*
                 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
                 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
                 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
                 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
                 * could produce a later TLB conflict.
                 */
                assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));

                update_or_defer_tte(&ppte[i], ptmp, pa, (vm_map_address_t)ppte, granule, deferred_ttep_pair, deferred_tte_pair);
            }

            va += ARM_PGBYTES;
            pa += ARM_PGBYTES;
        }
    }
}

/*
 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
 * changing them. If a particular chunk necessitates L3 entries (for reasons of
 * alignment or length, or an explicit request that the entry be fully expanded), we
 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
 */
static void
arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
    int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
    unsigned granule)
{
    tt_entry_t *deferred_ttep_pair[2] = {NULL};
    tt_entry_t deferred_tte_pair[2] = {0};
    vm_offset_t _end = start + size;
    vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;

    if (size == 0x0UL) {
        return;
    }

    if (align_start > _end) {
        align_start = _end;
    }

    arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);

    while ((_end - align_start) >= ARM_TT_L2_SIZE) {
        if (!(granule & ARM64_GRANULE_ALLOW_BLOCK)) {
            arm_vm_page_granular_helper(align_start, align_start + ARM_TT_L2_SIZE, align_start + 1, pa_offset,
                pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
        } else {
            pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
            assert((pa & ARM_TT_L2_OFFMASK) == 0);
            tt_entry_t *tte2;
            tt_entry_t tmplate;

            tte2 = arm_kva_to_tte(align_start);

            if ((pa >= gPhysBase) && (pa < real_avail_end)) {
                tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
                    | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
                    | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
                    | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);

#if __ARM_KERNEL_PROTECT__
                tmplate = tmplate | ARM_TTE_BLOCK_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
                if (tte_prot_XN) {
                    tmplate = tmplate | ARM_TTE_BLOCK_PNX;
                }

                update_or_defer_tte(tte2, tmplate, pa, (vm_map_address_t)tte2 & ~ARM_TT_L2_OFFMASK,
                    granule, deferred_ttep_pair, deferred_tte_pair);
            }
        }
        align_start += ARM_TT_L2_SIZE;
    }

    if (align_start < _end) {
        arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
    }

    if (deferred_ttep_pair[0] != NULL) {
#if DEBUG || DEVELOPMENT
        /*
         * Flush the TLB to catch bugs that might cause us to prematurely revoke write access from the pagetable page.
         * These bugs may otherwise be hidden by TLB entries in most cases, resulting in very rare panics.
         * Note that we always flush the TLB at the end of arm_vm_prot_finalize().
         */
        flush_mmu_tlb();
#endif
        /*
         * The first TTE in the pair is a recursive mapping of the pagetable region, so we must update it last
         * to avoid potentially marking the page containing deferred_ttep_pair[1] read-only.
         */
        if (deferred_tte_pair[1] != 0) {
            os_atomic_store(deferred_ttep_pair[1], deferred_tte_pair[1], release);
        }
        os_atomic_store(deferred_ttep_pair[0], deferred_tte_pair[0], release);
    }
}

static inline void
arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, unsigned granule)
{
    arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, granule);
}

static inline void
arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, unsigned granule)
{
    arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, granule);
}

static inline void
arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, unsigned granule)
{
    arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, granule);
}
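
/*
 * Summary of the wrappers above (derived directly from the calls they make):
 *
 *   wrapper                     tte_prot_XN  pte_prot_APX  pte_prot_XN  resulting protection
 *   arm_vm_page_granular_RNX    1            AP_RONA       1            read-only, never execute
 *   arm_vm_page_granular_ROX    0            AP_RONA       0            read-only, executable at EL1
 *   arm_vm_page_granular_RWNX   1            AP_RWNA       1            read-write, never execute
 *
 * All three pass pa_offset == 0, i.e. they operate on the default
 * gPhysBase/gVirtBase mapping of the segment.
 */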
1180
1181 // Populate seg...AuxKC and fixup AuxKC permissions
1182 static bool
arm_vm_auxkc_init(void)1183 arm_vm_auxkc_init(void)
1184 {
1185 if (auxkc_mh == 0 || auxkc_base == 0) {
1186 return false; // no auxKC.
1187 }
1188
1189 /* Fixup AuxKC and populate seg*AuxKC globals used below */
1190 arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1191
1192 if (segLOWESTAuxKC != segLOWEST) {
1193 panic("segLOWESTAuxKC (%p) not equal to segLOWEST (%p). auxkc_mh: %p, auxkc_base: %p",
1194 (void*)segLOWESTAuxKC, (void*)segLOWEST,
1195 (void*)auxkc_mh, (void*)auxkc_base);
1196 }
1197
1198 /*
1199 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1200 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1201 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1202 * its RO region before extending it.
1203 */
1204 assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1205 assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1206 if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1207 segHIGHESTROAuxKC = segHIGHESTAuxKC;
1208 }
1209
1210 /*
1211 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1212 * by CTRR, and the AuxKC RX region must be within the RO region.
1213 */
1214 assert(segHIGHESTROAuxKC == auxkc_right_above);
1215 assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1216 assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1217 assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1218 assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
1219
1220 if (segHIGHESTRXAuxKC < segLOWEST) {
1221 arm_vm_page_granular_RNX(segHIGHESTRXAuxKC, segLOWEST - segHIGHESTRXAuxKC, 0);
1222 }
1223 if (segLOWESTRXAuxKC < segHIGHESTRXAuxKC) {
1224 arm_vm_page_granular_ROX(segLOWESTRXAuxKC, segHIGHESTRXAuxKC - segLOWESTRXAuxKC, 0); // Refined in OSKext::readPrelinkedExtensions
1225 }
1226 if (segLOWESTROAuxKC < segLOWESTRXAuxKC) {
1227 arm_vm_page_granular_RNX(segLOWESTROAuxKC, segLOWESTRXAuxKC - segLOWESTROAuxKC, 0);
1228 }
1229 if (segLOWESTAuxKC < segLOWESTROAuxKC) {
1230 arm_vm_page_granular_RWNX(segLOWESTAuxKC, segLOWESTROAuxKC - segLOWESTAuxKC, 0);
1231 }
1232
1233 return true;
1234 }
1235
1236 void
arm_vm_prot_init(__unused boot_args * args)1237 arm_vm_prot_init(__unused boot_args * args)
1238 {
1239 segLOWESTTEXT = UINT64_MAX;
1240 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
1241 segLOWESTTEXT = segPRELINKTEXTB;
1242 }
1243 assert(segSizeTEXT);
1244 if (segTEXTB < segLOWESTTEXT) {
1245 segLOWESTTEXT = segTEXTB;
1246 }
1247 assert(segLOWESTTEXT < UINT64_MAX);
1248
1249 segEXTRADATA = 0;
1250 segSizeEXTRADATA = 0;
1251 segTRUSTCACHE = 0;
1252 segSizeTRUSTCACHE = 0;
1253
1254 segLOWEST = segLOWESTTEXT;
1255 segLOWESTRO = segLOWESTTEXT;
1256
1257 if (segLOWESTKC && segLOWESTKC < segLOWEST) {
1258 /*
1259 * kernel collections have segments below the kernel. In particular the collection mach header
1260 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
1261 */
1262 arm_vm_page_granular_RNX(segLOWESTKC, segLOWEST - segLOWESTKC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1263 segLOWEST = segLOWESTKC;
1264 if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
1265 segLOWESTRO = segLOWESTROKC;
1266 }
1267 if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
1268 segHIGHESTRO = segHIGHESTROKC;
1269 }
1270 }
1271
1272 DTEntry memory_map;
1273 int err;
1274
1275 // Device Tree portion of EXTRADATA
1276 if (SecureDTIsLockedDown()) {
1277 segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
1278 segSizeEXTRADATA = PE_state.deviceTreeSize;
1279 }
1280
1281 // Trust Caches portion of EXTRADATA
1282 {
1283 DTMemoryMapRange const *trustCacheRange;
1284 unsigned int trustCacheRangeSize;
1285
1286 err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1287 assert(err == kSuccess);
1288
1289 err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
1290 if (err == kSuccess) {
1291 if (trustCacheRangeSize != sizeof(DTMemoryMapRange)) {
1292 panic("Unexpected /chosen/memory-map/TrustCache property size %u != %zu", trustCacheRangeSize, sizeof(DTMemoryMapRange));
1293 }
1294
1295 vm_offset_t const trustCacheRegion = phystokv(trustCacheRange->paddr);
1296 if (trustCacheRegion < segLOWEST) {
1297 if (segEXTRADATA != 0) {
1298 if (trustCacheRegion != segEXTRADATA + segSizeEXTRADATA) {
1299 panic("Unexpected location of TrustCache region: %#lx != %#lx",
1300 trustCacheRegion, segEXTRADATA + segSizeEXTRADATA);
1301 }
1302 segSizeEXTRADATA += trustCacheRange->length;
1303 } else {
1304 // Not all devices support CTRR device trees.
1305 segEXTRADATA = trustCacheRegion;
1306 segSizeEXTRADATA = trustCacheRange->length;
1307 }
1308 }
1309 #if !(DEVELOPMENT || DEBUG)
1310 else {
1311 panic("TrustCache region is in an unexpected place: %#lx > %#lx", trustCacheRegion, segLOWEST);
1312 }
1313 #endif
1314 segTRUSTCACHE = trustCacheRegion;
1315 segSizeTRUSTCACHE = trustCacheRange->length;
1316 }
1317 }
1318
1319 if (segSizeEXTRADATA != 0) {
1320 if (segEXTRADATA <= segLOWEST) {
1321 segLOWEST = segEXTRADATA;
1322 if (segEXTRADATA <= segLOWESTRO) {
1323 segLOWESTRO = segEXTRADATA;
1324 }
1325 } else {
1326 panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
1327 }
1328
1329 arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1330 }
1331
1332 const DTMemoryMapRange *auxKC_range, *auxKC_header_range;
1333 unsigned int auxKC_range_size, auxKC_header_range_size;
1334
1335 err = SecureDTGetProperty(memory_map, "AuxKC", (const void**)&auxKC_range,
1336 &auxKC_range_size);
1337 if (err != kSuccess) {
1338 goto noAuxKC;
1339 }
1340 assert(auxKC_range_size == sizeof(DTMemoryMapRange));
1341 err = SecureDTGetProperty(memory_map, "AuxKC-mach_header",
1342 (const void**)&auxKC_header_range, &auxKC_header_range_size);
1343 if (err != kSuccess) {
1344 goto noAuxKC;
1345 }
1346 assert(auxKC_header_range_size == sizeof(DTMemoryMapRange));
1347
1348 if (auxKC_header_range->paddr == 0 || auxKC_range->paddr == 0) {
1349 goto noAuxKC;
1350 }
1351
1352 auxkc_mh = phystokv(auxKC_header_range->paddr);
1353 auxkc_base = phystokv(auxKC_range->paddr);
1354
1355 if (auxkc_base < segLOWEST) {
1356 auxkc_right_above = segLOWEST;
1357 segLOWEST = auxkc_base;
1358 } else {
1359 panic("auxkc_base (%p) not below segLOWEST (%p)", (void*)auxkc_base, (void*)segLOWEST);
1360 }
1361
1362 /* Map AuxKC RWNX initially so that arm_vm_auxkc_init can traverse
1363 * it and apply fixups (after we're off the bootstrap translation
1364 * tables).
1365 */
1366 arm_vm_page_granular_RWNX(auxkc_base, auxKC_range->length, 0);
1367
1368 noAuxKC:
1369 /* Map coalesced kext TEXT segment RWNX for now */
1370 arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1371
1372 /* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
1373 arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1374
1375 /* Map coalesced kext TEXT_EXEC segment RX (could be empty) */
1376 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Refined in OSKext::readPrelinkedExtensions
1377
1378 /* if new segments not present, set space between PRELINK_TEXT and xnu TEXT to RWNX
1379 * otherwise we no longer expect any space between the coalesced kext read only segments and xnu rosegments
1380 */
1381 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
1382 if (segSizePRELINKTEXT) {
1383 arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT),
1384 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1385 }
1386 } else {
1387 /*
1388 * If we have the new segments, we should still protect the gap between kext
1389 * read-only pages and kernel read-only pages, in the event that this gap
1390 * exists.
1391 */
1392 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
1393 arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST),
1394 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1395 }
1396 }
1397
1398 /*
1399 * Protection on kernel text is loose here to allow shenanigans early on. These
1400 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1401 * we currently patch LowResetVectorBase in cpu.c.
1402 *
1403 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1404 */
1405 arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1406
1407 /* Can DATACONST start out and stay RNX?
1408 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1409 * Make RNX in prot_finalize
1410 */
1411 arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1412
1413 arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1414
1415 #if XNU_MONITOR
1416 arm_vm_page_granular_ROX(segPPLTEXTB, segSizePPLTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1417 arm_vm_page_granular_ROX(segPPLTRAMPB, segSizePPLTRAMP, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1418 arm_vm_page_granular_RNX(segPPLDATACONSTB, segSizePPLDATACONST, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1419 #endif
1420
1421 /* DATA segment will remain RWNX */
1422 arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1423 #if XNU_MONITOR
1424 arm_vm_page_granular_RWNX(segPPLDATAB, segSizePPLDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1425 #endif
1426
1427 arm_vm_page_granular_RWNX(segHIBDATAB, segSizeHIBDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1428
1429 arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
1430 arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
1431 arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, 0);
1432 arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);
1433
1434 arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1435 arm_vm_page_granular_RNX(segKLDDATAB, segSizeKLDDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1436 arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1437 arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
1438 arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
1439 if (segLASTDATACONSTB) {
1440 arm_vm_page_granular_RWNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // __LASTDATA_CONST may be empty, but we cannot assume this
1441 }
1442 arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Prelink __DATA for kexts (RW data)
1443
1444 if (segSizePLKLLVMCOV > 0) {
1445 arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // LLVM code coverage data
1446 }
1447 arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* PreLinkInfoDictionary */
1448
1449 /* Record the bounds of the kernelcache. */
1450 vm_kernelcache_base = segLOWEST;
1451 vm_kernelcache_top = end_kern;
1452 }
1453
1454 /*
1455 * return < 0 for a < b
1456 * 0 for a == b
1457 * > 0 for a > b
1458 */
1459 typedef int (*cmpfunc_t)(const void *a, const void *b);
1460
1461 extern void
1462 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1463
1464 static int
1465 cmp_ptov_entries(const void *a, const void *b)
1466 {
1467 const ptov_table_entry *entry_a = a;
1468 const ptov_table_entry *entry_b = b;
1469 // Sort in descending order of segment length
1470 if (entry_a->len < entry_b->len) {
1471 return 1;
1472 } else if (entry_a->len > entry_b->len) {
1473 return -1;
1474 } else {
1475 return 0;
1476 }
1477 }
1478
1479 SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1480
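/* Round an address up to the next L1 or twig (block-size) boundary, respectively. */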
1481 #define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
1482 #define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1483
1484 static void
1485 arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, unsigned granule)
1486 {
1487 pmap_paddr_t pa_offset;
1488
1489 if (__improbable(ptov_index >= PTOV_TABLE_SIZE)) {
1490 panic("%s: PTOV table limit exceeded; segment va = 0x%llx, size = 0x%llx", __func__,
1491 (unsigned long long)orig_va, (unsigned long long)len);
1492 }
1493 assert((orig_va & ARM_PGMASK) == 0);
1494 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1495 if (ptov_index == 0) {
1496 temp_ptov_table[ptov_index].va = physmap_base;
1497 } else {
1498 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1499 }
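/*
 * If block mappings are allowed for this region, bias the chosen VA so that it has the
 * same offset within a twig-sized (block) region as the underlying PA; block and
 * contiguous-hint granules can only be used when the two offsets agree. Illustrative
 * example (hypothetical values): if the PA sits 0x4000 into its block and the candidate
 * VA sits 0x2000 into its block, the VA is advanced by 0x2000 so the offsets line up.
 */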
1500 if (granule & ARM64_GRANULE_ALLOW_BLOCK) {
1501 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1502 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1503 if (new_offset < orig_offset) {
1504 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1505 } else if (new_offset > orig_offset) {
1506 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1507 }
1508 }
1509 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1510 temp_ptov_table[ptov_index].len = round_page(len);
1511 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1512 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, granule);
1513 ++ptov_index;
1514 }
1515
1516 #if XNU_MONITOR
1517
1518 SECURITY_READ_ONLY_LATE(static boolean_t) keep_linkedit = FALSE;
1519
1520 static void
1521 arm_vm_physmap_init(boot_args *args)
1522 {
1523 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1524 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1525
1526 // This is memory that will either be handed back to the VM layer via ml_static_mfree(),
1527 // or will be available for general-purpose use. Physical aperture mappings for this memory
1528 // must be at page granularity, so that PPL ownership or cache attribute changes can be reflected
1529 // in the physical aperture mappings.
1530
1531 // Slid region between gPhysBase and beginning of protected text
1532 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
1533
1534 // kext bootstrap segments
1535 #if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1536 /* __KLD,__text is covered by the rorgn */
1537 arm_vm_physmap_slide(temp_ptov_table, segKLDB, segSizeKLD, AP_RONA, 0);
1538 #endif
1539 arm_vm_physmap_slide(temp_ptov_table, segKLDDATAB, segSizeKLDDATA, AP_RONA, 0);
1540
1541 // Early-boot data
1542 arm_vm_physmap_slide(temp_ptov_table, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
1543
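/*
 * Decide whether LINKEDIT must stay resident: the keepsyms boot-arg, DTrace's need for
 * kernel symbols, and KASAN's dynamic blacklist all force it to be kept.
 */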
1544 PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
1545 #if CONFIG_DTRACE
1546 if (dtrace_keep_kernel_symbols()) {
1547 keep_linkedit = TRUE;
1548 }
1549 #endif /* CONFIG_DTRACE */
1550 #if KASAN_DYNAMIC_BLACKLIST
1551 /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
1552 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
1553 keep_linkedit = TRUE;
1554 #endif
1555 if (!keep_linkedit) {
1556 // Kernel LINKEDIT
1557 arm_vm_physmap_slide(temp_ptov_table, segLINKB, segSizeLINK, AP_RWNA, 0);
1558
1559 if (segSizePLKLINKEDIT) {
1560 // Prelinked kernel LINKEDIT
1561 arm_vm_physmap_slide(temp_ptov_table, segPLKLINKEDITB, segSizePLKLINKEDIT, AP_RWNA, 0);
1562 }
1563 }
1564
1565 // Prelinked kernel plists
1566 arm_vm_physmap_slide(temp_ptov_table, segPRELINKINFOB, segSizePRELINKINFO, AP_RWNA, 0);
1567
1568 // Device tree (if not locked down), ramdisk, boot args
1569 arm_vm_physmap_slide(temp_ptov_table, end_kern, (args->topOfKernelData - gPhysBase + gVirtBase) - end_kern, AP_RWNA, 0);
1570 if (!SecureDTIsLockedDown()) {
1571 PE_slide_devicetree(temp_ptov_table[ptov_index - 1].va - end_kern);
1572 }
1573
1574 // Remainder of physical memory
1575 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1576 real_avail_end - args->topOfKernelData, AP_RWNA, 0);
1577
1578 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1579
1580 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1581 // segments should be placed earliest in the table to optimize lookup performance.
1582 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1583
1584 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1585 }
1586
1587 #else
1588
1589 static void
1590 arm_vm_physmap_init(boot_args *args)
1591 {
1592 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1593 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1594
1595 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1596 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA,
1597 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1598
1599 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern,
1600 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* Device Tree (if not locked down), RAM Disk (if present), bootArgs */
1601
1602 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1603 real_avail_end - args->topOfKernelData, AP_RWNA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // rest of physmem
1604
1605 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1606
1607 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1608 // segments should be placed earliest in the table to optimize lookup performance.
1609 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1610
1611 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1612 }
1613
1614 #endif // XNU_MONITOR
1615
1616 void
1617 arm_vm_prot_finalize(boot_args * args __unused)
1618 {
1619 /*
1620 * At this point, we are far enough along in the boot process that it will be
1621 * safe to free up all of the memory preceding the kernel. It may in fact
1622 * be safe to do this earlier.
1623 *
1624 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1625 * as usable.
1626 */
1627
1628 /*
1629 * If an old-style PRELINK segment exists, free the memory before it and the memory between
1630 * it and XNU's TEXT. Otherwise we're dealing with a new-style kernel cache, so just free the
1631 * memory before the PRELINK_TEXT segment, since the rest of the kext read-only data segments
1632 * should be immediately followed by XNU's TEXT segment.
1633 */
1634
1635 ml_static_mfree(phystokv(gPhysBase), segLOWEST - gVirtBase);
1636
1637 /*
1638 * KTRR support means we will be mucking with these pages and trying to
1639 * protect them; we cannot free the pages to the VM if we do this.
1640 */
1641 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1642 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1643 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1644 }
1645
1646 /* tighten permissions on kext read only data and code */
1647 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK);
1648 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1649
1650 cpu_stack_alloc(&BootCpuData);
1651 arm64_replace_bootstack(&BootCpuData);
1652 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1653
1654 #if __ARM_KERNEL_PROTECT__
1655 arm_vm_populate_kernel_el0_mappings();
1656 #endif /* __ARM_KERNEL_PROTECT__ */
1657
1658 #if XNU_MONITOR
1659 #if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1660 /* __KLD,__text is covered by the rorgn */
1661 for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
1662 pt_entry_t *pte = arm_kva_to_pte(va);
1663 *pte = ARM_PTE_EMPTY;
1664 }
1665 #endif
1666 for (vm_offset_t va = segKLDDATAB; va < (segKLDDATAB + segSizeKLDDATA); va += ARM_PGBYTES) {
1667 pt_entry_t *pte = arm_kva_to_pte(va);
1668 *pte = ARM_PTE_EMPTY;
1669 }
1670 /* Clear the original stack mappings; these pages should be mapped through ptov_table. */
1671 for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
1672 pt_entry_t *pte = arm_kva_to_pte(va);
1673 *pte = ARM_PTE_EMPTY;
1674 }
1675 /* Clear the original PRELINKINFO mapping. This segment should be jettisoned during I/O Kit
1676 * initialization before we reach this point. */
1677 for (vm_offset_t va = segPRELINKINFOB; va < (segPRELINKINFOB + segSizePRELINKINFO); va += ARM_PGBYTES) {
1678 pt_entry_t *pte = arm_kva_to_pte(va);
1679 *pte = ARM_PTE_EMPTY;
1680 }
1681 if (!keep_linkedit) {
1682 for (vm_offset_t va = segLINKB; va < (segLINKB + segSizeLINK); va += ARM_PGBYTES) {
1683 pt_entry_t *pte = arm_kva_to_pte(va);
1684 *pte = ARM_PTE_EMPTY;
1685 }
1686 if (segSizePLKLINKEDIT) {
1687 for (vm_offset_t va = segPLKLINKEDITB; va < (segPLKLINKEDITB + segSizePLKLINKEDIT); va += ARM_PGBYTES) {
1688 pt_entry_t *pte = arm_kva_to_pte(va);
1689 *pte = ARM_PTE_EMPTY;
1690 }
1691 }
1692 }
1693 #endif /* XNU_MONITOR */
1694
1695 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1696 /*
1697 * __LAST,__pinst should no longer be executable.
1698 */
1699 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK);
1700
1701 /* __LASTDATA_CONST should no longer be writable. */
1702 if (segLASTDATACONSTB) {
1703 arm_vm_page_granular_RNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1704 }
1705
1706 /*
1707 * __KLD,__text should no longer be executable.
1708 */
1709 arm_vm_page_granular_RNX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK);
1710
1711 /*
1712 * Must wait until all other region permissions are set before locking down DATA_CONST
1713 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1714 * and will become immutable.
1715 */
1716 #endif
1717
1718 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1719
1720 __builtin_arm_dsb(DSB_ISH);
1721 flush_mmu_tlb();
1722 }
1723
1724 /*
1725 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1726 * address accesses. It can be enabled separately for TTBR0 (user) and
1727 * TTBR1 (kernel).
1728 */
1729 void
1730 arm_set_kernel_tbi(void)
1731 {
1732 #if !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI
1733 uint64_t old_tcr, new_tcr;
1734
1735 old_tcr = new_tcr = get_tcr();
1736 /*
1737 * For kernel configurations that require TBI support on
1738 * PAC systems, we enable DATA TBI only.
1739 */
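/*
 * TCR_TBI1 ignores the top byte of TTBR1 (kernel) addresses; TCR_TBID1 restricts that
 * behaviour to data accesses, so instruction fetches still see the full address.
 */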
1740 new_tcr |= TCR_TBI1_TOPBYTE_IGNORED;
1741 new_tcr |= TCR_TBID1_ENABLE;
1742
1743 if (old_tcr != new_tcr) {
1744 set_tcr(new_tcr);
1745 sysreg_restore.tcr_el1 = new_tcr;
1746 }
1747 #endif /* !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI */
1748 }
1749
1750 static void
1751 arm_set_user_tbi(void)
1752 {
1753 #if !__ARM_KERNEL_PROTECT__
1754 uint64_t old_tcr, new_tcr;
1755
1756 old_tcr = new_tcr = get_tcr();
1757 new_tcr |= TCR_TBI0_TOPBYTE_IGNORED;
1758
1759 if (old_tcr != new_tcr) {
1760 set_tcr(new_tcr);
1761 sysreg_restore.tcr_el1 = new_tcr;
1762 }
1763 #endif /* !__ARM_KERNEL_PROTECT__ */
1764 }
1765
1766 /*
1767 * Initialize and enter blank (invalid) page tables in a L1 translation table for a given VA range.
1768 *
1769 * This is a helper function used to build up the initial page tables for the kernel translation table.
1770 * With KERNEL_INTEGRITY we keep at least the root level of the kernel page table immutable, thus the need
1771 * to preallocate before machine_lockdown any L1 entries necessary during the entire kernel runtime.
1772 *
1773 * For a given VA range, if necessary, allocate new L2 translation tables and install the table entries in
1774 * the appropriate L1 table indexes. Called before the translation table is active.
1775 *
1776 * parameters:
1777 *
1778 * tt: virtual address of L1 translation table to modify
1779 * start: beginning of VA range
1780 * end: end of VA range
1781 * static_map: whether to allocate the new translation table page from read only memory
1782 * table_attrs: attributes of new table entry in addition to VALID and TYPE_TABLE attributes
1783 *
1784 */
1785
1786 static void
1787 init_ptpages(tt_entry_t *tt, vm_map_address_t start, vm_map_address_t end, bool static_map, uint64_t table_attrs)
1788 {
1789 tt_entry_t *l1_tte;
1790 vm_offset_t ptpage_vaddr;
1791
1792 l1_tte = tt + ((start & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1793
1794 while (start < end) {
1795 if (*l1_tte == ARM_TTE_EMPTY) {
1796 /* Allocate a page and setup L1 Table TTE in L1 */
1797 ptpage_vaddr = alloc_ptpage(static_map);
1798 *l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | table_attrs;
1799 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1800 }
1801
1802 if ((start + ARM_TT_L1_SIZE) < start) {
1803 /* If this is the last L1 entry, it must cover the last mapping. */
1804 break;
1805 }
1806
1807 start += ARM_TT_L1_SIZE;
1808 l1_tte++;
1809 }
1810 }
1811
1812 #define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1813 #define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
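/*
 * The base of the physical aperture (physmap) is slid by a random, page-aligned offset
 * within this 1 GB window; the slide itself is computed in arm_vm_init() below.
 */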
1814
1815 void
1816 arm_vm_init(uint64_t memory_size, boot_args * args)
1817 {
1818 vm_map_address_t va_l1, va_l1_end;
1819 tt_entry_t *cpu_l1_tte;
1820 vm_map_address_t va_l2, va_l2_end;
1821 tt_entry_t *cpu_l2_tte;
1822 pmap_paddr_t boot_ttep;
1823 tt_entry_t *boot_tte;
1824 uint64_t mem_segments;
1825 vm_offset_t ptpage_vaddr;
1826 vm_map_address_t dynamic_memory_begin;
1827
1828 /*
1829 * Get the virtual and physical kernel-managed memory base from boot_args.
1830 */
1831 gVirtBase = args->virtBase;
1832 gPhysBase = args->physBase;
1833 #if KASAN
1834 real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1835 #else
1836 real_phys_size = args->memSize;
1837 #endif
1838 /*
1839 * Ensure the physical region we specify for the VM to manage ends on a
1840 * software page boundary. Note that the software page size (PAGE_SIZE)
1841 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1842 * We must round the reported memory size down to the nearest PAGE_SIZE
1843 * boundary to ensure the VM does not try to manage a page it does not
1844 * completely own. The KASAN shadow region, if present, is managed entirely
1845 * in units of the hardware page size and should not need similar treatment.
1846 */
1847 gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
1848
1849 mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
1850
1851 if ((memory_size != 0) && (mem_size > memory_size)) {
1852 mem_size = memory_size;
1853 max_mem_actual = memory_size;
1854 } else {
1855 max_mem_actual = mem_actual;
1856 }
1857 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
1858 panic("Unsupported memory configuration %lx", mem_size);
1859 }
1860
1861 #if defined(ARM_LARGE_MEMORY)
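/*
 * On large-memory configurations the physical aperture lives below the kernel VA range:
 * reserve enough L1 entries to cover all of physical memory plus the maximum physmap slide.
 */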
1862 unsigned long physmap_l1_entries = ((real_phys_size + ARM64_PHYSMAP_SLIDE_RANGE) >> ARM_TT_L1_SHIFT) + 1;
1863 physmap_base = VM_MIN_KERNEL_ADDRESS - (physmap_l1_entries << ARM_TT_L1_SHIFT);
1864 #else
1865 physmap_base = phystokv(args->topOfKernelData);
1866 #endif
1867
1868 // Slide the physical aperture to a random page-aligned location within the slide range
1869 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1870 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1871
1872 physmap_base += physmap_slide;
1873
1874 #if XNU_MONITOR
1875 physmap_base = ROUND_TWIG(physmap_base);
1876 #if defined(ARM_LARGE_MEMORY)
1877 static_memory_end = phystokv(args->topOfKernelData);
1878 #else
1879 static_memory_end = physmap_base + mem_size;
1880 #endif // ARM_LARGE_MEMORY
1881 physmap_end = physmap_base + real_phys_size;
1882 #else
1883 #if defined(ARM_LARGE_MEMORY)
1884 /* For large memory systems with no PPL such as virtual machines */
1885 static_memory_end = phystokv(args->topOfKernelData);
1886 physmap_end = physmap_base + real_phys_size;
1887 #else
1888 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1889 physmap_end = physmap_base + real_phys_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE);
1890 #endif // ARM_LARGE_MEMORY
1891 #endif
1892
1893 #if KASAN && !defined(ARM_LARGE_MEMORY)
1894 /* add the KASAN stolen memory to the physmap */
1895 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1896 #else
1897 dynamic_memory_begin = static_memory_end;
1898 #endif
1899 #if XNU_MONITOR
1900 pmap_stacks_start = (void*)dynamic_memory_begin;
1901 dynamic_memory_begin += PPL_STACK_REGION_SIZE;
1902 pmap_stacks_end = (void*)dynamic_memory_begin;
1903
1904 #if HAS_GUARDED_IO_FILTER
1905 iofilter_stacks_start = (void*)dynamic_memory_begin;
1906 dynamic_memory_begin += IOFILTER_STACK_REGION_SIZE;
1907 iofilter_stacks_end = (void*)dynamic_memory_begin;
1908 #endif
1909 #endif
1910 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
1911 panic("Unsupported memory configuration %lx", mem_size);
1912 }
1913
1914 boot_tte = (tt_entry_t *)&bootstrap_pagetables;
1915 boot_ttep = kvtophys((vm_offset_t)boot_tte);
1916
1917 #if DEVELOPMENT || DEBUG
1918 /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently-large to
1919 * hold our bootstrap mappings for any possible slide */
1920 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1921 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1922 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1923 size_t pages_used = 2 * (l1_entries + 1);
1924 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1925 panic("BOOTSTRAP_TABLE_SIZE too small for memory config");
1926 }
1927 #endif
1928
1929 /*
1930 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1931 * TTBR1 L1, TTBR1 L2 - kernel mapping
1932 */
1933
1934 /*
1935 * TODO: free bootstrap table memory back to allocator.
1936 * On large memory systems the bootstrap tables could be quite large.
1937 * After bootstrap is complete, xnu can warm start with a single 16KB page mapping
1938 * to trampoline to KVA. This requires only 3 pages to stay resident.
1939 */
1940 avail_start = args->topOfKernelData;
1941
1942 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1943 arm_replace_identity_map();
1944 #endif
1945
1946 /* Initialize invalid tte page */
1947 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1948 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1949 bzero(invalid_tte, ARM_PGBYTES);
1950
1951 /*
1952 * Initialize l1 page table page
1953 */
1954 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1955 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1956 bzero(cpu_tte, ARM_PGBYTES);
1957 avail_end = gPhysBase + mem_size;
1958 assert(!(avail_end & PAGE_MASK));
1959
1960 #if KASAN
1961 real_avail_end = gPhysBase + real_phys_size;
1962 #else
1963 real_avail_end = avail_end;
1964 #endif
1965
1966 /*
1967 * Initialize l1 and l2 page table pages :
1968 * map physical memory at the kernel base virtual address
1969 * cover the kernel dynamic address range section
1970 *
1971 * the so-called physical aperture should be statically mapped
1972 */
1973 init_ptpages(cpu_tte, gVirtBase, dynamic_memory_begin, TRUE, ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1974
1975 #if defined(ARM_LARGE_MEMORY)
1976 /*
1977 * Initialize l1 page table pages :
1978 * on large memory systems the physical aperture exists separately below
1979 * the rest of the kernel virtual address space
1980 */
1981 init_ptpages(cpu_tte, physmap_base, ROUND_L1(physmap_end), TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1982 #endif
1983
1984
1985 #if __ARM_KERNEL_PROTECT__
1986 /* Expand the page tables to prepare for the EL0 mappings. */
1987 arm_vm_expand_kernel_el0_mappings();
1988 #endif /* __ARM_KERNEL_PROTECT__ */
1989
1990 /*
1991 * Now retrieve addresses for various segments from kernel mach-o header
1992 */
1993 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1994 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1995 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1996 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1997 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1998 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1999 #if XNU_MONITOR
2000 segPPLTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTEXT", &segSizePPLTEXT);
2001 segPPLTRAMPB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTRAMP", &segSizePPLTRAMP);
2002 segPPLDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA_CONST", &segSizePPLDATACONST);
2003 #endif
2004 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
2005 #if XNU_MONITOR
2006 segPPLDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA", &segSizePPLDATA);
2007 #endif
2008
2009 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
2010 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
2011 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
2012 segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
2013 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
2014 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
2015 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
2016 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
2017 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
2018 segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
2019
2020 sectHIBTEXTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", §SizeHIBTEXT);
2021 sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", §SizeHIBDATACONST);
2022 segHIBDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);
2023
2024 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2025 kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
2026
2027 // fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
2028 segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
2029 assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty
2030
2031 assert(segLASTB); // kernel LAST can be empty, but it must have
2032 // a valid address for computations below.
2033
2034 assert(segKCTEXTEXECB <= segLASTB); // KC TEXT_EXEC must contain kernel LAST
2035 assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
2036 segPLKTEXTEXECB = segLASTB + segSizeLAST;
2037 segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);
2038
2039 // fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
2040 segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
2041 assert(segPLKDATACONSTB && !segSizePLKDATACONST); // kernel PLK_DATA_CONST must be empty
2042 assert(segLASTDATACONSTB && segSizeLASTDATACONST); // kernel LASTDATA_CONST must be non-empty
2043 assert(segKCDATACONSTB <= segLASTDATACONSTB); // KC DATA_CONST must contain kernel LASTDATA_CONST
2044 assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
2045 segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
2046 segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);
2047
2048 // fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
2049 segKCDATAB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
2050 assert(segPRELINKDATAB && !segSizePRELINKDATA); // kernel PRELINK_DATA must be empty
2051 assert(segKCDATAB <= segPRELINKDATAB); // KC DATA must contain kernel PRELINK_DATA
2052 assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
2053 segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);
2054
2055 // fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
2056 assert(segPRELINKTEXTB && !segSizePRELINKTEXT); // kernel PRELINK_TEXT must be empty
2057 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
2058 assert(segPRELINKINFOB && !segSizePRELINKINFO); // kernel PRELINK_INFO must be empty
2059 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
2060 segLINKB = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
2061 }
2062
2063 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
2064 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
2065
2066 /* if one of the new segments is present, the other one better be as well */
2067 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
2068 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
2069 }
2070
2071 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
2072 sdata = (vm_offset_t) segDATAB;
2073 edata = (vm_offset_t) segDATAB + segSizeDATA;
2074 end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastkerneladdr()); /* Force end to next page */
2075
2076 vm_set_page_size();
2077
2078 vm_kernel_base = segTEXTB;
2079 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
2080 vm_kext_base = segPRELINKTEXTB;
2081 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
2082
2083 vm_prelink_stext = segPRELINKTEXTB;
2084 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
2085 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
2086 } else {
2087 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
2088 }
2089 vm_prelink_sinfo = segPRELINKINFOB;
2090 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
2091 vm_slinkedit = segLINKB;
2092 vm_elinkedit = segLINKB + segSizeLINK;
2093
2094 vm_prelink_sdata = segPRELINKDATAB;
2095 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
2096
2097 arm_vm_prot_init(args);
2098
2099 /*
2100 * Initialize the page tables for the low globals:
2101 * cover this address range:
2102 * LOW_GLOBAL_BASE_ADDRESS + 2MB
2103 */
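/*
 * The leaf table for the low-globals window is installed with the hierarchical PXN/XN
 * attributes, so nothing mapped beneath it can ever be executed.
 */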
2104 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
2105 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2106 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2107 ptpage_vaddr = alloc_ptpage(TRUE);
2108 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
2109 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
2110
2111 /*
2112 * Initialize l2 page table pages :
2113 * cover this address range:
2114 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
2115 */
2116 #if defined(ARM_LARGE_MEMORY)
2117 /*
2118 * Dynamically mapped memory outside the VM allocator VA range is required to bootstrap the VM system.
2119 * We don't expect it to exceed 64GB; there is no sense mapping any more space between here and the VM heap range.
2120 */
2121 init_ptpages(cpu_tte, dynamic_memory_begin, ROUND_L1(dynamic_memory_begin), FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2122 #else
2123 /*
2124 * TODO: do these pages really need to come from RO memory?
2125 * With legacy 3 level table systems we never mapped more than a single L1 entry so this may be dead code
2126 */
2127 init_ptpages(cpu_tte, dynamic_memory_begin, VM_MAX_KERNEL_ADDRESS, TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2128 #endif
2129
2130 #if KASAN
2131 /* record the extent of the physmap */
2132 physmap_vbase = physmap_base;
2133 physmap_vtop = physmap_end;
2134 kasan_init();
2135 #endif /* KASAN */
2136
2137 #if MONOTONIC
2138 mt_early_init();
2139 #endif /* MONOTONIC */
2140
2141 arm_set_user_tbi();
2142
2143 arm_vm_physmap_init(args);
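/*
 * Switch to the freshly built kernel translation tables (cpu_tte) and install the invalid
 * table for the other TTBR, so that the bootstrap (V=P) mappings are no longer reachable.
 */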
2144 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
2145
2146 ml_enable_monitor();
2147
2148 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
2149
2150 flush_mmu_tlb();
2151 kva_active = TRUE;
2152 // global table pointers may need to be different due to physical aperture remapping
2153 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
2154 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
2155
2156 // From here on out, we're off the bootstrap translation tables.
2157
2158
2159 /* AuxKC initialization has to be deferred until this point, since
2160 * the AuxKC may not have been fully mapped in the bootstrap
2161 * tables, if it spilled downwards into the prior L2 block.
2162 *
2163 * Now that its mapping set up by arm_vm_prot_init() is active,
2164 * we can traverse and fix it up.
2165 */
2166
2167 /* Calculate the physical bounds of the kernelcache; using
2168 * gVirtBase/gPhysBase math to do this directly is generally a bad idea
2169 * as the physmap is no longer physically contiguous. However, this is
2170 * done here as segLOWEST and end_kern are both virtual addresses in the
2171 * bootstrap physmap, and because kvtophys references the page tables
2172 * (at least at the time this comment was written), meaning that at
2173 * least end_kern may not point to a valid mapping on some kernelcache
2174 * configurations, so kvtophys would report a physical address of 0.
2175 *
2176 * Long term, the kernelcache should probably be described in terms of
2177 * multiple physical ranges, as there is no strong guarantee or
2178 * requirement that the kernelcache will always be physically
2179 * contiguous.
2180 */
2181 arm_vm_kernelcache_phys_start = segLOWEST - gVirtBase + gPhysBase;
2182 arm_vm_kernelcache_phys_end = end_kern - gVirtBase + gPhysBase;
2183
2184 /* Calculate the number of pages that belong to the kernelcache. */
2185 vm_page_kernelcache_count = (unsigned int) (atop_64(arm_vm_kernelcache_phys_end - arm_vm_kernelcache_phys_start));
2186
2187 if (arm_vm_auxkc_init()) {
2188 if (segLOWESTROAuxKC < segLOWESTRO) {
2189 segLOWESTRO = segLOWESTROAuxKC;
2190 }
2191 if (segHIGHESTROAuxKC > segHIGHESTRO) {
2192 segHIGHESTRO = segHIGHESTROAuxKC;
2193 }
2194 if (segLOWESTRXAuxKC < segLOWESTTEXT) {
2195 segLOWESTTEXT = segLOWESTRXAuxKC;
2196 }
2197 assert(segLOWEST == segLOWESTAuxKC);
2198
2199 // The preliminary auxKC mapping has been broken up.
2200 flush_mmu_tlb();
2201 }
2202
2203 sane_size = mem_size - (avail_start - gPhysBase);
2204 max_mem = mem_size;
2205 vm_kernel_slid_base = segLOWESTTEXT;
2206 // vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
2207 vm_kernel_stext = segTEXTB;
2208
2209 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2210 vm_kernel_etext = segTEXTEXECB + segSizeTEXTEXEC;
2211 vm_kernel_slid_top = vm_slinkedit;
2212 } else {
2213 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
2214 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
2215 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
2216 vm_kernel_slid_top = vm_prelink_einfo;
2217 }
2218
2219 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
2220 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
2221 // reserve a 32MB region without permission overrides to use later for a CTRR unit test
2222 {
2223 extern vm_offset_t ctrr_test_page;
2224 tt_entry_t *new_tte;
2225
2226 ctrr_test_page = dynamic_memory_begin;
2227 dynamic_memory_begin += ARM_TT_L2_SIZE;
2228 cpu_l1_tte = cpu_tte + ((ctrr_test_page & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2229 assert((*cpu_l1_tte) & ARM_TTE_VALID);
2230 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((ctrr_test_page & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2231 assert((*cpu_l2_tte) == ARM_TTE_EMPTY);
2232 new_tte = (tt_entry_t *)alloc_ptpage(FALSE);
2233 bzero(new_tte, ARM_PGBYTES);
2234 *cpu_l2_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
2235 }
2236 #endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
2237 #if XNU_MONITOR
2238 for (vm_offset_t cur = (vm_offset_t)pmap_stacks_start; cur < (vm_offset_t)pmap_stacks_end; cur += ARM_PGBYTES) {
2239 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2240 }
2241 #if HAS_GUARDED_IO_FILTER
2242 for (vm_offset_t cur = (vm_offset_t)iofilter_stacks_start; cur < (vm_offset_t)iofilter_stacks_end; cur += ARM_PGBYTES) {
2243 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2244 }
2245 #endif
2246 #endif
2247 pmap_bootstrap(dynamic_memory_begin);
2248
2249 disable_preemption();
2250
2251 /*
2252 * Initialize l3 page table pages :
2253 * cover this address range:
2254 * 2MB + FrameBuffer size + 10MB for each 256MB segment
2255 */
2256
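/* mem_segments counts 256 MB granules of managed memory, rounded up. */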
2257 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
2258
2259 va_l1 = dynamic_memory_begin;
2260 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
2261 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
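/* Round the end of this early-mapped range up to an 8 MB (0x800000) boundary. */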
2262 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
2263
2264 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2265
2266 while (va_l1 < va_l1_end) {
2267 va_l2 = va_l1;
2268
2269 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2270 /* If this is the last L1 entry, it must cover the last mapping. */
2271 va_l2_end = va_l1_end;
2272 } else {
2273 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2274 }
2275
2276 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2277
2278 while (va_l2 < va_l2_end) {
2279 pt_entry_t * ptp;
2280 pmap_paddr_t ptp_phys;
2281
2282 /* Allocate a page and setup L3 Table TTE in L2 */
2283 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2284 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2285
2286 bzero(ptp, ARM_PGBYTES);
2287 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2288
2289 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2290
2291 va_l2 += ARM_TT_L2_SIZE;
2292 cpu_l2_tte++;
2293 }
2294
2295 va_l1 = va_l2_end;
2296 cpu_l1_tte++;
2297 }
2298
2299 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2300 /*
2301 * In this configuration, the bootstrap mappings (arm_vm_init) and
2302 * the heap mappings occupy separate L1 regions. Explicitly set up
2303 * the heap L1 allocations here.
2304 */
2305 #if defined(ARM_LARGE_MEMORY)
2306 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2307 #else // defined(ARM_LARGE_MEMORY)
2308 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
2309 init_ptpages(cpu_tte, VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2310 #endif // defined(ARM_LARGE_MEMORY)
2311 #else
2312 #if defined(ARM_LARGE_MEMORY)
2313 /* For large memory systems with no KTRR/CTRR such as virtual machines */
2314 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2315 #endif
2316 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2317
2318 /*
2319 * Initialize l3 page table pages :
2320 * cover this address range:
2321 * ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
2322 */
2323 va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
2324 va_l1_end = VM_MAX_KERNEL_ADDRESS;
2325
2326 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2327
2328 while (va_l1 < va_l1_end) {
2329 va_l2 = va_l1;
2330
2331 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2332 /* If this is the last L1 entry, it must cover the last mapping. */
2333 va_l2_end = va_l1_end;
2334 } else {
2335 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2336 }
2337
2338 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2339
2340 while (va_l2 < va_l2_end) {
2341 pt_entry_t * ptp;
2342 pmap_paddr_t ptp_phys;
2343
2344 /* Allocate a page and setup L3 Table TTE in L2 */
2345 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2346 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2347
2348 bzero(ptp, ARM_PGBYTES);
2349 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2350
2351 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2352
2353 va_l2 += ARM_TT_L2_SIZE;
2354 cpu_l2_tte++;
2355 }
2356
2357 va_l1 = va_l2_end;
2358 cpu_l1_tte++;
2359 }
2360
2361
2362 /*
2363 * Adjust avail_start so that the range that the VM owns
2364 * starts on a PAGE_SIZE aligned boundary.
2365 */
2366 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
2367
2368 #if XNU_MONITOR
2369 pmap_static_allocations_done();
2370 #endif
2371 first_avail = avail_start;
2372 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
2373 enable_preemption();
2374 }
2375
2376 /*
2377 * Returns true if the address is within __TEXT, __TEXT_EXEC or __DATA_CONST
2378 * segment range. This is what [vm_kernel_stext, vm_kernel_etext) range used to
2379 * cover. The segments together may not be continuous anymore and so individual
2380 * intervals are inspected.
2381 */
2382 bool
2383 kernel_text_contains(vm_offset_t addr)
2384 {
2385 if (segTEXTB <= addr && addr < (segTEXTB + segSizeTEXT)) {
2386 return true;
2387 }
2388 if (segTEXTEXECB <= addr && addr < (segTEXTEXECB + segSizeTEXTEXEC)) {
2389 return true;
2390 }
2391 return segDATACONSTB <= addr && addr < (segDATACONSTB + segSizeDATACONST);
2392 }
2393