1 /*
2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_debug.h>
30 #include <mach_kdp.h>
31 #include <debug.h>
32
33 #include <kern/assert.h>
34 #include <kern/misc_protos.h>
35 #include <kern/monotonic.h>
36 #include <mach/vm_types.h>
37 #include <mach/vm_param.h>
38 #include <vm/vm_kern.h>
39 #include <vm/vm_page.h>
40 #include <vm/pmap.h>
41
42 #include <machine/atomic.h>
43 #include <arm64/proc_reg.h>
44 #include <arm64/lowglobals.h>
45 #include <arm/cpu_data_internal.h>
46 #include <arm/misc_protos.h>
47 #include <pexpert/arm64/boot.h>
48 #include <pexpert/device_tree.h>
49
50 #include <libkern/kernel_mach_header.h>
51 #include <libkern/section_keywords.h>
52
53 #include <san/kasan.h>
54
55 #if __ARM_KERNEL_PROTECT__
56 /*
57 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
58 * mappable space preceding the kernel (as we unmap the kernel by cutting the
59 * range covered by TTBR1 in half). This must also cover the exception vectors.
60 */
61 static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
62
63 /* The exception vectors and the kernel cannot share root TTEs. */
64 static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
65
66 /*
67 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
68 * the exception vectors.
69 */
70 static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
71 #endif /* __ARM_KERNEL_PROTECT__ */
72
73 #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
74
75 #if KASAN
76 extern vm_offset_t shadow_pbase;
77 extern vm_offset_t shadow_ptop;
78 extern vm_offset_t physmap_vbase;
79 extern vm_offset_t physmap_vtop;
80 #endif
81
82 /*
83 * We explicitly place this in const, as it is not const from a language
84 * perspective, but it is only modified before we actually switch away from
85 * the bootstrap page tables.
86 */
87 SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));
88
89 /*
90 * Denotes the end of xnu.
91 */
92 extern void *last_kernel_symbol;
93
94 extern void arm64_replace_bootstack(cpu_data_t*);
95 extern void PE_slide_devicetree(vm_offset_t);
96
97 /*
98 * KASLR parameters
99 */
100 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
101 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
102 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
103 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
104 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
105 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
106 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
107 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
108 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
109
110 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
111 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
112 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
113 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
114 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
115 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
116 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
117 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
118
119 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
120 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
121
122 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
123 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;
124
125 /* Used by <mach/arm/vm_param.h> */
126 SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
127 SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
128 SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
129 SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
130 SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
131
132 /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
133 * all kexts before the kernel. This is only for arm64 devices and looks
134 * something like the following:
135 * -- vmaddr order --
136 * 0xffffff8004004000 __PRELINK_TEXT
137 * 0xffffff8007004000 __TEXT (xnu)
138 * 0xffffff80075ec000 __DATA (xnu)
139 * 0xffffff80076dc000 __KLD (xnu)
140 * 0xffffff80076e0000 __LAST (xnu)
141 * 0xffffff80076e4000 __LINKEDIT (xnu)
142 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
143 * 0xffffff800782c000 __PRELINK_INFO
144 * 0xffffff80078e4000 -- End of kernelcache
145 */
146
147 /* 24921709 - make XNU ready for KTRR
148 *
149 * Two possible kernel cache layouts, depending on which kcgen is being used.
150 * VAs increasing downwards.
151 * Old KCGEN:
152 *
153 * __PRELINK_TEXT
154 * __TEXT
155 * __DATA_CONST
156 * __TEXT_EXEC
157 * __KLD
158 * __LAST
159 * __DATA
160 * __PRELINK_DATA (expected empty)
161 * __LINKEDIT
162 * __PRELINK_INFO
163 *
164 * New kcgen:
165 *
166 * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment
167 * __PLK_DATA_CONST
168 * __PLK_TEXT_EXEC
169 * __TEXT
170 * __DATA_CONST
171 * __TEXT_EXEC
172 * __KLD
173 * __LAST <--- Last KTRR (ReadOnly) segment
174 * __DATA
175 * __BOOTDATA (if present)
176 * __LINKEDIT
177 * __PRELINK_DATA (expected populated now)
178 * __PLK_LINKEDIT
179 * __PRELINK_INFO
180 *
181 */
182
183 vm_offset_t mem_size; /* Size of actual physical memory present
184 * minus any performance buffer and possibly
185 * limited by mem_limit in bytes */
186 uint64_t mem_actual; /* The "One True" physical memory size;
187 * actually, it's the highest physical
188 * address + 1 */
189 uint64_t max_mem; /* Size of physical memory (bytes), adjusted
190 * by maxmem */
191 uint64_t max_mem_actual; /* Actual size of physical memory (bytes),
192 * adjusted by the maxmem boot-arg */
193 uint64_t sane_size; /* Memory size to use for defaults
194 * calculations */
195 /* This no longer appears to be used; kill it? */
196 addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
197 * virtual address known
198 * to the VM system */
199
200 SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
201 SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;
202
203 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
204 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
205 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRO;
206 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRO;
207
208 /* Only set when booted from MH_FILESET kernel collections */
209 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTKC;
210 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTKC;
211 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROKC;
212 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROKC;
213 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTAuxKC;
214 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTAuxKC;
215 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROAuxKC;
216 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROAuxKC;
217 SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRXAuxKC;
218 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRXAuxKC;
219 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTNLEAuxKC;
220
221 SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
222 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
223
224 #if XNU_MONITOR
225 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTEXTB;
226 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTEXT;
227
228 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTRAMPB;
229 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTRAMP;
230
231 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATACONSTB;
232 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATACONST;
233 SECURITY_READ_ONLY_LATE(void *) pmap_stacks_start = NULL;
234 SECURITY_READ_ONLY_LATE(void *) pmap_stacks_end = NULL;
235 #endif
236
237 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
238 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
239
240 SECURITY_READ_ONLY_LATE(vm_offset_t) segTEXTEXECB;
241 SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;
242
243 SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
244 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
245
246 #if XNU_MONITOR
247 SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATAB;
248 SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATA;
249 #endif
250
251 SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
252 SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
253 extern vm_offset_t intstack_low_guard;
254 extern vm_offset_t intstack_high_guard;
255 extern vm_offset_t excepstack_high_guard;
256
257 SECURITY_READ_ONLY_LATE(vm_offset_t) segLINKB;
258 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
259
260 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
261 SECURITY_READ_ONLY_LATE(unsigned long) segSizeKLD;
262 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDDATAB;
263 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
264 SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
265 SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
266 SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
267 SECURITY_READ_ONLY_LATE(unsigned long) segSizeLASTDATACONST;
268
269 SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBTEXTB;
270 SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBTEXT;
271 SECURITY_READ_ONLY_LATE(vm_offset_t) segHIBDATAB;
272 SECURITY_READ_ONLY_LATE(unsigned long) segSizeHIBDATA;
273 SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBDATACONSTB;
274 SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBDATACONST;
275
276 SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
277 SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;
278
279 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
280 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
281
282 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
283 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
284
285 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
286 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
287
288 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
289 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
290
291 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
292 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
293
294 SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
295 SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
296
297 /* Only set when booted from MH_FILESET primary kernel collection */
298 SECURITY_READ_ONLY_LATE(vm_offset_t) segKCTEXTEXECB;
299 SECURITY_READ_ONLY_LATE(unsigned long) segSizeKCTEXTEXEC;
300 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATACONSTB;
301 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
302 SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATAB;
303 SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;
304
305 SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
306
307 SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;
308
309 SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
310 SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
311 SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
312 SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
313
314 SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base, auxkc_right_above;
315
316 vm_offset_t alloc_ptpage(boolean_t map_static);
317 SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
318 extern int dtrace_keep_kernel_symbols(void);
319
320 /*
321 * Bootstrap the system enough to run with virtual memory.
322 * Map the kernel's code and data, and allocate the system page table.
323 * Page_size must already be set.
324 *
325 * Parameters:
326 * first_avail: first available physical page -
327 * after kernel page tables
328 * avail_start: PA of first physical page
329 * avail_end: PA of last physical page
330 */
331 SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
332 SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
333 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
334 SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
335 SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
336 SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
337 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
338 SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;
339
340 #if __ARM_KERNEL_PROTECT__
341 extern void ExceptionVectorsBase;
342 extern void ExceptionVectorsEnd;
343 #endif /* __ARM_KERNEL_PROTECT__ */
344
345 typedef struct {
346 pmap_paddr_t pa;
347 vm_map_address_t va;
348 vm_size_t len;
349 } ptov_table_entry;
350
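/*
 * Table of physical-to-virtual translations for regions of the physical
 * aperture that are mapped at an address other than the default
 * gPhysBase/gVirtBase linear offset (see arm_vm_physmap_slide()).
 */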
351 #define PTOV_TABLE_SIZE 8
352 SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
353 SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;
354
355
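/*
 * Translate a physical address to its kernel virtual address within the
 * physical aperture, consulting ptov_table for slid regions first and
 * falling back to the default gPhysBase/gVirtBase linear offset.
 * Panics if the PA lies outside the range covered by real_phys_size.
 */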
356 vm_map_address_t
357 phystokv(pmap_paddr_t pa)
358 {
359 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
360 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
361 return pa - ptov_table[i].pa + ptov_table[i].va;
362 }
363 }
364 if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
365 panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
366 (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
367 }
368 return pa - gPhysBase + gVirtBase;
369 }
370
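/*
 * Like phystokv(), but also clamps *max_len to the number of bytes that are
 * virtually contiguous starting at pa (bounded by the containing ptov_table
 * entry, or by the enclosing page for the default linear mapping).
 */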
371 vm_map_address_t
372 phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
373 {
374 vm_size_t len;
375 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
376 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
377 len = ptov_table[i].len - (pa - ptov_table[i].pa);
378 if (*max_len > len) {
379 *max_len = len;
380 }
381 return pa - ptov_table[i].pa + ptov_table[i].va;
382 }
383 }
384 len = PAGE_SIZE - (pa & PAGE_MASK);
385 if (*max_len > len) {
386 *max_len = len;
387 }
388 if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
389 panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
390 (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
391 }
392 return pa - gPhysBase + gVirtBase;
393 }
394
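/*
 * Reverse translation: convert a static kernel virtual address back to its
 * physical address, consulting ptov_table before applying the default
 * gVirtBase/gPhysBase offset. Panics if the VA falls outside the linearly
 * mapped range.
 */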
395 vm_offset_t
396 ml_static_vtop(vm_offset_t va)
397 {
398 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
399 if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len))) {
400 return va - ptov_table[i].va + ptov_table[i].pa;
401 }
402 }
403 if (__improbable((va < gVirtBase) || (((vm_address_t)(va) - gVirtBase) >= gPhysSize))) {
404 panic("%s: illegal VA: %p; virt base 0x%llx, size 0x%llx", __func__,
405 (void*)va, (unsigned long long)gVirtBase, (unsigned long long)gPhysSize);
406 }
407 return (vm_address_t)(va) - gVirtBase + gPhysBase;
408 }
409
410 /*
411 * This rounds the given address up to the nearest boundary for a PTE contiguous
412 * hint.
413 */
414 static vm_offset_t
415 round_up_pte_hint_address(vm_offset_t address)
416 {
417 vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
418 return (address + (hint_size - 1)) & ~(hint_size - 1);
419 }
420
421 /* allocate a page for a page table: we support static and dynamic mappings.
422 *
423 * returns a virtual address for the allocated page
424 *
425 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
426 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
427 *
428 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
429 */
430
431 vm_offset_t
432 alloc_ptpage(boolean_t map_static)
433 {
434 vm_offset_t vaddr;
435
436 #if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
437 map_static = FALSE;
438 #endif
439
440 if (!ropage_next) {
441 ropage_next = (vm_offset_t)&ropagetable_begin;
442 }
443
444 if (map_static) {
445 assert(ropage_next < (vm_offset_t)&ropagetable_end);
446
447 vaddr = ropage_next;
448 ropage_next += ARM_PGBYTES;
449
450 return vaddr;
451 } else {
452 vaddr = phystokv(avail_start);
453 avail_start += ARM_PGBYTES;
454
455 return vaddr;
456 }
457 }
458
459 #if DEBUG
460
461 void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);
462
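/*
 * Print the contiguous static/dynamic runs mapped by a single L2 table,
 * accumulating the read-only and read-write totals into *rosz_out and *rwsz_out.
 */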
463 void
464 dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out)
465 {
466 unsigned int i;
467 boolean_t cur_ro, prev_ro = 0;
468 int start_entry = -1;
469 tt_entry_t cur, prev = 0;
470 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
471 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
472 boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
473 kvtophys((vm_offset_t)tt) < roend;
474
475 for (i = 0; i < TTE_PGENTRIES; i++) {
476 int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
477 cur = tt[i] & ARM_TTE_TABLE_MASK;
478
479 if (tt_static) {
480 /* addresses mapped by this entry are static if it is a block mapping,
481 * or the table was allocated from the RO page table region */
482 cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
483 } else {
484 cur_ro = 0;
485 }
486
487 if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
488 uintptr_t start, end, sz;
489
490 start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
491 start += tt_base;
492 end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
493 end += tt_base;
494
495 sz = end - start + 1;
496 printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
497 indent * 4, "",
498 (uint32_t)(start >> 32), (uint32_t)start,
499 (uint32_t)(end >> 32), (uint32_t)end,
500 prev_ro ? "Static " : "Dynamic",
501 (sz >> 20));
502
503 if (prev_ro) {
504 *rosz_out += sz;
505 } else {
506 *rwsz_out += sz;
507 }
508 }
509
510 if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
511 start_entry = i;
512 }
513
514 prev = cur;
515 prev_ro = cur_ro;
516 }
517 }
518
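/*
 * Walk the root translation table (cpu_tte) and print a summary of which
 * portions of the kernel address space are mapped by static (read-only)
 * versus dynamic page tables, descending into each L2 table via dump_kva_l2().
 */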
519 void
520 dump_kva_space()
521 {
522 uint64_t tot_rosz = 0, tot_rwsz = 0;
523 int ro_ptpages, rw_ptpages;
524 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
525 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
526 boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
527 kvtophys((vm_offset_t)cpu_tte) < roend;
528 uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);
529
530 printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");
531
532 for (unsigned int i = 0; i < TTE_PGENTRIES; i++) {
533 pmap_paddr_t cur;
534 boolean_t cur_ro;
535 uintptr_t start, end;
536 uint64_t rosz = 0, rwsz = 0;
537
538 if ((cpu_tte[i] & ARM_TTE_VALID) == 0) {
539 continue;
540 }
541
542 cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
543 start = (uint64_t)i << ARM_TT_L1_SHIFT;
544 start = start + kva_base;
545 end = start + (ARM_TT_L1_SIZE - 1);
546 cur_ro = cur >= robegin && cur < roend;
547
548 printf("0x%08x_%08x-0x%08x_%08x %s\n",
549 (uint32_t)(start >> 32), (uint32_t)start,
550 (uint32_t)(end >> 32), (uint32_t)end,
551 cur_ro ? "Static " : "Dynamic");
552
553 dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
554 tot_rosz += rosz;
555 tot_rwsz += rwsz;
556 }
557
558 printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
559 tot_rosz >> 20,
560 tot_rwsz >> 20,
561 (tot_rosz >> 20) + (tot_rwsz >> 20));
562
563 ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
564 rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
565 printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
566 }
567
568 #endif /* DEBUG */
569
570 #if __ARM_KERNEL_PROTECT__ || XNU_MONITOR
571 /*
572 * arm_vm_map:
573 * root_ttp: The kernel virtual address for the root of the target page tables
574 * vaddr: The target virtual address
575 * pte: A page table entry value (may be ARM_PTE_EMPTY)
576 *
577 * This function installs pte at vaddr in root_ttp. Any page table pages needed
578 * to install pte will be allocated by this function.
579 */
580 static void
581 arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
582 {
583 vm_offset_t ptpage = 0;
584 tt_entry_t * ttp = root_ttp;
585
586 tt_entry_t * l1_ttep = NULL;
587 tt_entry_t l1_tte = 0;
588
589 tt_entry_t * l2_ttep = NULL;
590 tt_entry_t l2_tte = 0;
591 pt_entry_t * ptep = NULL;
592 pt_entry_t cpte = 0;
593
594 /*
595 * Walk the target page table to find the PTE for the given virtual
596 * address. Allocate any page table pages needed to do this.
597 */
598 l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
599 l1_tte = *l1_ttep;
600
601 if (l1_tte == ARM_TTE_EMPTY) {
602 ptpage = alloc_ptpage(TRUE);
603 bzero((void *)ptpage, ARM_PGBYTES);
604 l1_tte = kvtophys(ptpage);
605 l1_tte &= ARM_TTE_TABLE_MASK;
606 l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA);
607 *l1_ttep = l1_tte;
608 ptpage = 0;
609 }
610
611 ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
612
613 l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
614 l2_tte = *l2_ttep;
615
616 if (l2_tte == ARM_TTE_EMPTY) {
617 ptpage = alloc_ptpage(TRUE);
618 bzero((void *)ptpage, ARM_PGBYTES);
619 l2_tte = kvtophys(ptpage);
620 l2_tte &= ARM_TTE_TABLE_MASK;
621 l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
622 *l2_ttep = l2_tte;
623 ptpage = 0;
624 }
625
626 ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);
627
628 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
629 cpte = *ptep;
630
631 /*
632 * If the existing PTE is not empty, then we are replacing a valid
633 * mapping.
634 */
635 if (cpte != ARM_PTE_EMPTY) {
636 panic("%s: cpte=%#llx is not empty, "
637 "vaddr=%#lx, pte=%#llx",
638 __FUNCTION__, cpte,
639 vaddr, pte);
640 }
641
642 *ptep = pte;
643 }
644
645 #endif // __ARM_KERNEL_PROTECT || XNU_MONITOR
646
647 #if __ARM_KERNEL_PROTECT__
648
649 /*
650 * arm_vm_kernel_el0_map:
651 * vaddr: The target virtual address
652 * pte: A page table entry value (may be ARM_PTE_EMPTY)
653 *
654 * This function installs pte at vaddr for the EL0 kernel mappings.
655 */
656 static void
657 arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
658 {
659 /* Calculate where vaddr will be in the EL1 kernel page tables. */
660 vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
661 arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
662 }
663
664 /*
665 * arm_vm_kernel_el1_map:
666 * vaddr: The target virtual address
667 * pte: A page table entry value (may be ARM_PTE_EMPTY)
668 *
669 * This function installs pte at vaddr for the EL1 kernel mappings.
670 */
671 static void
672 arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte)
673 {
674 arm_vm_map(cpu_tte, vaddr, pte);
675 }
676
677 /*
678 * arm_vm_kernel_pte:
679 * vaddr: The target virtual address
680 *
681 * This function returns the PTE value for the given vaddr from the kernel page
682 * tables. If the region has been block mapped, we return what an
683 * equivalent PTE value would be (as regards permissions and flags). We also
684 * remove the HINT bit (as we are not necessarily creating contiguous mappings).
685 */
686 static pt_entry_t
687 arm_vm_kernel_pte(vm_offset_t vaddr)
688 {
689 tt_entry_t * ttp = cpu_tte;
690 tt_entry_t * ttep = NULL;
691 tt_entry_t tte = 0;
692 pt_entry_t * ptep = NULL;
693 pt_entry_t pte = 0;
694
695 ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
696 tte = *ttep;
697
698 assert(tte & ARM_TTE_VALID);
699
700 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
701 /* This is a block mapping; return the equivalent PTE value. */
702 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
703 pte |= ARM_PTE_TYPE_VALID;
704 pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
705 pte &= ~ARM_PTE_HINT_MASK;
706 return pte;
707 }
708
709 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
710 ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
711 tte = *ttep;
712
713 assert(tte & ARM_TTE_VALID);
714
715 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
716 /* This is a block mapping; return the equivalent PTE value. */
717 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
718 pte |= ARM_PTE_TYPE_VALID;
719 pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
720 pte &= ~ARM_PTE_HINT_MASK;
721 return pte;
722 }
723
724 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
725
726 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
727 pte = *ptep;
728 pte &= ~ARM_PTE_HINT_MASK;
729 return pte;
730 }
731
732 /*
733 * arm_vm_prepare_kernel_el0_mappings:
734 * alloc_only: Indicates if PTE values should be copied from the EL1 kernel
735 * mappings.
736 *
737 * This function expands the kernel page tables to support the EL0 kernel
738 * mappings, and conditionally installs the PTE values for the EL0 kernel
739 * mappings (if alloc_only is false).
740 */
741 static void
742 arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
743 {
744 pt_entry_t pte = 0;
745 vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
746 vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
747 vm_offset_t cur = 0;
748 vm_offset_t cur_fixed = 0;
749
750 /* Expand for/map the exception vectors in the EL0 kernel mappings. */
751 for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
752 /*
753 * We map the exception vectors at a different address than that
754 * of the kernelcache to avoid sharing page table pages with the
755 * kernelcache (as this may cause issues with TLB caching of
756 * page table pages).
757 */
758 if (!alloc_only) {
759 pte = arm_vm_kernel_pte(cur);
760 }
761
762 arm_vm_kernel_el1_map(cur_fixed, pte);
763 arm_vm_kernel_el0_map(cur_fixed, pte);
764 }
765
766 __builtin_arm_dmb(DMB_ISH);
767 __builtin_arm_isb(ISB_SY);
768
769 if (!alloc_only) {
770 /*
771 * If we have created the alternate exception vector mappings,
772 * the boot CPU may now switch over to them.
773 */
774 set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
775 __builtin_arm_isb(ISB_SY);
776 }
777 }
778
779 /*
780 * arm_vm_populate_kernel_el0_mappings:
781 *
782 * This function adds all required mappings to the EL0 kernel mappings.
783 */
784 static void
785 arm_vm_populate_kernel_el0_mappings(void)
786 {
787 arm_vm_prepare_kernel_el0_mappings(FALSE);
788 }
789
790 /*
791 * arm_vm_expand_kernel_el0_mappings:
792 *
793 * This function expands the kernel page tables to accommodate the EL0 kernel
794 * mappings.
795 */
796 static void
797 arm_vm_expand_kernel_el0_mappings(void)
798 {
799 arm_vm_prepare_kernel_el0_mappings(TRUE);
800 }
801 #endif /* __ARM_KERNEL_PROTECT__ */
802
803 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
804 extern void bootstrap_instructions;
805
806 /*
807 * arm_replace_identity_map takes the V=P map that we construct in start.s
808 * and repurposes it in order to have it map only the page we need in order
809 * to turn on the MMU. This prevents us from running into issues where
810 * KTRR will cause us to fault on executable block mappings that cross the
811 * KTRR boundary.
812 */
813 static void
814 arm_replace_identity_map(void)
815 {
816 vm_offset_t addr;
817 pmap_paddr_t paddr;
818
819 pmap_paddr_t l1_ptp_phys = 0;
820 tt_entry_t *l1_ptp_virt = NULL;
821 tt_entry_t *tte1 = NULL;
822 pmap_paddr_t l2_ptp_phys = 0;
823 tt_entry_t *l2_ptp_virt = NULL;
824 tt_entry_t *tte2 = NULL;
825 pmap_paddr_t l3_ptp_phys = 0;
826 pt_entry_t *l3_ptp_virt = NULL;
827 pt_entry_t *ptep = NULL;
828
829 addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
830 paddr = kvtophys(addr);
831
832 /*
833 * Grab references to the V=P page tables, and allocate an L3 page.
834 */
835 l1_ptp_phys = kvtophys((vm_offset_t)&bootstrap_pagetables);
836 l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
837 tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];
838
839 l2_ptp_virt = L2_TABLE_VA(tte1);
840 l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
841 tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];
842
843 l3_ptp_virt = (pt_entry_t *)alloc_ptpage(TRUE);
844 l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
845 ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];
846
847 /*
848 * Replace the large V=P mapping with a mapping that provides only the
849 * mappings needed to turn on the MMU.
850 */
851
852 bzero(l1_ptp_virt, ARM_PGBYTES);
853 *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
854
855 bzero(l2_ptp_virt, ARM_PGBYTES);
856 *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);
857
858 *ptep = (paddr & ARM_PTE_MASK) |
859 ARM_PTE_TYPE_VALID |
860 ARM_PTE_SH(SH_OUTER_MEMORY) |
861 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
862 ARM_PTE_AF |
863 ARM_PTE_AP(AP_RONA) |
864 ARM_PTE_NX;
865 }
866 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
867
868 tt_entry_t *arm_kva_to_tte(vm_offset_t);
869
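/*
 * Return a pointer to the L2 (twig) TTE in the kernel's static page tables
 * that covers the given kernel virtual address.
 */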
870 tt_entry_t *
871 arm_kva_to_tte(vm_offset_t va)
872 {
873 tt_entry_t *tte1, *tte2;
874 tte1 = cpu_tte + L1_TABLE_INDEX(va);
875 tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);
876
877 return tte2;
878 }
879
880 #if XNU_MONITOR
881
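/*
 * Return a pointer to the L3 PTE covering the given kernel virtual address,
 * walking one level beyond arm_kva_to_tte(). Assumes the address is mapped
 * at page (not block) granularity.
 */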
882 static inline pt_entry_t *
883 arm_kva_to_pte(vm_offset_t va)
884 {
885 tt_entry_t *tte2 = arm_kva_to_tte(va);
886 return L3_TABLE_VA(tte2) + L3_TABLE_INDEX(va);
887 }
888
889 #endif
890
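/*
 * Granule flags passed to the arm_vm_page_granular_* routines: permit the use
 * of L2 block mappings and/or the PTE contiguous hint for a given region.
 */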
891 #define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
892 #define ARM64_GRANULE_ALLOW_HINT (1 << 1)
893
894 /**
895 * Updates a translation table entry (TTE) with the supplied value, unless doing so might render
896 * the pagetable region read-only before subsequent updates have finished. In that case, the TTE
897 * value will be saved off for deferred processing.
898 *
899 * @param ttep address of the TTE to update
900 * @param entry the value to store in ttep
901 * @param pa the base physical address mapped by the TTE
902 * @param ttebase L3-page- or L2-block-aligned base virtual address of the pagetable region
903 * @param granule mask indicating whether L2 block or L3 hint mappings are allowed for this segment
904 * @param deferred_ttep_pair 2-element array of addresses of deferred TTEs
905 * @param deferred_tte_pair 2-element array containing TTE values for deferred assignment to
906 * corresponding elements of deferred_ttep_pair
907 */
908 static void
909 update_or_defer_tte(tt_entry_t *ttep, tt_entry_t entry, pmap_paddr_t pa, vm_map_address_t ttebase,
910 unsigned granule __unused, tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
911 {
912 /*
913 * If we're trying to assign an entry that maps the current TTE region (identified by ttebase),
914 * and the pagetable is already live (indicated by kva_active), defer assignment of the current
915 * entry and possibly the entry after it until all other mappings in the segment have been
916 * updated. Otherwise we may end up immediately marking the pagetable region read-only
917 * leading to a fault later on a later assignment if we manage to outrun the TLB. This can
918 * happen on KTRR/CTRR-enabled devices when marking segDATACONST read-only, as the pagetables
919 * that map that segment must come from the segment itself. We therefore store the initial
920 * recursive TTE in deferred_ttep_pair[0] and its value in deferred_tte_pair[0]. We may also
921 * defer assignment of the TTE following that recursive TTE and store its value in
922 * deferred_tte_pair[1], because the TTE region following the current one may also contain
923 * pagetables and we must avoid marking that region read-only before updating those tables.
924 *
925 * We require that such recursive mappings must exist in regions that can be mapped with L2
926 * block entries if they are sufficiently large. This is what allows us to assume that no
927 * more than 2 deferred TTEs will be required, because:
928 * --If more than 2 adjacent L3 PTEs were required to map our pagetables, that would mean
929 * we would have at least one full L3 pagetable page and would instead use an L2 block.
930 * --If more than 2 adjacent L2 blocks were required to map our pagetables, that would
931 * mean we would have at least one full L2-block-sized region of TTEs and something
932 * is very wrong because no segment should be that large.
933 */
934 if ((deferred_ttep_pair != NULL) && (deferred_ttep_pair[0] != NULL) && (ttep == (deferred_ttep_pair[0] + 1))) {
935 assert(deferred_tte_pair[1] == 0);
936 deferred_ttep_pair[1] = ttep;
937 deferred_tte_pair[1] = entry;
938 } else if (kva_active && (phystokv(pa) == ttebase)) {
939 assert(deferred_ttep_pair != NULL);
940 assert(granule & ARM64_GRANULE_ALLOW_BLOCK);
941 if (deferred_ttep_pair[0] == NULL) {
942 deferred_ttep_pair[0] = ttep;
943 deferred_tte_pair[0] = entry;
944 } else {
945 assert(deferred_ttep_pair[1] == NULL);
946 deferred_ttep_pair[1] = ttep;
947 deferred_tte_pair[1] = entry;
948 }
949 } else {
950 *ttep = entry;
951 }
952 }
953
954
955 /*
956 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
957 * neccessary) allocate a page for the L3 table and update the corresponding L2
958 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
959 * This expects to be invoked on a L2 entry or sub L2 entry granularity, so this should
960 * not be invoked from a context that does not do L2 iteration separately (basically,
961 * don't call this except from arm_vm_page_granular_prot).
962 *
963 * unsigned granule: 0 => force to page granule, or a combination of
964 * ARM64_GRANULE_* flags declared above.
965 */
966
967 static void
968 arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
969 int pte_prot_APX, int pte_prot_XN, unsigned granule,
970 tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
971 {
972 if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
973 tt_entry_t *tte2;
974 tt_entry_t tmplate;
975 pmap_paddr_t pa;
976 pt_entry_t *ppte, ptmp;
977 addr64_t ppte_phys;
978 unsigned i;
979
980 va &= ~ARM_TT_L2_OFFMASK;
981 pa = va - gVirtBase + gPhysBase - pa_offset;
982
983 if (pa >= real_avail_end) {
984 return;
985 }
986
987 tte2 = arm_kva_to_tte(va);
988
989 assert(_end >= va);
990 tmplate = *tte2;
991
992 if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
993 /* pick up the existing page table. */
994 ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
995 } else {
996 // TTE must be reincarnated with page level mappings.
997
998 // ... but we don't want to break up blocks on live
999 // translation tables.
1000 assert(!kva_active);
1001
1002 ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
1003 bzero(ppte, ARM_PGBYTES);
1004 ppte_phys = kvtophys((vm_offset_t)ppte);
1005
1006 *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
1007 }
1008
1009 vm_offset_t len = _end - va;
1010 if ((pa + len) > real_avail_end) {
1011 _end -= (pa + len - real_avail_end);
1012 }
1013 assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);
1014
1015 /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
1016 * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
1017 * a ragged non-PAGE_SIZE-aligned edge. */
1018 vm_offset_t rounded_end = round_page(_end);
1019 /* Apply the desired protections to the specified page range */
1020 for (i = 0; i <= (ARM_TT_L3_INDEX_MASK >> ARM_TT_L3_SHIFT); i++) {
1021 if ((start <= va) && (va < rounded_end)) {
1022 ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
1023 ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
1024 ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
1025 ptmp = ptmp | ARM_PTE_NX;
1026 #if __ARM_KERNEL_PROTECT__
1027 ptmp = ptmp | ARM_PTE_NG;
1028 #endif /* __ARM_KERNEL_PROTECT__ */
1029
1030 if (pte_prot_XN) {
1031 ptmp = ptmp | ARM_PTE_PNX;
1032 }
1033
1034 /*
1035 * If we can, apply the contiguous hint to this range. The hint is
1036 * applicable if the current address falls within a hint-sized range that will
1037 * be fully covered by this mapping request.
1038 */
1039 if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
1040 (granule & ARM64_GRANULE_ALLOW_HINT) && use_contiguous_hint) {
1041 assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
1042 ptmp |= ARM_PTE_HINT;
1043 /* Do not attempt to reapply the hint bit to an already-active mapping.
1044 * This very likely means we're attempting to change attributes on an already-active mapping,
1045 * which violates the requirement of the hint bit.*/
1046 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT));
1047 }
1048 /*
1049 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
1050 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
1051 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
1052 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
1053 * could produce a later TLB conflict.
1054 */
1055 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));
1056
1057 update_or_defer_tte(&ppte[i], ptmp, pa, (vm_map_address_t)ppte, granule, deferred_ttep_pair, deferred_tte_pair);
1058 }
1059
1060 va += ARM_PGBYTES;
1061 pa += ARM_PGBYTES;
1062 }
1063 }
1064 }
1065
1066 /*
1067 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
1068 * changing them. If a particular chunk necessitates L3 entries (for reasons of
1069 * alignment or length, or an explicit request that the entry be fully expanded), we
1070 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
1071 */
1072 static void
1073 arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
1074 int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
1075 unsigned granule)
1076 {
1077 tt_entry_t *deferred_ttep_pair[2] = {NULL};
1078 tt_entry_t deferred_tte_pair[2] = {0};
1079 vm_offset_t _end = start + size;
1080 vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;
1081
1082 if (size == 0x0UL) {
1083 return;
1084 }
1085
1086 if (align_start > _end) {
1087 align_start = _end;
1088 }
1089
1090 arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1091
1092 while ((_end - align_start) >= ARM_TT_L2_SIZE) {
1093 if (!(granule & ARM64_GRANULE_ALLOW_BLOCK)) {
1094 arm_vm_page_granular_helper(align_start, align_start + ARM_TT_L2_SIZE, align_start + 1, pa_offset,
1095 pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1096 } else {
1097 pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
1098 assert((pa & ARM_TT_L2_OFFMASK) == 0);
1099 tt_entry_t *tte2;
1100 tt_entry_t tmplate;
1101
1102 tte2 = arm_kva_to_tte(align_start);
1103
1104 if ((pa >= gPhysBase) && (pa < real_avail_end)) {
1105 tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
1106 | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
1107 | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
1108 | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
1109
1110 #if __ARM_KERNEL_PROTECT__
1111 tmplate = tmplate | ARM_TTE_BLOCK_NG;
1112 #endif /* __ARM_KERNEL_PROTECT__ */
1113 if (tte_prot_XN) {
1114 tmplate = tmplate | ARM_TTE_BLOCK_PNX;
1115 }
1116
1117 update_or_defer_tte(tte2, tmplate, pa, (vm_map_address_t)tte2 & ~ARM_TT_L2_OFFMASK,
1118 granule, deferred_ttep_pair, deferred_tte_pair);
1119 }
1120 }
1121 align_start += ARM_TT_L2_SIZE;
1122 }
1123
1124 if (align_start < _end) {
1125 arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1126 }
1127
1128 if (deferred_ttep_pair[0] != NULL) {
1129 #if DEBUG || DEVELOPMENT
1130 /*
1131 * Flush the TLB to catch bugs that might cause us to prematurely revoke write access from the pagetable page.
1132 * These bugs may otherwise be hidden by TLB entries in most cases, resulting in very rare panics.
1133 * Note that we always flush the TLB at the end of arm_vm_prot_finalize().
1134 */
1135 flush_mmu_tlb();
1136 #endif
1137 /*
1138 * The first TTE in the pair is a recursive mapping of the pagetable region, so we must update it last
1139 * to avoid potentially marking the page containing deferred_ttep_pair[1] read-only before it is updated.
1140 */
1141 if (deferred_tte_pair[1] != 0) {
1142 os_atomic_store(deferred_ttep_pair[1], deferred_tte_pair[1], release);
1143 }
1144 os_atomic_store(deferred_ttep_pair[0], deferred_tte_pair[0], release);
1145 }
1146 }
1147
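/*
 * Convenience wrappers around arm_vm_page_granular_prot():
 * RNX = read-only, no-execute; ROX = read-only, kernel-executable;
 * RWNX = read-write, no-execute.
 */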
1148 static inline void
1149 arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, unsigned granule)
1150 {
1151 arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, granule);
1152 }
1153
1154 static inline void
1155 arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, unsigned granule)
1156 {
1157 arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, granule);
1158 }
1159
1160 static inline void
1161 arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, unsigned granule)
1162 {
1163 arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, granule);
1164 }
1165
1166 /* used in the chosen/memory-map node, populated by iBoot. */
1167 typedef struct MemoryMapFileInfo {
1168 vm_offset_t paddr;
1169 size_t length;
1170 } MemoryMapFileInfo;
1171
1172 // Populate seg...AuxKC and fixup AuxKC permissions
1173 static bool
1174 arm_vm_auxkc_init(void)
1175 {
1176 if (auxkc_mh == 0 || auxkc_base == 0) {
1177 return false; // no auxKC.
1178 }
1179
1180 /* Fixup AuxKC and populate seg*AuxKC globals used below */
1181 arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1182
1183 if (segLOWESTAuxKC != segLOWEST) {
1184 panic("segLOWESTAuxKC (%p) not equal to segLOWEST (%p). auxkc_mh: %p, auxkc_base: %p",
1185 (void*)segLOWESTAuxKC, (void*)segLOWEST,
1186 (void*)auxkc_mh, (void*)auxkc_base);
1187 }
1188
1189 /*
1190 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1191 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1192 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1193 * its RO region before extending it.
1194 */
1195 assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1196 assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1197 if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1198 segHIGHESTROAuxKC = segHIGHESTAuxKC;
1199 }
1200
1201 /*
1202 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1203 * by CTRR, and the AuxKC RX region must be within the RO region.
1204 */
1205 assert(segHIGHESTROAuxKC == auxkc_right_above);
1206 assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1207 assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1208 assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1209 assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
1210
1211 if (segHIGHESTRXAuxKC < segLOWEST) {
1212 arm_vm_page_granular_RNX(segHIGHESTRXAuxKC, segLOWEST - segHIGHESTRXAuxKC, 0);
1213 }
1214 if (segLOWESTRXAuxKC < segHIGHESTRXAuxKC) {
1215 arm_vm_page_granular_ROX(segLOWESTRXAuxKC, segHIGHESTRXAuxKC - segLOWESTRXAuxKC, 0); // Refined in OSKext::readPrelinkedExtensions
1216 }
1217 if (segLOWESTROAuxKC < segLOWESTRXAuxKC) {
1218 arm_vm_page_granular_RNX(segLOWESTROAuxKC, segLOWESTRXAuxKC - segLOWESTROAuxKC, 0);
1219 }
1220 if (segLOWESTAuxKC < segLOWESTROAuxKC) {
1221 arm_vm_page_granular_RWNX(segLOWESTAuxKC, segLOWESTROAuxKC - segLOWESTAuxKC, 0);
1222 }
1223
1224 return true;
1225 }
1226
1227 void
1228 arm_vm_prot_init(__unused boot_args * args)
1229 {
1230 segLOWESTTEXT = UINT64_MAX;
1231 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
1232 segLOWESTTEXT = segPRELINKTEXTB;
1233 }
1234 assert(segSizeTEXT);
1235 if (segTEXTB < segLOWESTTEXT) {
1236 segLOWESTTEXT = segTEXTB;
1237 }
1238 assert(segLOWESTTEXT < UINT64_MAX);
1239
1240 segEXTRADATA = segLOWESTTEXT;
1241 segSizeEXTRADATA = 0;
1242
1243 segLOWEST = segLOWESTTEXT;
1244 segLOWESTRO = segLOWESTTEXT;
1245
1246 if (segLOWESTKC && segLOWESTKC < segLOWEST) {
1247 /*
1248 * kernel collections have segments below the kernel. In particular the collection mach header
1249 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
1250 */
1251 arm_vm_page_granular_RNX(segLOWESTKC, segLOWEST - segLOWESTKC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1252 segLOWEST = segLOWESTKC;
1253 if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
1254 segLOWESTRO = segLOWESTROKC;
1255 }
1256 if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
1257 segHIGHESTRO = segHIGHESTROKC;
1258 }
1259 }
1260
1261 DTEntry memory_map;
1262 MemoryMapFileInfo const *trustCacheRange;
1263 unsigned int trustCacheRangeSize;
1264 int err;
1265
1266 if (SecureDTIsLockedDown()) {
1267 segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
1268 segSizeEXTRADATA = PE_state.deviceTreeSize;
1269 }
1270
1271 err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1272 assert(err == kSuccess);
1273
1274 err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
1275 if (err == kSuccess) {
1276 assert(trustCacheRangeSize == sizeof(MemoryMapFileInfo));
1277
1278 if (segSizeEXTRADATA == 0) {
1279 segEXTRADATA = phystokv(trustCacheRange->paddr);
1280 segSizeEXTRADATA = trustCacheRange->length;
1281 } else {
1282 segSizeEXTRADATA += trustCacheRange->length;
1283 }
1284 }
1285
1286 if (segSizeEXTRADATA != 0) {
1287 if (segEXTRADATA <= segLOWEST) {
1288 segLOWEST = segEXTRADATA;
1289 if (segEXTRADATA <= segLOWESTRO) {
1290 segLOWESTRO = segEXTRADATA;
1291 }
1292 }
1293 #if !(DEBUG || DEVELOPMENT)
1294
1295
1296 else {
1297 panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
1298 }
1299 #endif /* !(DEBUG || DEVELOPMENT) */
1300
1301 arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1302 }
1303
1304 const MemoryMapFileInfo *auxKC_range, *auxKC_header_range;
1305 unsigned int auxKC_range_size, auxKC_header_range_size;
1306
1307 err = SecureDTGetProperty(memory_map, "AuxKC", (const void**)&auxKC_range,
1308 &auxKC_range_size);
1309 if (err != kSuccess) {
1310 goto noAuxKC;
1311 }
1312 assert(auxKC_range_size == sizeof(MemoryMapFileInfo));
1313 err = SecureDTGetProperty(memory_map, "AuxKC-mach_header",
1314 (const void**)&auxKC_header_range, &auxKC_header_range_size);
1315 if (err != kSuccess) {
1316 goto noAuxKC;
1317 }
1318 assert(auxKC_header_range_size == sizeof(MemoryMapFileInfo));
1319
1320 if (auxKC_header_range->paddr == 0 || auxKC_range->paddr == 0) {
1321 goto noAuxKC;
1322 }
1323
1324 auxkc_mh = phystokv(auxKC_header_range->paddr);
1325 auxkc_base = phystokv(auxKC_range->paddr);
1326
1327 if (auxkc_base < segLOWEST) {
1328 auxkc_right_above = segLOWEST;
1329 segLOWEST = auxkc_base;
1330 } else {
1331 panic("auxkc_base (%p) not below segLOWEST (%p)", (void*)auxkc_base, (void*)segLOWEST);
1332 }
1333
1334 /* Map AuxKC RWNX initially so that arm_vm_auxkc_init can traverse
1335 * it and apply fixups (after we're off the bootstrap translation
1336 * tables).
1337 */
1338 arm_vm_page_granular_RWNX(auxkc_base, auxKC_range->length, 0);
1339
1340 noAuxKC:
1341 /* Map coalesced kext TEXT segment RWNX for now */
1342 arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1343
1344 /* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
1345 arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions
1346
1347 /* Map coalesced kext TEXT_EXEC segment RX (could be empty) */
1348 arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Refined in OSKext::readPrelinkedExtensions
1349
1350 /* If the new segments are not present, set the space between PRELINK_TEXT and xnu TEXT to RWNX;
1351 * otherwise we no longer expect any space between the coalesced kext read-only segments and xnu RO segments.
1352 */
1353 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
1354 if (segSizePRELINKTEXT) {
1355 arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT),
1356 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1357 }
1358 } else {
1359 /*
1360 * If we have the new segments, we should still protect the gap between kext
1361 * read-only pages and kernel read-only pages, in the event that this gap
1362 * exists.
1363 */
1364 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
1365 arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST),
1366 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1367 }
1368 }
1369
1370 /*
1371 * Protection on kernel text is loose here to allow shenanigans early on. These
1372 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1373 * we currently patch LowResetVectorBase in cpu.c.
1374 *
1375 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1376 */
1377 arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1378
1379 /* Can DATACONST start out and stay RNX?
1380 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1381 * Make RNX in prot_finalize
1382 */
1383 arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1384
1385 arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1386
1387 #if XNU_MONITOR
1388 arm_vm_page_granular_ROX(segPPLTEXTB, segSizePPLTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1389 arm_vm_page_granular_ROX(segPPLTRAMPB, segSizePPLTRAMP, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1390 arm_vm_page_granular_RNX(segPPLDATACONSTB, segSizePPLDATACONST, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1391 #endif
1392
1393 /* DATA segment will remain RWNX */
1394 arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1395 #if XNU_MONITOR
1396 arm_vm_page_granular_RWNX(segPPLDATAB, segSizePPLDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1397 #endif
1398
1399 arm_vm_page_granular_RWNX(segHIBDATAB, segSizeHIBDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1400
1401 arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
1402 arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
1403 arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, 0);
1404 arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);
1405
1406 arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1407 arm_vm_page_granular_RNX(segKLDDATAB, segSizeKLDDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1408 arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1409 arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
1410 arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
1411 if (segLASTDATACONSTB) {
1412 arm_vm_page_granular_RWNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // __LASTDATA_CONST may be empty, but we cannot assume this
1413 }
1414 arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Prelink __DATA for kexts (RW data)
1415
1416 if (segSizePLKLLVMCOV > 0) {
1417 arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // LLVM code coverage data
1418 }
1419 arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* PreLinkInfoDictionary */
1420
1421 /* Record the bounds of the kernelcache. */
1422 vm_kernelcache_base = segLOWEST;
1423 vm_kernelcache_top = end_kern;
1424 }
1425
1426 /*
1427 * return < 0 for a < b
1428 * 0 for a == b
1429 * > 0 for a > b
1430 */
1431 typedef int (*cmpfunc_t)(const void *a, const void *b);
1432
1433 extern void
1434 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1435
1436 static int
1437 cmp_ptov_entries(const void *a, const void *b)
1438 {
1439 const ptov_table_entry *entry_a = a;
1440 const ptov_table_entry *entry_b = b;
1441 // Sort in descending order of segment length
1442 if (entry_a->len < entry_b->len) {
1443 return 1;
1444 } else if (entry_a->len > entry_b->len) {
1445 return -1;
1446 } else {
1447 return 0;
1448 }
1449 }
1450
1451 SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1452
1453 #define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
1454 #define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1455
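/*
 * Assign the next available physmap virtual address to the given range and
 * record the translation in temp_ptov_table. When block mappings are allowed,
 * the VA is adjusted so that it shares its L2 (twig) offset with the PA,
 * then the new mapping is created with the requested protections.
 */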
1456 static void
1457 arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, unsigned granule)
1458 {
1459 pmap_paddr_t pa_offset;
1460
1461 if (__improbable(ptov_index >= PTOV_TABLE_SIZE)) {
1462 panic("%s: PTOV table limit exceeded; segment va = 0x%llx, size = 0x%llx", __func__,
1463 (unsigned long long)orig_va, (unsigned long long)len);
1464 }
1465 assert((orig_va & ARM_PGMASK) == 0);
1466 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1467 if (ptov_index == 0) {
1468 temp_ptov_table[ptov_index].va = physmap_base;
1469 } else {
1470 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1471 }
1472 if (granule & ARM64_GRANULE_ALLOW_BLOCK) {
1473 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1474 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1475 if (new_offset < orig_offset) {
1476 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1477 } else if (new_offset > orig_offset) {
1478 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1479 }
1480 }
1481 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1482 temp_ptov_table[ptov_index].len = round_page(len);
1483 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1484 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, granule);
1485 ++ptov_index;
1486 }
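/*
 * Sketch of the alignment logic above (illustrative numbers, assuming 32MB twig blocks): if a
 * segment's physical address sits at offset 0x600000 within its block and the next free physmap
 * VA sits at offset 0x200000 within its block, the VA is advanced by 0x400000 so that both share
 * the same in-block offset; if the VA offset were instead larger than the PA offset, the VA would
 * be rounded up to the next block boundary plus the PA offset. Keeping the offsets congruent is
 * what allows arm_vm_page_granular_prot() to use block and contiguous-hint mappings rather than
 * individual page mappings.
 */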
1487
1488 #if XNU_MONITOR
1489
1490 SECURITY_READ_ONLY_LATE(static boolean_t) keep_linkedit = FALSE;
1491
1492 static void
arm_vm_physmap_init(boot_args *args)
1494 {
1495 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1496 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1497
1498 // This is memory that will either be handed back to the VM layer via ml_static_mfree(),
1499 // or will be available for general-purpose use. Physical aperture mappings for this memory
1500 // must be at page granularity, so that PPL ownership or cache attribute changes can be reflected
1501 // in the physical aperture mappings.
1502
1503 // Slid region between gPhysBase and beginning of protected text
1504 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
1505
1506 // kext bootstrap segments
1507 #if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1508 /* __KLD,__text is covered by the rorgn */
1509 arm_vm_physmap_slide(temp_ptov_table, segKLDB, segSizeKLD, AP_RONA, 0);
1510 #endif
1511 arm_vm_physmap_slide(temp_ptov_table, segKLDDATAB, segSizeKLDDATA, AP_RONA, 0);
1512
1513 // Early-boot data
1514 arm_vm_physmap_slide(temp_ptov_table, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
1515
1516 PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
1517 #if CONFIG_DTRACE
1518 if (dtrace_keep_kernel_symbols()) {
1519 keep_linkedit = TRUE;
1520 }
1521 #endif /* CONFIG_DTRACE */
1522 #if KASAN_DYNAMIC_BLACKLIST
1523 /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
1524 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
1525 keep_linkedit = TRUE;
1526 #endif
1527 if (!keep_linkedit) {
1528 // Kernel LINKEDIT
1529 arm_vm_physmap_slide(temp_ptov_table, segLINKB, segSizeLINK, AP_RWNA, 0);
1530
1531 if (segSizePLKLINKEDIT) {
1532 // Prelinked kernel LINKEDIT
1533 arm_vm_physmap_slide(temp_ptov_table, segPLKLINKEDITB, segSizePLKLINKEDIT, AP_RWNA, 0);
1534 }
1535 }
1536
1537 // Prelinked kernel plists
1538 arm_vm_physmap_slide(temp_ptov_table, segPRELINKINFOB, segSizePRELINKINFO, AP_RWNA, 0);
1539
1540 // Device tree (if not locked down), ramdisk, boot args
1541 arm_vm_physmap_slide(temp_ptov_table, end_kern, (args->topOfKernelData - gPhysBase + gVirtBase) - end_kern, AP_RWNA, 0);
1542 if (!SecureDTIsLockedDown()) {
1543 PE_slide_devicetree(temp_ptov_table[ptov_index - 1].va - end_kern);
1544 }
1545
1546 // Remainder of physical memory
1547 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1548 real_avail_end - args->topOfKernelData, AP_RWNA, 0);
1549
1550 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1551
1552 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1553 // segments should be placed earliest in the table to optimize lookup performance.
1554 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1555
1556 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1557 }
1558
1559 #else
1560
1561 static void
arm_vm_physmap_init(boot_args *args)
1563 {
1564 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1565 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1566
1567 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1568 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA,
1569 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1570
1571 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern,
1572 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* Device Tree (if not locked down), RAM Disk (if present), bootArgs */
1573
1574 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1575 real_avail_end - args->topOfKernelData, AP_RWNA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // rest of physmem
1576
1577 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1578
1579 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1580 // segments should be placed earliest in the table to optimize lookup performance.
1581 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1582
1583 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1584 }
1585
1586 #endif // XNU_MONITOR
1587
1588 void
arm_vm_prot_finalize(boot_args * args __unused)
1590 {
1591 /*
1592 * At this point, we are far enough along in the boot process that it will be
* safe to free up all of the memory preceding the kernel. It may in fact
1594 * be safe to do this earlier.
1595 *
1596 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1597 * as usable.
1598 */
1599
1600 /*
* If the old-style PRELINK segment exists, free the memory before it, and the memory after
* it up to XNU's text. Otherwise we're dealing with a new-style kernel cache, so we should
* just free the memory before the PRELINK_TEXT segment, since the rest of the kext
* read-only data segments should be immediately followed by XNU's TEXT segment.
1605 */
1606
1607 ml_static_mfree(phystokv(gPhysBase), segLOWEST - gVirtBase);
1608
1609 /*
1610 * KTRR support means we will be mucking with these pages and trying to
1611 * protect them; we cannot free the pages to the VM if we do this.
1612 */
1613 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1614 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1615 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1616 }
1617
1618 /* tighten permissions on kext read only data and code */
1619 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK);
1620 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1621
1622 cpu_stack_alloc(&BootCpuData);
1623 arm64_replace_bootstack(&BootCpuData);
1624 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1625
1626 #if __ARM_KERNEL_PROTECT__
1627 arm_vm_populate_kernel_el0_mappings();
1628 #endif /* __ARM_KERNEL_PROTECT__ */
1629
1630 #if XNU_MONITOR
1631 #if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1632 /* __KLD,__text is covered by the rorgn */
1633 for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
1634 pt_entry_t *pte = arm_kva_to_pte(va);
1635 *pte = ARM_PTE_EMPTY;
1636 }
1637 #endif
1638 for (vm_offset_t va = segKLDDATAB; va < (segKLDDATAB + segSizeKLDDATA); va += ARM_PGBYTES) {
1639 pt_entry_t *pte = arm_kva_to_pte(va);
1640 *pte = ARM_PTE_EMPTY;
1641 }
1642 /* Clear the original stack mappings; these pages should be mapped through ptov_table. */
1643 for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
1644 pt_entry_t *pte = arm_kva_to_pte(va);
1645 *pte = ARM_PTE_EMPTY;
1646 }
1647 /* Clear the original PRELINKINFO mapping. This segment should be jettisoned during I/O Kit
1648 * initialization before we reach this point. */
1649 for (vm_offset_t va = segPRELINKINFOB; va < (segPRELINKINFOB + segSizePRELINKINFO); va += ARM_PGBYTES) {
1650 pt_entry_t *pte = arm_kva_to_pte(va);
1651 *pte = ARM_PTE_EMPTY;
1652 }
1653 if (!keep_linkedit) {
1654 for (vm_offset_t va = segLINKB; va < (segLINKB + segSizeLINK); va += ARM_PGBYTES) {
1655 pt_entry_t *pte = arm_kva_to_pte(va);
1656 *pte = ARM_PTE_EMPTY;
1657 }
1658 if (segSizePLKLINKEDIT) {
1659 for (vm_offset_t va = segPLKLINKEDITB; va < (segPLKLINKEDITB + segSizePLKLINKEDIT); va += ARM_PGBYTES) {
1660 pt_entry_t *pte = arm_kva_to_pte(va);
1661 *pte = ARM_PTE_EMPTY;
1662 }
1663 }
1664 }
1665 #endif /* XNU_MONITOR */
1666
1667 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1668 /*
1669 * __LAST,__pinst should no longer be executable.
1670 */
1671 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK);
1672
1673 /* __LASTDATA_CONST should no longer be writable. */
1674 if (segLASTDATACONSTB) {
1675 arm_vm_page_granular_RNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1676 }
1677
1678 /*
1679 * __KLD,__text should no longer be executable.
1680 */
1681 arm_vm_page_granular_RNX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK);
1682
1683 /*
1684 * Must wait until all other region permissions are set before locking down DATA_CONST
1685 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1686 * and will become immutable.
1687 */
1688 #endif
1689
1690 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1691
1692 __builtin_arm_dsb(DSB_ISH);
1693 flush_mmu_tlb();
1694 }
1695
1696 /*
1697 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1698 * address accesses. It can be enabled separately for TTBR0 (user) and
1699 * TTBR1 (kernel).
1700 */
1701 void
arm_set_kernel_tbi(void)
1703 {
1704 #if !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI
1705 uint64_t old_tcr, new_tcr;
1706
1707 old_tcr = new_tcr = get_tcr();
1708 /*
1709 * For kernel configurations that require TBI support on
1710 * PAC systems, we enable DATA TBI only.
1711 */
1712 new_tcr |= TCR_TBI1_TOPBYTE_IGNORED;
1713 new_tcr |= TCR_TBID1_ENABLE;
1714
1715 if (old_tcr != new_tcr) {
1716 set_tcr(new_tcr);
1717 sysreg_restore.tcr_el1 = new_tcr;
1718 }
1719 #endif /* !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI */
1720 }
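/*
 * Illustrative example (not from the original source): with TBI1 and TBID1 set, a kernel data
 * access to an address such as 0x3Cfffffe12345678 is translated as if the top byte were 0xFF
 * (i.e. the same mapping as 0xFFfffffe12345678), so software can carry an 8-bit tag in bits
 * 63:56 of data pointers. Because TBID1 restricts the behavior to data accesses, instruction
 * fetches still require a canonical, untagged address.
 */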
1721
1722 static void
arm_set_user_tbi(void)
1724 {
1725 #if !__ARM_KERNEL_PROTECT__
1726 uint64_t old_tcr, new_tcr;
1727
1728 old_tcr = new_tcr = get_tcr();
1729 new_tcr |= TCR_TBI0_TOPBYTE_IGNORED;
1730
1731 if (old_tcr != new_tcr) {
1732 set_tcr(new_tcr);
1733 sysreg_restore.tcr_el1 = new_tcr;
1734 }
1735 #endif /* !__ARM_KERNEL_PROTECT__ */
1736 }
1737
1738 /*
1739 * Initialize and enter blank (invalid) page tables in a L1 translation table for a given VA range.
1740 *
1741 * This is a helper function used to build up the initial page tables for the kernel translation table.
1742 * With KERNEL_INTEGRITY we keep at least the root level of the kernel page table immutable, thus the need
1743 * to preallocate before machine_lockdown any L1 entries necessary during the entire kernel runtime.
1744 *
* For a given VA range, if necessary, allocate new L2 translation tables and install the table entries in
* the appropriate L1 table indexes. This is called before the translation table is active.
1747 *
1748 * parameters:
1749 *
1750 * tt: virtual address of L1 translation table to modify
1751 * start: beginning of VA range
1752 * end: end of VA range
1753 * static_map: whether to allocate the new translation table page from read only memory
1754 * table_attrs: attributes of new table entry in addition to VALID and TYPE_TABLE attributes
1755 *
1756 */
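/*
 * Typical use (see arm_vm_init() below): preallocating the L2 tables that cover the static
 * kernel mappings, e.g.
 *   init_ptpages(cpu_tte, gVirtBase, dynamic_memory_begin, TRUE,
 *                ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
 * which walks the L1 slots spanning [gVirtBase, dynamic_memory_begin) and installs a zero-filled
 * table page in each empty slot, allocating those pages from read-only memory.
 */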
1757
1758 static void
init_ptpages(tt_entry_t *tt, vm_map_address_t start, vm_map_address_t end, bool static_map, uint64_t table_attrs)
1760 {
1761 tt_entry_t *l1_tte;
1762 vm_offset_t ptpage_vaddr;
1763
1764 l1_tte = tt + ((start & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1765
1766 while (start < end) {
1767 if (*l1_tte == ARM_TTE_EMPTY) {
1768 /* Allocate a page and setup L1 Table TTE in L1 */
1769 ptpage_vaddr = alloc_ptpage(static_map);
1770 *l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | table_attrs;
1771 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1772 }
1773
1774 if ((start + ARM_TT_L1_SIZE) < start) {
1775 /* If this is the last L1 entry, it must cover the last mapping. */
1776 break;
1777 }
1778
1779 start += ARM_TT_L1_SIZE;
1780 l1_tte++;
1781 }
1782 }
1783
1784 #define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1785 #define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
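/*
 * Worked example of the slide computation below (hypothetical random value): if early_random()
 * returns 0x9E3779B97F4A7C15, masking with ARM64_PHYSMAP_SLIDE_MASK (0x3FFFFFFF) yields
 * 0x3F4A7C15, and clearing the page-offset bits (16KB pages assumed) gives a physmap slide of
 * 0x3F4A4000, i.e. a page-aligned offset somewhere within the 1GB slide range.
 */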
1786
1787 void
arm_vm_init(uint64_t memory_size, boot_args * args)
1789 {
1790 vm_map_address_t va_l1, va_l1_end;
1791 tt_entry_t *cpu_l1_tte;
1792 vm_map_address_t va_l2, va_l2_end;
1793 tt_entry_t *cpu_l2_tte;
1794 pmap_paddr_t boot_ttep;
1795 tt_entry_t *boot_tte;
1796 uint64_t mem_segments;
1797 vm_offset_t ptpage_vaddr;
1798 vm_map_address_t dynamic_memory_begin;
1799
1800 /*
1801 * Get the virtual and physical kernel-managed memory base from boot_args.
1802 */
1803 gVirtBase = args->virtBase;
1804 gPhysBase = args->physBase;
1805 #if KASAN
1806 real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1807 #else
1808 real_phys_size = args->memSize;
1809 #endif
1810 /*
1811 * Ensure the physical region we specify for the VM to manage ends on a
1812 * software page boundary. Note that the software page size (PAGE_SIZE)
1813 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1814 * We must round the reported memory size down to the nearest PAGE_SIZE
1815 * boundary to ensure the VM does not try to manage a page it does not
1816 * completely own. The KASAN shadow region, if present, is managed entirely
1817 * in units of the hardware page size and should not need similar treatment.
1818 */
1819 gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
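/*
 * Worked example (hypothetical values, assuming gPhysBase is page-aligned and a 16KB software
 * page size): if the bootloader reports memSize == 0x3FFFE000, the expression above rounds the
 * managed size down to 0x3FFFC000, dropping the trailing 8KB partial page so the VM never
 * manages a software page it does not completely own.
 */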
1820
1821 mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
1822
1823 if ((memory_size != 0) && (mem_size > memory_size)) {
1824 mem_size = memory_size;
1825 max_mem_actual = memory_size;
1826 } else {
1827 max_mem_actual = mem_actual;
1828 }
1829 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
1830 panic("Unsupported memory configuration %lx", mem_size);
1831 }
1832
1833 #if defined(ARM_LARGE_MEMORY)
1834 unsigned long physmap_l1_entries = ((real_phys_size + ARM64_PHYSMAP_SLIDE_RANGE) >> ARM_TT_L1_SHIFT) + 1;
1835 physmap_base = VM_MIN_KERNEL_ADDRESS - (physmap_l1_entries << ARM_TT_L1_SHIFT);
1836 #else
1837 physmap_base = phystokv(args->topOfKernelData);
1838 #endif
1839
1840 // Slide the physical aperture to a random page-aligned location within the slide range
1841 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1842 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1843
1844 physmap_base += physmap_slide;
1845
1846 #if XNU_MONITOR
1847 physmap_base = ROUND_TWIG(physmap_base);
1848 #if defined(ARM_LARGE_MEMORY)
1849 static_memory_end = phystokv(args->topOfKernelData);
1850 #else
1851 static_memory_end = physmap_base + mem_size;
1852 #endif // ARM_LARGE_MEMORY
1853 physmap_end = physmap_base + real_phys_size;
1854 #else
1855 #if defined(ARM_LARGE_MEMORY)
1856 /* For large memory systems with no PPL such as virtual machines */
1857 static_memory_end = phystokv(args->topOfKernelData);
1858 physmap_end = physmap_base + real_phys_size;
1859 #else
1860 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1861 physmap_end = physmap_base + real_phys_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE);
1862 #endif // ARM_LARGE_MEMORY
1863 #endif
1864
1865 #if KASAN && !defined(ARM_LARGE_MEMORY)
1866 /* add the KASAN stolen memory to the physmap */
1867 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1868 #else
1869 dynamic_memory_begin = static_memory_end;
1870 #endif
1871 #if XNU_MONITOR
1872 pmap_stacks_start = (void*)dynamic_memory_begin;
1873 dynamic_memory_begin += PPL_STACK_REGION_SIZE;
1874 pmap_stacks_end = (void*)dynamic_memory_begin;
1875 #endif
1876 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
1877 panic("Unsupported memory configuration %lx", mem_size);
1878 }
1879
1880 boot_tte = (tt_entry_t *)&bootstrap_pagetables;
1881 boot_ttep = kvtophys((vm_offset_t)boot_tte);
1882
1883 #if DEVELOPMENT || DEBUG
/* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently large to
* hold our bootstrap mappings for any possible slide */
1886 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1887 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1888 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1889 size_t pages_used = 2 * (l1_entries + 1);
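/* Illustrative example (assuming 16KB pages, where each L1 entry spans 64GB): a ~6GB bootstrap
 * window gives l1_entries == 2 and pages_used == 2 * (2 + 1) == 6 bootstrap table pages for the
 * V=P and KVA mappings combined. */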
1890 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1891 panic("BOOTSTRAP_TABLE_SIZE too small for memory config");
1892 }
1893 #endif
1894
1895 /*
1896 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1897 * TTBR1 L1, TTBR1 L2 - kernel mapping
1898 */
1899
1900 /*
* TODO: free bootstrap table memory back to the allocator.
* On large memory systems the bootstrap tables could be quite large.
* After bootstrap is complete, xnu can warm start with a single 16KB page mapping
* to trampoline to KVA. This requires only 3 pages to stay resident.
1905 */
1906 avail_start = args->topOfKernelData;
1907
1908 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1909 arm_replace_identity_map();
1910 #endif
1911
1912 /* Initialize invalid tte page */
1913 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1914 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1915 bzero(invalid_tte, ARM_PGBYTES);
1916
1917 /*
1918 * Initialize l1 page table page
1919 */
1920 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1921 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1922 bzero(cpu_tte, ARM_PGBYTES);
1923 avail_end = gPhysBase + mem_size;
1924 assert(!(avail_end & PAGE_MASK));
1925
1926 #if KASAN
1927 real_avail_end = gPhysBase + real_phys_size;
1928 #else
1929 real_avail_end = avail_end;
1930 #endif
1931
1932 /*
1933 * Initialize l1 and l2 page table pages :
1934 * map physical memory at the kernel base virtual address
1935 * cover the kernel dynamic address range section
1936 *
1937 * the so called physical aperture should be statically mapped
1938 */
1939 init_ptpages(cpu_tte, gVirtBase, dynamic_memory_begin, TRUE, ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1940
1941 #if defined(ARM_LARGE_MEMORY)
1942 /*
1943 * Initialize l1 page table pages :
1944 * on large memory systems the physical aperture exists separately below
1945 * the rest of the kernel virtual address space
1946 */
1947 init_ptpages(cpu_tte, physmap_base, ROUND_L1(physmap_end), TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1948 #endif
1949
1950
1951 #if __ARM_KERNEL_PROTECT__
1952 /* Expand the page tables to prepare for the EL0 mappings. */
1953 arm_vm_expand_kernel_el0_mappings();
1954 #endif /* __ARM_KERNEL_PROTECT__ */
1955
1956 /*
1957 * Now retrieve addresses for various segments from kernel mach-o header
1958 */
1959 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
1960 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
1961 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
1962 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
1963 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
1964 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
1965 #if XNU_MONITOR
1966 segPPLTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTEXT", &segSizePPLTEXT);
1967 segPPLTRAMPB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTRAMP", &segSizePPLTRAMP);
1968 segPPLDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA_CONST", &segSizePPLDATACONST);
1969 #endif
1970 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
1971 #if XNU_MONITOR
1972 segPPLDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA", &segSizePPLDATA);
1973 #endif
1974
1975 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
1976 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
1977 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
1978 segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
1979 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
1980 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
1981 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
1982 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
1983 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
1984 segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
1985
sectHIBTEXTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
1988 segHIBDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);
1989
1990 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
1991 kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
1992
1993 // fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
1994 segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
1995 assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty
1996
1997 assert(segLASTB); // kernel LAST can be empty, but it must have
1998 // a valid address for computations below.
1999
2000 assert(segKCTEXTEXECB <= segLASTB); // KC TEXT_EXEC must contain kernel LAST
2001 assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
2002 segPLKTEXTEXECB = segLASTB + segSizeLAST;
2003 segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);
2004
2005 // fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
2006 segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
2007 assert(segPLKDATACONSTB && !segSizePLKDATACONST); // kernel PLK_DATA_CONST must be empty
2008 assert(segLASTDATACONSTB && segSizeLASTDATACONST); // kernel LASTDATA_CONST must be non-empty
2009 assert(segKCDATACONSTB <= segLASTDATACONSTB); // KC DATA_CONST must contain kernel LASTDATA_CONST
2010 assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
2011 segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
2012 segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);
2013
2014 // fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
2015 segKCDATAB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
2016 assert(segPRELINKDATAB && !segSizePRELINKDATA); // kernel PRELINK_DATA must be empty
2017 assert(segKCDATAB <= segPRELINKDATAB); // KC DATA must contain kernel PRELINK_DATA
2018 assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
2019 segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);
2020
2021 // fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
2022 assert(segPRELINKTEXTB && !segSizePRELINKTEXT); // kernel PRELINK_TEXT must be empty
2023 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
2024 assert(segPRELINKINFOB && !segSizePRELINKINFO); // kernel PRELINK_INFO must be empty
2025 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
2026 segLINKB = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
2027 }
2028
2029 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
2030 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
2031
2032 /* if one of the new segments is present, the other one better be as well */
2033 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
2034 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
2035 }
2036
2037 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
2038 sdata = (vm_offset_t) segDATAB;
2039 edata = (vm_offset_t) segDATAB + segSizeDATA;
2040 end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastaddr()); /* Force end to next page */
2041
2042 vm_set_page_size();
2043
2044 vm_kernel_base = segTEXTB;
2045 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
2046 vm_kext_base = segPRELINKTEXTB;
2047 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
2048
2049 vm_prelink_stext = segPRELINKTEXTB;
2050 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
2051 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
2052 } else {
2053 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
2054 }
2055 vm_prelink_sinfo = segPRELINKINFOB;
2056 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
2057 vm_slinkedit = segLINKB;
2058 vm_elinkedit = segLINKB + segSizeLINK;
2059
2060 vm_prelink_sdata = segPRELINKDATAB;
2061 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
2062
2063 arm_vm_prot_init(args);
2064
2065 vm_page_kernelcache_count = (unsigned int) (atop_64(end_kern - segLOWEST));
2066
2067 /*
2068 * Initialize the page tables for the low globals:
2069 * cover this address range:
2070 * LOW_GLOBAL_BASE_ADDRESS + 2MB
2071 */
2072 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
2073 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2074 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2075 ptpage_vaddr = alloc_ptpage(TRUE);
2076 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
2077 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
2078
2079 /*
2080 * Initialize l2 page table pages :
2081 * cover this address range:
2082 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
2083 */
2084 #if defined(ARM_LARGE_MEMORY)
2085 /*
* Dynamically mapped memory outside the VM allocator VA range, required to bootstrap the VM system.
* We don't expect this to exceed 64GB; there is no sense mapping any more space between here and the VM heap range.
2088 */
2089 init_ptpages(cpu_tte, dynamic_memory_begin, ROUND_L1(dynamic_memory_begin), FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2090 #else
2091 /*
2092 * TODO: do these pages really need to come from RO memory?
2093 * With legacy 3 level table systems we never mapped more than a single L1 entry so this may be dead code
2094 */
2095 init_ptpages(cpu_tte, dynamic_memory_begin, VM_MAX_KERNEL_ADDRESS, TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2096 #endif
2097
2098 #if KASAN
2099 /* record the extent of the physmap */
2100 physmap_vbase = physmap_base;
2101 physmap_vtop = physmap_end;
2102 kasan_init();
2103 #endif /* KASAN */
2104
2105 #if MONOTONIC
2106 mt_early_init();
2107 #endif /* MONOTONIC */
2108
2109 arm_set_user_tbi();
2110
2111 arm_vm_physmap_init(args);
2112 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
2113
2114 ml_enable_monitor();
2115
2116 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
2117
2118 flush_mmu_tlb();
2119 kva_active = TRUE;
2120 // global table pointers may need to be different due to physical aperture remapping
2121 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
2122 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
2123
2124 // From here on out, we're off the bootstrap translation tables.
2125
2126
2127 /* AuxKC initialization has to be deferred until this point, since
2128 * the AuxKC may not have been fully mapped in the bootstrap
2129 * tables, if it spilled downwards into the prior L2 block.
2130 *
* Now that its mapping, set up by arm_vm_prot_init(), is active,
2132 * we can traverse and fix it up.
2133 */
2134
2135 if (arm_vm_auxkc_init()) {
2136 if (segLOWESTROAuxKC < segLOWESTRO) {
2137 segLOWESTRO = segLOWESTROAuxKC;
2138 }
2139 if (segHIGHESTROAuxKC > segHIGHESTRO) {
2140 segHIGHESTRO = segHIGHESTROAuxKC;
2141 }
2142 if (segLOWESTRXAuxKC < segLOWESTTEXT) {
2143 segLOWESTTEXT = segLOWESTRXAuxKC;
2144 }
2145 assert(segLOWEST == segLOWESTAuxKC);
2146
2147 // The preliminary auxKC mapping has been broken up.
2148 flush_mmu_tlb();
2149 }
2150
2151 sane_size = mem_size - (avail_start - gPhysBase);
2152 max_mem = mem_size;
2153 vm_kernel_slid_base = segLOWESTTEXT;
2154 // vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
2155 vm_kernel_stext = segTEXTB;
2156
2157 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2158 // fileset has kext TEXT before kernel DATA_CONST
2159 assert(segTEXTEXECB == segTEXTB + segSizeTEXT);
2160 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeTEXTEXEC;
2161 vm_kernel_slid_top = vm_slinkedit;
2162 } else {
2163 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
2164 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
2165 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
2166 vm_kernel_slid_top = vm_prelink_einfo;
2167 }
2168
2169 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
2170 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
2171 // reserve a 32MB region without permission overrides to use later for a CTRR unit test
2172 {
2173 extern vm_offset_t ctrr_test_page;
2174 tt_entry_t *new_tte;
2175
2176 ctrr_test_page = dynamic_memory_begin;
2177 dynamic_memory_begin += ARM_TT_L2_SIZE;
2178 cpu_l1_tte = cpu_tte + ((ctrr_test_page & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2179 assert((*cpu_l1_tte) & ARM_TTE_VALID);
2180 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((ctrr_test_page & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2181 assert((*cpu_l2_tte) == ARM_TTE_EMPTY);
2182 new_tte = (tt_entry_t *)alloc_ptpage(FALSE);
2183 bzero(new_tte, ARM_PGBYTES);
2184 *cpu_l2_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
2185 }
2186 #endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
2187 #if XNU_MONITOR
2188 for (vm_offset_t cur = (vm_offset_t)pmap_stacks_start; cur < (vm_offset_t)pmap_stacks_end; cur += ARM_PGBYTES) {
2189 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2190 }
2191 #endif
2192 pmap_bootstrap(dynamic_memory_begin);
2193
2194 disable_preemption();
2195
2196 /*
2197 * Initialize l3 page table pages :
2198 * cover this address range:
2199 * 2MB + FrameBuffer size + 10MB for each 256MB segment
2200 */
2201
2202 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
2203
2204 va_l1 = dynamic_memory_begin;
2205 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
2206 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
2207 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
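/*
 * Worked example (hypothetical values): with mem_size == 4GB, mem_segments == 16, so the base
 * window is (2 + 16 * 10)MB == 162MB; adding, say, a 32MB framebuffer and rounding up to an 8MB
 * boundary yields roughly 200MB of L3-backed VA starting at dynamic_memory_begin.
 */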
2208
2209 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2210
2211 while (va_l1 < va_l1_end) {
2212 va_l2 = va_l1;
2213
2214 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2215 /* If this is the last L1 entry, it must cover the last mapping. */
2216 va_l2_end = va_l1_end;
2217 } else {
2218 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2219 }
2220
2221 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2222
2223 while (va_l2 < va_l2_end) {
2224 pt_entry_t * ptp;
2225 pmap_paddr_t ptp_phys;
2226
2227 /* Allocate a page and setup L3 Table TTE in L2 */
2228 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2229 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2230
2231 bzero(ptp, ARM_PGBYTES);
2232 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2233
2234 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2235
2236 va_l2 += ARM_TT_L2_SIZE;
2237 cpu_l2_tte++;
2238 }
2239
2240 va_l1 = va_l2_end;
2241 cpu_l1_tte++;
2242 }
2243
2244 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2245 /*
2246 * In this configuration, the bootstrap mappings (arm_vm_init) and
2247 * the heap mappings occupy separate L1 regions. Explicitly set up
2248 * the heap L1 allocations here.
2249 */
2250 #if defined(ARM_LARGE_MEMORY)
2251 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2252 #else // defined(ARM_LARGE_MEMORY)
2253 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
2254 init_ptpages(cpu_tte, VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2255 #endif // defined(ARM_LARGE_MEMORY)
2256 #else
2257 #if defined(ARM_LARGE_MEMORY)
2258 /* For large memory systems with no KTRR/CTRR such as virtual machines */
2259 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2260 #endif
2261 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2262
2263 /*
2264 * Initialize l3 page table pages :
2265 * cover this address range:
2266 * ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
2267 */
2268 va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
2269 va_l1_end = VM_MAX_KERNEL_ADDRESS;
2270
2271 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2272
2273 while (va_l1 < va_l1_end) {
2274 va_l2 = va_l1;
2275
2276 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2277 /* If this is the last L1 entry, it must cover the last mapping. */
2278 va_l2_end = va_l1_end;
2279 } else {
2280 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2281 }
2282
2283 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2284
2285 while (va_l2 < va_l2_end) {
2286 pt_entry_t * ptp;
2287 pmap_paddr_t ptp_phys;
2288
2289 /* Allocate a page and setup L3 Table TTE in L2 */
2290 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2291 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2292
2293 bzero(ptp, ARM_PGBYTES);
2294 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2295
2296 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2297
2298 va_l2 += ARM_TT_L2_SIZE;
2299 cpu_l2_tte++;
2300 }
2301
2302 va_l1 = va_l2_end;
2303 cpu_l1_tte++;
2304 }
2305
2306
2307 /*
2308 * Adjust avail_start so that the range that the VM owns
2309 * starts on a PAGE_SIZE aligned boundary.
2310 */
2311 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
2312
2313 #if XNU_MONITOR
2314 pmap_static_allocations_done();
2315 #endif
2316 first_avail = avail_start;
2317 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
2318 enable_preemption();
2319 }
2320