xref: /xnu-8792.81.2/osfmk/arm/pmap/pmap.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <string.h>
29 #include <stdlib.h>
30 #include <mach_assert.h>
31 #include <mach_ldebug.h>
32 
33 #include <mach/shared_region.h>
34 #include <mach/vm_param.h>
35 #include <mach/vm_prot.h>
36 #include <mach/vm_map.h>
37 #include <mach/machine/vm_param.h>
38 #include <mach/machine/vm_types.h>
39 
40 #include <mach/boolean.h>
41 #include <kern/bits.h>
42 #include <kern/ecc.h>
43 #include <kern/thread.h>
44 #include <kern/sched.h>
45 #include <kern/zalloc.h>
46 #include <kern/zalloc_internal.h>
47 #include <kern/kalloc.h>
48 #include <kern/spl.h>
49 #include <kern/startup.h>
50 #include <kern/trustcache.h>
51 
52 #include <os/overflow.h>
53 
54 #include <vm/pmap.h>
55 #include <vm/pmap_cs.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_kern.h>
58 #include <vm/vm_protos.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page.h>
61 #include <vm/vm_pageout.h>
62 #include <vm/cpm.h>
63 
64 #include <libkern/img4/interface.h>
65 #include <libkern/amfi/amfi.h>
66 #include <libkern/section_keywords.h>
67 #include <sys/errno.h>
68 #include <sys/trust_caches.h>
69 
70 #include <machine/atomic.h>
71 #include <machine/thread.h>
72 #include <machine/lowglobals.h>
73 
74 #include <arm/caches_internal.h>
75 #include <arm/cpu_data.h>
76 #include <arm/cpu_data_internal.h>
77 #include <arm/cpu_capabilities.h>
78 #include <arm/cpu_number.h>
79 #include <arm/machine_cpu.h>
80 #include <arm/misc_protos.h>
81 #include <arm/pmap/pmap_internal.h>
82 #include <arm/trap.h>
83 
84 #include <arm64/proc_reg.h>
85 #include <pexpert/arm64/boot.h>
86 #include <arm64/ppl/sart.h>
87 #include <arm64/ppl/uat.h>
88 
89 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
90 #include <arm64/amcc_rorgn.h>
91 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
92 
93 #include <pexpert/device_tree.h>
94 
95 #include <san/kasan.h>
96 #include <sys/cdefs.h>
97 
98 #if defined(HAS_APPLE_PAC)
99 #include <ptrauth.h>
100 #endif
101 
102 #ifdef CONFIG_XNUPOST
103 #include <tests/xnupost.h>
104 #endif
105 
106 
107 #if HIBERNATION
108 #include <IOKit/IOHibernatePrivate.h>
109 #endif /* HIBERNATION */
110 
111 #ifdef __ARM64_PMAP_SUBPAGE_L1__
112 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
113 #else
114 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
115 #endif
116 
117 #if __ARM_VMSA__ != 8
118 #error Unknown __ARM_VMSA__
119 #endif
120 
121 #define ARRAY_LEN(x) (sizeof (x) / sizeof (x[0]))
122 
123 extern u_int32_t random(void); /* from <libkern/libkern.h> */
124 
125 static bool alloc_asid(pmap_t pmap);
126 static void free_asid(pmap_t pmap);
127 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap, bool last_level_only);
128 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
129 static pt_entry_t wimg_to_pte(unsigned int wimg, pmap_paddr_t pa);
130 
/*
 * Native page table operations: ASID allocation/release and asynchronous TLB
 * maintenance callbacks, plus WIMG-to-PTE conversion, dispatched through
 * pmap_get_pt_ops() via the page table attributes.
 */
static const struct page_table_ops native_pt_ops =
{
	.alloc_id = alloc_asid,
	.free_id = free_asid,
	.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
	.flush_tlb_async = flush_mmu_tlb_full_asid_async,
	.wimg_to_pte = wimg_to_pte,
};
139 
/*
 * Translation-walk geometry for the 16KB translation granule: per-level entry
 * coverage (size/offmask/shift/index_mask) and the valid/type bit encodings.
 * Levels 0-2 use TTE (table/block) encodings; level 3 uses PTE encodings.
 */
const struct page_table_level_info pmap_table_level_info_16k[] =
{
	[0] = {
		.size       = ARM_16K_TT_L0_SIZE,
		.offmask    = ARM_16K_TT_L0_OFFMASK,
		.shift      = ARM_16K_TT_L0_SHIFT,
		.index_mask = ARM_16K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_16K_TT_L1_SIZE,
		.offmask    = ARM_16K_TT_L1_OFFMASK,
		.shift      = ARM_16K_TT_L1_SHIFT,
		.index_mask = ARM_16K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_16K_TT_L2_SIZE,
		.offmask    = ARM_16K_TT_L2_OFFMASK,
		.shift      = ARM_16K_TT_L2_SHIFT,
		.index_mask = ARM_16K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_16K_TT_L3_SIZE,
		.offmask    = ARM_16K_TT_L3_OFFMASK,
		.shift      = ARM_16K_TT_L3_SHIFT,
		.index_mask = ARM_16K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};
179 
/*
 * Translation-walk geometry for the 4KB translation granule; same layout and
 * encoding conventions as pmap_table_level_info_16k above.
 */
const struct page_table_level_info pmap_table_level_info_4k[] =
{
	[0] = {
		.size       = ARM_4K_TT_L0_SIZE,
		.offmask    = ARM_4K_TT_L0_OFFMASK,
		.shift      = ARM_4K_TT_L0_SHIFT,
		.index_mask = ARM_4K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_4K_TT_L1_SIZE,
		.offmask    = ARM_4K_TT_L1_OFFMASK,
		.shift      = ARM_4K_TT_L1_SHIFT,
		.index_mask = ARM_4K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_4K_TT_L2_SIZE,
		.offmask    = ARM_4K_TT_L2_OFFMASK,
		.shift      = ARM_4K_TT_L2_SHIFT,
		.index_mask = ARM_4K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_4K_TT_L3_SIZE,
		.offmask    = ARM_4K_TT_L3_OFFMASK,
		.shift      = ARM_4K_TT_L3_SHIFT,
		.index_mask = ARM_4K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};
219 
/*
 * Page-table attributes for pmaps using the 4KB translation granule.
 * The root level is derived from T0SZ_BOOT (each 4K level resolves 9 VA
 * bits); the commpage nesting level depends on the native page size and
 * mixed-page-size support.
 */
const struct page_table_attr pmap_pt_attr_4k = {
	.pta_level_info = pmap_table_level_info_4k,
	.pta_root_level = (T0SZ_BOOT - 16) / 9,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_commpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_MIXED_PAGE_SIZE__ */
#if __ARM_16K_PG__
	.pta_commpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_16K_PG__ */
	.pta_commpage_level = PMAP_TT_L1_LEVEL,
#endif /* __ARM_16K_PG__ */
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_4KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 4096,
	.pta_page_shift = 12,
};
246 
/* Page-table attributes for pmaps using the 16KB translation granule. */
const struct page_table_attr pmap_pt_attr_16k = {
	.pta_level_info = pmap_table_level_info_16k,
	.pta_root_level = PMAP_TT_L1_LEVEL,
	.pta_commpage_level = PMAP_TT_L2_LEVEL,
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_16KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 16384,
	.pta_page_shift = 14,
};
265 
266 #if __ARM_16K_PG__
267 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
268 #else /* !__ARM_16K_PG__ */
269 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
270 #endif /* !__ARM_16K_PG__ */
271 
272 
273 #if MACH_ASSERT
274 int vm_footprint_suspend_allowed = 1;
275 
276 extern int pmap_ledgers_panic;
277 extern int pmap_ledgers_panic_leeway;
278 
279 #endif /* MACH_ASSERT */
280 
281 #if DEVELOPMENT || DEBUG
282 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
283 	(current_thread()->pmap_footprint_suspended)
284 #else /* DEVELOPMENT || DEBUG */
285 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
286 #endif /* DEVELOPMENT || DEBUG */
287 
288 
289 SECURITY_READ_ONLY_LATE(int) srd_fused = 0;
290 
291 /*
292  * Represents a tlb range that will be flushed before exiting
293  * the ppl.
294  * Used by phys_attribute_clear_range to defer flushing pages in
295  * this range until the end of the operation.
296  */
typedef struct pmap_tlb_flush_range {
	pmap_t ptfr_pmap;               /* pmap whose mappings lie in the range */
	vm_map_address_t ptfr_start;    /* start VA of the deferred-flush range */
	vm_map_address_t ptfr_end;      /* end VA of the deferred-flush range */
	bool ptfr_flush_needed;         /* set when a flush must be issued before PPL exit */
} pmap_tlb_flush_range_t;
303 
304 #if XNU_MONITOR
305 /*
306  * PPL External References.
307  */
308 extern vm_offset_t   segPPLDATAB;
309 extern unsigned long segSizePPLDATA;
310 extern vm_offset_t   segPPLTEXTB;
311 extern unsigned long segSizePPLTEXT;
312 extern vm_offset_t   segPPLDATACONSTB;
313 extern unsigned long segSizePPLDATACONST;
314 
315 
316 /*
317  * PPL Global Variables
318  */
319 
320 #if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
321 /* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
322 SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
323 #else
324 const boolean_t pmap_ppl_disable = FALSE;
325 #endif
326 
327 /*
328  * Indicates if the PPL has started applying APRR.
329  * This variable is accessed from various assembly trampolines, so be sure to change
330  * those if you change the size or layout of this variable.
331  */
332 boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
333 
334 extern void *pmap_stacks_start;
335 extern void *pmap_stacks_end;
336 
#endif /* XNU_MONITOR */
338 
339 
340 
341 /* Virtual memory region for early allocation */
342 #define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
343 #define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
344 #define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
345 
346 extern uint8_t bootstrap_pagetables[];
347 
348 extern unsigned int not_in_kdp;
349 
350 extern vm_offset_t first_avail;
351 
352 extern vm_offset_t     virtual_space_start;     /* Next available kernel VA */
353 extern vm_offset_t     virtual_space_end;       /* End of kernel address space */
354 extern vm_offset_t     static_memory_end;
355 
356 extern const vm_map_address_t physmap_base;
357 extern const vm_map_address_t physmap_end;
358 
359 extern int maxproc, hard_maxproc;
360 
361 vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;
362 vm_address_t MARK_AS_PMAP_DATA image4_late_slab = 0;
363 
364 /* The number of address bits one TTBR can cover. */
365 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
366 
367 /*
368  * The bounds on our TTBRs.  These are for sanity checking that
369  * an address is accessible by a TTBR before we attempt to map it.
370  */
371 
372 /* The level of the root of a page table. */
373 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
374 
375 /* The number of entries in the root TT of a page table. */
376 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
377 
378 struct pmap     kernel_pmap_store MARK_AS_PMAP_DATA;
379 const pmap_t    kernel_pmap = &kernel_pmap_store;
380 
381 static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone;  /* zone of pmap structures */
382 
383 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
384 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
385 queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;
386 
387 typedef struct tt_free_entry {
388 	struct tt_free_entry    *next;
389 } tt_free_entry_t;
390 
391 #define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
392 
393 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
394 unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
395 unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
396 #define FREE_PAGE_SIZE_TT_MAX   4
397 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
398 unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
399 unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
400 #define FREE_TWO_PAGE_SIZE_TT_MAX       4
401 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
402 unsigned int    free_tt_count MARK_AS_PMAP_DATA;
403 unsigned int    free_tt_max MARK_AS_PMAP_DATA;
404 
405 #define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
406 
407 unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
408 unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
409 unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;  /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
410 unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
411 unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
412 unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
413 
414 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
415 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
416 
417 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;                     /* set by arm_vm_init() - keep out of bss */
418 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;                     /* set by arm_vm_init() - phys tte addr */
419 
420 /* Lock group used for all pmap object locks. */
421 lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
422 
423 #if DEVELOPMENT || DEBUG
424 int nx_enabled = 1;                                     /* enable no-execute protection */
425 int allow_data_exec  = 0;                               /* No apps may execute data */
426 int allow_stack_exec = 0;                               /* No apps may execute from the stack */
427 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
428 unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
429 unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
430 #else /* DEVELOPMENT || DEBUG */
431 const int nx_enabled = 1;                                       /* enable no-execute protection */
432 const int allow_data_exec  = 0;                         /* No apps may execute data */
433 const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
434 #endif /* DEVELOPMENT || DEBUG */
435 
436 /**
437  * This variable is set true during hibernation entry to protect pmap data structures
438  * during image copying, and reset false on hibernation exit.
439  */
440 bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
441 
442 #if MACH_ASSERT
443 static void pmap_check_ledgers(pmap_t pmap);
444 #else
/* MACH_ASSERT disabled: ledger validation is a no-op in this configuration. */
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
449 #endif /* MACH_ASSERT */
450 
451 /**
452  * This helper function ensures that potentially-long-running batched PPL operations are
453  * called in preemptible context before entering the PPL, so that the PPL call may
454  * periodically exit to allow pending urgent ASTs to be taken.
455  */
456 static inline void
pmap_verify_preemptible(void)457 pmap_verify_preemptible(void)
458 {
459 	assert(preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT));
460 }
461 
462 SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
463 
464 SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
465 SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;
466 
467 SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */
468 
469 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
470 #if defined(__arm64__)
471 #  ifdef XNU_TARGET_OS_OSX
472 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
473 #  else
474 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
475 #  endif
476 #endif /* __arm64__ */
477 
478 #if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
479 SECURITY_READ_ONLY_LATE(boolean_t)   pmap_panic_dev_wimg_on_managed = TRUE;
480 #else
481 SECURITY_READ_ONLY_LATE(boolean_t)   pmap_panic_dev_wimg_on_managed = FALSE;
482 #endif
483 
484 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
485 SECURITY_READ_ONLY_LATE(uint32_t) pmap_max_asids = 0;
486 SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
487 SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
488 SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
489 static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
490 static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
491 static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
492 
493 
494 #if __ARM_MIXED_PAGE_SIZE__
495 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
496 #endif
497 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
498 SECURITY_READ_ONLY_LATE(static vm_address_t) sharedpage_text_kva = 0;
499 SECURITY_READ_ONLY_LATE(static vm_address_t) sharedpage_ro_data_kva = 0;
500 
501 /* PTE Define Macros */
502 
/* True when the software wired bit is set in the given PTE. */
#define pte_is_wired(pte)                                                               \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

/* True when the software "was writeable" bit is set in the given PTE. */
#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

/* Set or clear the software "was writeable" bit in the given PTE lvalue. */
#define pte_set_was_writeable(pte, was_writeable) \
	do {                                         \
	        if ((was_writeable)) {               \
	                (pte) |= ARM_PTE_WRITEABLE;  \
	        } else {                             \
	                (pte) &= ~ARM_PTE_WRITEABLE; \
	        }                                    \
	} while(0)
517 
518 static inline void
pte_set_wired(pmap_t pmap,pt_entry_t * ptep,boolean_t wired)519 pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
520 {
521 	if (wired) {
522 		*ptep |= ARM_PTE_WIRED;
523 	} else {
524 		*ptep &= ~ARM_PTE_WIRED;
525 	}
526 	/*
527 	 * Do not track wired page count for kernel pagetable pages.  Kernel mappings are
528 	 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
529 	 * never reclaimed.
530 	 */
531 	if (pmap == kernel_pmap) {
532 		return;
533 	}
534 	unsigned short *ptd_wiredcnt_ptr;
535 	ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
536 	if (wired) {
537 		os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
538 	} else {
539 		unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
540 		if (__improbable(prev_wired == 0)) {
541 			panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
542 		}
543 	}
544 }
545 
/*
 * Issue an asynchronous TLB flush for VAs [s, e) of the given pmap through its
 * page-table ops, then synchronize via arm64_sync_tlb(); 'strong' is forwarded
 * to the synchronization step.
 */
#define PMAP_UPDATE_TLBS(pmap, s, e, strong, last_level_only) {                                       \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap, last_level_only); \
	arm64_sync_tlb(strong);                                                                        \
}
550 
551 /*
552  * Synchronize updates to PTEs that were previously invalid or had the AF bit cleared,
553  * therefore not requiring TLBI.  Use a store-load barrier to ensure subsequent loads
554  * will observe the updated PTE.
555  */
556 #define FLUSH_PTE()                                                                     \
557 	__builtin_arm_dmb(DMB_ISH);
558 
559 /*
560  * Synchronize updates to PTEs that were previously valid and thus may be cached in
561  * TLBs.  DSB is required to ensure the PTE stores have completed prior to the ensuing
562  * TLBI.  This should only require a store-store barrier, as subsequent accesses in
563  * program order will not issue until the DSB completes.  Prior loads may be reordered
564  * after the barrier, but their behavior should not be materially affected by the
565  * reordering.  For fault-driven PTE updates such as COW, PTE contents should not
566  * matter for loads until the access is re-driven well after the TLB update is
567  * synchronized.   For "involuntary" PTE access restriction due to paging lifecycle,
568  * we should be in a position to handle access faults.  For "voluntary" PTE access
569  * restriction due to unmapping or protection, the decision to restrict access should
570  * have a data dependency on prior loads in order to avoid a data race.
571  */
572 #define FLUSH_PTE_STRONG()                                                             \
573 	__builtin_arm_dsb(DSB_ISHST);
574 
575 /**
576  * Write enough page table entries to map a single VM page. On systems where the
577  * VM page size does not match the hardware page size, multiple page table
578  * entries will need to be written.
579  *
580  * @note This function does not emit a barrier to ensure these page table writes
581  *       have completed before continuing. This is commonly needed. In the case
582  *       where a DMB or DSB barrier is needed, then use the write_pte() and
583  *       write_pte_strong() functions respectively instead of this one.
584  *
585  * @param ptep Pointer to the first page table entry to update.
586  * @param pte The value to write into each page table entry. In the case that
587  *            multiple PTEs are updated to a non-empty value, then the address
588  *            in this value will automatically be incremented for each PTE
589  *            write.
590  */
591 static void
write_pte_fast(pt_entry_t * ptep,pt_entry_t pte)592 write_pte_fast(pt_entry_t *ptep, pt_entry_t pte)
593 {
594 	/**
595 	 * The PAGE_SHIFT (and in turn, the PAGE_RATIO) can be a variable on some
596 	 * systems, which is why it's checked at runtime instead of compile time.
597 	 * The "unreachable" warning needs to be suppressed because it still is a
598 	 * compile time constant on some systems.
599 	 */
600 	__unreachable_ok_push
601 	if (TEST_PAGE_RATIO_4) {
602 		if (((uintptr_t)ptep) & 0x1f) {
603 			panic("%s: PTE write is unaligned, ptep=%p, pte=%p",
604 			    __func__, ptep, (void*)pte);
605 		}
606 
607 		if ((pte & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) {
608 			/**
609 			 * If we're writing an empty/compressed PTE value, then don't
610 			 * auto-increment the address for each PTE write.
611 			 */
612 			*ptep = pte;
613 			*(ptep + 1) = pte;
614 			*(ptep + 2) = pte;
615 			*(ptep + 3) = pte;
616 		} else {
617 			*ptep = pte;
618 			*(ptep + 1) = pte | 0x1000;
619 			*(ptep + 2) = pte | 0x2000;
620 			*(ptep + 3) = pte | 0x3000;
621 		}
622 	} else {
623 		*ptep = pte;
624 	}
625 	__unreachable_ok_pop
626 }
627 
628 /**
629  * Writes enough page table entries to map a single VM page and then ensures
630  * those writes complete by executing a Data Memory Barrier.
631  *
632  * @note The DMB issued by this function is not strong enough to protect against
633  *       TLB invalidates from being reordered above the PTE writes. If a TLBI
634  *       instruction is going to immediately be called after this write, it's
635  *       recommended to call write_pte_strong() instead of this function.
636  *
637  * See the function header for write_pte_fast() for more details on the
638  * parameters.
639  */
void
write_pte(pt_entry_t *ptep, pt_entry_t pte)
{
	write_pte_fast(ptep, pte);
	FLUSH_PTE(); /* DMB: make the PTE store(s) observable by subsequent loads. */
}
646 
647 /**
648  * Writes enough page table entries to map a single VM page and then ensures
649  * those writes complete by executing a Data Synchronization Barrier. This
650  * barrier provides stronger guarantees than the DMB executed by write_pte().
651  *
652  * @note This function is useful if you're going to immediately flush the TLB
653  *       after making the PTE write. A DSB is required to protect against the
654  *       TLB invalidate being reordered before the PTE write.
655  *
656  * See the function header for write_pte_fast() for more details on the
657  * parameters.
658  */
static void
write_pte_strong(pt_entry_t *ptep, pt_entry_t pte)
{
	write_pte_fast(ptep, pte);
	FLUSH_PTE_STRONG(); /* DSB: order the PTE store(s) before a following TLBI. */
}
665 
666 /**
667  * Retrieve the pmap structure for the thread running on the current CPU.
668  */
669 pmap_t
current_pmap()670 current_pmap()
671 {
672 	const pmap_t current = vm_map_pmap(current_thread()->map);
673 
674 	assert(current != NULL);
675 
676 #if XNU_MONITOR
677 	/**
678 	 * On PPL-enabled systems, it's important that PPL policy decisions aren't
679 	 * decided by kernel-writable memory. This function is used in various parts
680 	 * of the PPL, and besides validating that the pointer returned by this
681 	 * function is indeed a pmap structure, it's also important to ensure that
682 	 * it's actually the current thread's pmap. This is because different pmaps
683 	 * will have access to different entitlements based on the code signature of
684 	 * their loaded process. So if a different user pmap is set in the current
685 	 * thread structure (in an effort to bypass code signing restrictions), even
686 	 * though the structure would validate correctly as it is a real pmap
687 	 * structure, it should fail here.
688 	 *
689 	 * This only needs to occur for user pmaps because the kernel pmap's root
690 	 * page table is always the same as TTBR1 (it's set during bootstrap and not
691 	 * changed so it'd be redundant to check), and its code signing fields are
692 	 * always set to NULL. The PMAP CS logic won't operate on the kernel pmap so
693 	 * it shouldn't be possible to set those fields. Due to that, an attacker
694 	 * setting the current thread's pmap to the kernel pmap as a way to bypass
695 	 * this check won't accomplish anything as it doesn't provide any extra code
696 	 * signing entitlements.
697 	 */
698 	if ((current != kernel_pmap) &&
699 	    ((get_mmu_ttb() & TTBR_BADDR_MASK) != (current->ttep))) {
700 		panic_plain("%s: Current thread's pmap doesn't match up with TTBR0 "
701 		    "%#llx %#llx", __func__, get_mmu_ttb(), current->ttep);
702 	}
703 #endif /* XNU_MONITOR */
704 
705 	return current;
706 }
707 
708 #if DEVELOPMENT || DEBUG
709 
710 /*
711  * Trace levels are controlled by a bitmask in which each
712  * level can be enabled/disabled by the (1<<level) position
713  * in the boot arg
714  * Level 0: PPL extension functionality
715  * Level 1: pmap lifecycle (create/destroy/switch)
716  * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
717  * Level 3: internal state management (attributes/fast-fault)
718  * Level 4-7: TTE traces for paging levels 0-3.  TTBs are traced at level 4.
719  */
720 
721 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
722 
723 #define PMAP_TRACE(level, ...) \
724 	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
725 	        KDBG_RELEASE(__VA_ARGS__); \
726 	}
727 #else /* DEVELOPMENT || DEBUG */
728 
729 #define PMAP_TRACE(level, ...)
730 
731 #endif /* DEVELOPMENT || DEBUG */
732 
733 
734 /*
735  * Internal function prototypes (forward declarations).
736  */
737 
738 static vm_map_size_t pmap_user_va_size(pmap_t pmap);
739 
740 static void pmap_set_reference(ppnum_t pn);
741 
742 pmap_paddr_t pmap_vtophys(pmap_t pmap, addr64_t va);
743 
744 static void pmap_switch_user_ttb(pmap_t pmap, pmap_cpu_data_t *cpu_data_ptr);
745 
746 static kern_return_t pmap_expand(
747 	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
748 
749 static int pmap_remove_range(
750 	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
751 
752 static tt_entry_t *pmap_tt1_allocate(
753 	pmap_t, vm_size_t, unsigned int);
754 
755 #define PMAP_TT_ALLOCATE_NOWAIT         0x1
756 
757 static void pmap_tt1_deallocate(
758 	pmap_t, tt_entry_t *, vm_size_t, unsigned int);
759 
760 #define PMAP_TT_DEALLOCATE_NOBLOCK      0x1
761 
762 static kern_return_t pmap_tt_allocate(
763 	pmap_t, tt_entry_t **, unsigned int, unsigned int);
764 
765 #define PMAP_TT_ALLOCATE_NOWAIT         0x1
766 
767 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
768 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
769 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
770 
771 #define PMAP_TT_DEALLOCATE_NOBLOCK      0x1
772 
773 
774 static void pmap_unmap_sharedpage(
775 	pmap_t pmap);
776 
777 static boolean_t
778 pmap_is_64bit(pmap_t);
779 
780 
781 static void pmap_flush_tlb_for_paddr_locked_async(pmap_paddr_t);
782 
783 static void pmap_update_pp_attr_wimg_bits_locked(unsigned int, unsigned int);
784 
785 static bool pmap_update_cache_attributes_locked(
786 	ppnum_t, unsigned, bool);
787 
788 static boolean_t arm_clear_fast_fault(
789 	ppnum_t ppnum,
790 	vm_prot_t fault_type,
791 	pt_entry_t *pte_p);
792 
793 static void pmap_trim_self(pmap_t pmap);
794 static void pmap_trim_subord(pmap_t subord);
795 
796 
797 /*
798  * Temporary prototypes, while we wait for pmap_enter to move to taking an
799  * address instead of a page number.
800  */
801 static kern_return_t
802 pmap_enter_addr(
803 	pmap_t pmap,
804 	vm_map_address_t v,
805 	pmap_paddr_t pa,
806 	vm_prot_t prot,
807 	vm_prot_t fault_type,
808 	unsigned int flags,
809 	boolean_t wired);
810 
811 kern_return_t
812 pmap_enter_options_addr(
813 	pmap_t pmap,
814 	vm_map_address_t v,
815 	pmap_paddr_t pa,
816 	vm_prot_t prot,
817 	vm_prot_t fault_type,
818 	unsigned int flags,
819 	boolean_t wired,
820 	unsigned int options,
821 	__unused void   *arg);
822 
823 #ifdef CONFIG_XNUPOST
824 kern_return_t pmap_test(void);
825 #endif /* CONFIG_XNUPOST */
826 
827 PMAP_SUPPORT_PROTOTYPES(
828 	kern_return_t,
829 	arm_fast_fault, (pmap_t pmap,
830 	vm_map_address_t va,
831 	vm_prot_t fault_type,
832 	bool was_af_fault,
833 	bool from_user), ARM_FAST_FAULT_INDEX);
834 
835 PMAP_SUPPORT_PROTOTYPES(
836 	boolean_t,
837 	arm_force_fast_fault, (ppnum_t ppnum,
838 	vm_prot_t allow_mode,
839 	int options), ARM_FORCE_FAST_FAULT_INDEX);
840 
841 MARK_AS_PMAP_TEXT static boolean_t
842 arm_force_fast_fault_with_flush_range(
843 	ppnum_t ppnum,
844 	vm_prot_t allow_mode,
845 	int options,
846 	pmap_tlb_flush_range_t *flush_range);
847 
848 /**
849  * Definition of the states driving the batch cache attributes update
850  * state machine.
851  */
852 typedef struct {
853 	uint64_t page_index : 32,           /* The page index to be operated on */
854 	    state : 8,                      /* The current state of the update machine */
855 	    tlb_flush_pass_needed : 1,      /* Tracking whether the tlb flush pass is necessary */
856 	    rt_cache_flush_pass_needed : 1, /* Tracking whether the cache flush pass is necessary */
857 	:0;
858 } batch_set_cache_attr_state_t;
859 
/* Possible values of the "state" field. */
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_UPDATE_PASS             1 /* Updating per-page attributes/PTEs */
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_TLBFLUSH_PASS           2 /* Flushing TLB entries for updated pages */
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_CACHEFLUSH_PASS         3 /* Flushing caches for updated pages */
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE                    4 /* Batch update complete */

/* The packed state must be exactly one 64-bit word; see the struct definition above. */
static_assert(sizeof(batch_set_cache_attr_state_t) == sizeof(uint64_t));
867 
868 PMAP_SUPPORT_PROTOTYPES(
869 	batch_set_cache_attr_state_t,
870 	pmap_batch_set_cache_attributes, (
871 #if XNU_MONITOR
872 		volatile upl_page_info_t *user_page_list,
873 #else /* !XNU_MONITOR */
874 		upl_page_info_array_t user_page_list,
875 #endif /* XNU_MONITOR */
876 		batch_set_cache_attr_state_t state,
877 		unsigned int page_cnt,
878 		unsigned int cacheattr), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
879 
880 PMAP_SUPPORT_PROTOTYPES(
881 	void,
882 	pmap_change_wiring, (pmap_t pmap,
883 	vm_map_address_t v,
884 	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
885 
886 PMAP_SUPPORT_PROTOTYPES(
887 	pmap_t,
888 	pmap_create_options, (ledger_t ledger,
889 	vm_map_size_t size,
890 	unsigned int flags,
891 	kern_return_t * kr), PMAP_CREATE_INDEX);
892 
893 PMAP_SUPPORT_PROTOTYPES(
894 	void,
895 	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
896 
897 PMAP_SUPPORT_PROTOTYPES(
898 	kern_return_t,
899 	pmap_enter_options, (pmap_t pmap,
900 	vm_map_address_t v,
901 	pmap_paddr_t pa,
902 	vm_prot_t prot,
903 	vm_prot_t fault_type,
904 	unsigned int flags,
905 	boolean_t wired,
906 	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
907 
908 PMAP_SUPPORT_PROTOTYPES(
909 	pmap_paddr_t,
910 	pmap_find_pa, (pmap_t pmap,
911 	addr64_t va), PMAP_FIND_PA_INDEX);
912 
913 PMAP_SUPPORT_PROTOTYPES(
914 	kern_return_t,
915 	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
916 
917 
918 PMAP_SUPPORT_PROTOTYPES(
919 	boolean_t,
920 	pmap_is_empty, (pmap_t pmap,
921 	vm_map_offset_t va_start,
922 	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
923 
924 
925 PMAP_SUPPORT_PROTOTYPES(
926 	unsigned int,
927 	pmap_map_cpu_windows_copy, (ppnum_t pn,
928 	vm_prot_t prot,
929 	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
930 
931 PMAP_SUPPORT_PROTOTYPES(
932 	void,
933 	pmap_ro_zone_memcpy, (zone_id_t zid,
934 	vm_offset_t va,
935 	vm_offset_t offset,
936 	const vm_offset_t new_data,
937 	vm_size_t new_data_size), PMAP_RO_ZONE_MEMCPY_INDEX);
938 
939 PMAP_SUPPORT_PROTOTYPES(
940 	uint64_t,
941 	pmap_ro_zone_atomic_op, (zone_id_t zid,
942 	vm_offset_t va,
943 	vm_offset_t offset,
944 	zro_atomic_op_t op,
945 	uint64_t value), PMAP_RO_ZONE_ATOMIC_OP_INDEX);
946 
947 PMAP_SUPPORT_PROTOTYPES(
948 	void,
949 	pmap_ro_zone_bzero, (zone_id_t zid,
950 	vm_offset_t va,
951 	vm_offset_t offset,
952 	vm_size_t size), PMAP_RO_ZONE_BZERO_INDEX);
953 
954 PMAP_SUPPORT_PROTOTYPES(
955 	vm_map_offset_t,
956 	pmap_nest, (pmap_t grand,
957 	pmap_t subord,
958 	addr64_t vstart,
959 	uint64_t size,
960 	vm_map_offset_t vrestart,
961 	kern_return_t * krp), PMAP_NEST_INDEX);
962 
963 PMAP_SUPPORT_PROTOTYPES(
964 	void,
965 	pmap_page_protect_options, (ppnum_t ppnum,
966 	vm_prot_t prot,
967 	unsigned int options,
968 	void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
969 
970 PMAP_SUPPORT_PROTOTYPES(
971 	vm_map_address_t,
972 	pmap_protect_options, (pmap_t pmap,
973 	vm_map_address_t start,
974 	vm_map_address_t end,
975 	vm_prot_t prot,
976 	unsigned int options,
977 	void *args), PMAP_PROTECT_OPTIONS_INDEX);
978 
979 PMAP_SUPPORT_PROTOTYPES(
980 	kern_return_t,
981 	pmap_query_page_info, (pmap_t pmap,
982 	vm_map_offset_t va,
983 	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
984 
985 PMAP_SUPPORT_PROTOTYPES(
986 	mach_vm_size_t,
987 	pmap_query_resident, (pmap_t pmap,
988 	vm_map_address_t start,
989 	vm_map_address_t end,
990 	mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
991 
992 PMAP_SUPPORT_PROTOTYPES(
993 	void,
994 	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
995 
996 PMAP_SUPPORT_PROTOTYPES(
997 	vm_map_address_t,
998 	pmap_remove_options, (pmap_t pmap,
999 	vm_map_address_t start,
1000 	vm_map_address_t end,
1001 	int options), PMAP_REMOVE_OPTIONS_INDEX);
1002 
1003 
1004 PMAP_SUPPORT_PROTOTYPES(
1005 	void,
1006 	pmap_set_cache_attributes, (ppnum_t pn,
1007 	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1008 
1009 PMAP_SUPPORT_PROTOTYPES(
1010 	void,
1011 	pmap_update_compressor_page, (ppnum_t pn,
1012 	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1013 
1014 PMAP_SUPPORT_PROTOTYPES(
1015 	void,
1016 	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1017 
1018 #if MACH_ASSERT || XNU_MONITOR
1019 PMAP_SUPPORT_PROTOTYPES(
1020 	void,
1021 	pmap_set_process, (pmap_t pmap,
1022 	int pid,
1023 	char *procname), PMAP_SET_PROCESS_INDEX);
1024 #endif
1025 
1026 PMAP_SUPPORT_PROTOTYPES(
1027 	void,
1028 	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1029 
1030 PMAP_SUPPORT_PROTOTYPES(
1031 	vm_map_offset_t,
1032 	pmap_unnest_options, (pmap_t grand,
1033 	addr64_t vaddr,
1034 	uint64_t size,
1035 	vm_map_offset_t vrestart,
1036 	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1037 
1038 PMAP_SUPPORT_PROTOTYPES(
1039 	void,
1040 	phys_attribute_set, (ppnum_t pn,
1041 	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1042 
1043 PMAP_SUPPORT_PROTOTYPES(
1044 	void,
1045 	phys_attribute_clear, (ppnum_t pn,
1046 	unsigned int bits,
1047 	int options,
1048 	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1049 
1050 #if __ARM_RANGE_TLBI__
1051 PMAP_SUPPORT_PROTOTYPES(
1052 	vm_map_address_t,
1053 	phys_attribute_clear_range, (pmap_t pmap,
1054 	vm_map_address_t start,
1055 	vm_map_address_t end,
1056 	unsigned int bits,
1057 	unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
1058 #endif /* __ARM_RANGE_TLBI__ */
1059 
1060 
1061 PMAP_SUPPORT_PROTOTYPES(
1062 	void,
1063 	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1064 
1065 PMAP_SUPPORT_PROTOTYPES(
1066 	void,
1067 	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1068 
1069 PMAP_SUPPORT_PROTOTYPES(
1070 	void,
1071 	pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);
1072 
1073 PMAP_SUPPORT_PROTOTYPES(
1074 	void,
1075 	pmap_set_tpro, (pmap_t pmap), PMAP_SET_TPRO_INDEX);
1076 
1077 PMAP_SUPPORT_PROTOTYPES(
1078 	void,
1079 	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1080 
1081 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
1082 PMAP_SUPPORT_PROTOTYPES(
1083 	void,
1084 	pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
1085 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
1086 
/*
 * Definition of the states used by pmap_trim().
 *
 * The state is both consumed and returned by pmap_trim() (see its
 * PMAP_SUPPORT_PROTOTYPES entry below), which suggests the trim operation
 * can be resumed state-by-state across multiple calls — TODO confirm.
 */
typedef enum {
	/* Validates the inputs and computes the bounds of the pmaps. This state can also jump directly to DONE state in some cases. */
	PMAP_TRIM_STATE_START = 0,

	/* Trims the range from the start of the shared region to the "true" start of that of the grand pmap. */
	PMAP_TRIM_STATE_GRAND_BEFORE,

	/* Trims the range from the "true" end of the shared region to the end of that of the grand pmap. */
	PMAP_TRIM_STATE_GRAND_AFTER,

	/* Decreases the subord's "no-bound" reference by one. If that becomes zero, trims the subord. */
	PMAP_TRIM_STATE_SUBORD,

	/* Marks that trimming is finished. */
	PMAP_TRIM_STATE_DONE,

	/* Sentry enum for sanity checks. */
	PMAP_TRIM_STATE_COUNT,
} pmap_trim_state_t;
1107 
1108 PMAP_SUPPORT_PROTOTYPES(
1109 	pmap_trim_state_t,
1110 	pmap_trim, (pmap_t grand, pmap_t subord, addr64_t vstart, uint64_t size, pmap_trim_state_t state), PMAP_TRIM_INDEX);
1111 
1112 #if HAS_APPLE_PAC
1113 PMAP_SUPPORT_PROTOTYPES(
1114 	void *,
1115 	pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
1116 PMAP_SUPPORT_PROTOTYPES(
1117 	void *,
1118 	pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
1119 #endif /* HAS_APPLE_PAC */
1120 
1121 
1122 
1123 
1124 PMAP_SUPPORT_PROTOTYPES(
1125 	kern_return_t,
1126 	pmap_load_trust_cache_with_type, (TCType_t type,
1127 	const vm_address_t pmap_img4_payload,
1128 	const vm_size_t pmap_img4_payload_len,
1129 	const vm_address_t img4_manifest,
1130 	const vm_size_t img4_manifest_len,
1131 	const vm_address_t img4_aux_manifest,
1132 	const vm_size_t img4_aux_manifest_len), PMAP_LOAD_TRUST_CACHE_WITH_TYPE_INDEX);
1133 
1134 PMAP_SUPPORT_PROTOTYPES(
1135 	void,
1136 	pmap_toggle_developer_mode, (bool state), PMAP_TOGGLE_DEVELOPER_MODE_INDEX);
1137 
1138 PMAP_SUPPORT_PROTOTYPES(
1139 	kern_return_t,
1140 	pmap_query_trust_cache, (TCQueryType_t query_type,
1141 	const uint8_t cdhash[kTCEntryHashSize],
1142 	TrustCacheQueryToken_t * query_token), PMAP_QUERY_TRUST_CACHE_INDEX);
1143 
1144 #if PMAP_CS_INCLUDE_CODE_SIGNING
1145 
1146 PMAP_SUPPORT_PROTOTYPES(
1147 	kern_return_t,
1148 	pmap_register_provisioning_profile, (const vm_address_t payload_addr,
1149 	const vm_size_t payload_size), PMAP_REGISTER_PROVISIONING_PROFILE_INDEX);
1150 
1151 PMAP_SUPPORT_PROTOTYPES(
1152 	kern_return_t,
1153 	pmap_unregister_provisioning_profile, (pmap_cs_profile_t * profile_obj),
1154 	PMAP_UNREGISTER_PROVISIONING_PROFILE_INDEX);
1155 
1156 PMAP_SUPPORT_PROTOTYPES(
1157 	kern_return_t,
1158 	pmap_associate_provisioning_profile, (pmap_cs_code_directory_t * cd_entry,
1159 	pmap_cs_profile_t * profile_obj),
1160 	PMAP_ASSOCIATE_PROVISIONING_PROFILE_INDEX);
1161 
1162 PMAP_SUPPORT_PROTOTYPES(
1163 	kern_return_t,
1164 	pmap_disassociate_provisioning_profile, (pmap_cs_code_directory_t * cd_entry),
1165 	PMAP_DISASSOCIATE_PROVISIONING_PROFILE_INDEX);
1166 
1167 #endif
1168 
1169 PMAP_SUPPORT_PROTOTYPES(
1170 	uint32_t,
1171 	pmap_lookup_in_static_trust_cache, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX);
1172 
1173 PMAP_SUPPORT_PROTOTYPES(
1174 	bool,
1175 	pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);
1176 
1177 PMAP_SUPPORT_PROTOTYPES(
1178 	void,
1179 	pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
1180 	PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);
1181 
1182 PMAP_SUPPORT_PROTOTYPES(
1183 	bool,
1184 	pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
1185 	PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
1186 
1187 PMAP_SUPPORT_PROTOTYPES(
1188 	void,
1189 	pmap_set_local_signing_public_key, (const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE]),
1190 	PMAP_SET_LOCAL_SIGNING_PUBLIC_KEY_INDEX);
1191 
1192 PMAP_SUPPORT_PROTOTYPES(
1193 	void,
1194 	pmap_unrestrict_local_signing, (const uint8_t cdhash[CS_CDHASH_LEN]),
1195 	PMAP_UNRESTRICT_LOCAL_SIGNING_INDEX);
1196 
1197 PMAP_SUPPORT_PROTOTYPES(
1198 	void,
1199 	pmap_nop, (pmap_t pmap), PMAP_NOP_INDEX);
1200 
1201 void pmap_footprint_suspend(vm_map_t    map,
1202     boolean_t   suspend);
1203 PMAP_SUPPORT_PROTOTYPES(
1204 	void,
1205 	pmap_footprint_suspend, (vm_map_t map,
1206 	boolean_t suspend),
1207 	PMAP_FOOTPRINT_SUSPEND_INDEX);
1208 
1209 
1210 
1211 
1212 
1213 #if DEVELOPMENT || DEBUG
1214 PMAP_SUPPORT_PROTOTYPES(
1215 	kern_return_t,
1216 	pmap_test_text_corruption, (pmap_paddr_t),
1217 	PMAP_TEST_TEXT_CORRUPTION_INDEX);
1218 #endif /* DEVELOPMENT || DEBUG */
1219 
1220 /*
1221  * The low global vector page is mapped at a fixed alias.
1222  * Since the page size is 16k for H8 and newer we map the globals to a 16k
1223  * aligned address. Readers of the globals (e.g. lldb, panic server) need
1224  * to check both addresses anyway for backward compatibility. So for now
1225  * we leave H6 and H7 where they were.
1226  */
1227 #if (ARM_PGSHIFT == 14)
1228 #define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1229 #else
1230 #define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1231 #endif
1232 
1233 
/*
 * Counters tracking translation-table allocations (root tables, TTE pages,
 * and PTE pages). 8-byte aligned — presumably so they can be updated with
 * single 64-bit accesses; confirm against the code that increments them.
 */
long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1237 
1238 #if XNU_MONITOR
1239 
1240 #if __has_feature(ptrauth_calls)
1241 #define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
1242 #else
1243 #define __ptrauth_ppl_handler
1244 #endif
1245 
1246 /*
1247  * Table of function pointers used for PPL dispatch.
1248  */
1249 const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
1250 	[ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
1251 	[ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
1252 	[MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
1253 	[PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
1254 	[PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
1255 	[PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
1256 	[PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
1257 	[PMAP_CREATE_INDEX] = pmap_create_options_internal,
1258 	[PMAP_DESTROY_INDEX] = pmap_destroy_internal,
1259 	[PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
1260 	[PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
1261 	[PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
1262 	[PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
1263 	[PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
1264 	[PMAP_RO_ZONE_MEMCPY_INDEX] = pmap_ro_zone_memcpy_internal,
1265 	[PMAP_RO_ZONE_ATOMIC_OP_INDEX] = pmap_ro_zone_atomic_op_internal,
1266 	[PMAP_RO_ZONE_BZERO_INDEX] = pmap_ro_zone_bzero_internal,
1267 	[PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
1268 	[PMAP_NEST_INDEX] = pmap_nest_internal,
1269 	[PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
1270 	[PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
1271 	[PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
1272 	[PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
1273 	[PMAP_REFERENCE_INDEX] = pmap_reference_internal,
1274 	[PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
1275 	[PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
1276 	[PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
1277 	[PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
1278 	[PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
1279 	[PMAP_SWITCH_INDEX] = pmap_switch_internal,
1280 	[PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
1281 	[PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
1282 	[PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
1283 	[PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
1284 	[PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
1285 	[PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
1286 	[PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
1287 	[PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
1288 	[PMAP_SET_TPRO_INDEX] = pmap_set_tpro_internal,
1289 	[PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
1290 	[PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
1291 	[PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
1292 	[PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
1293 	[PMAP_SET_LOCAL_SIGNING_PUBLIC_KEY_INDEX] = pmap_set_local_signing_public_key_internal,
1294 	[PMAP_UNRESTRICT_LOCAL_SIGNING_INDEX] = pmap_unrestrict_local_signing_internal,
1295 	[PMAP_LOAD_TRUST_CACHE_WITH_TYPE_INDEX] = pmap_load_trust_cache_with_type_internal,
1296 	[PMAP_QUERY_TRUST_CACHE_INDEX] = pmap_query_trust_cache_internal,
1297 	[PMAP_TOGGLE_DEVELOPER_MODE_INDEX] = pmap_toggle_developer_mode_internal,
1298 #if PMAP_CS_INCLUDE_CODE_SIGNING
1299 	[PMAP_REGISTER_PROVISIONING_PROFILE_INDEX] = pmap_register_provisioning_profile_internal,
1300 	[PMAP_UNREGISTER_PROVISIONING_PROFILE_INDEX] = pmap_unregister_provisioning_profile_internal,
1301 	[PMAP_ASSOCIATE_PROVISIONING_PROFILE_INDEX] = pmap_associate_provisioning_profile_internal,
1302 	[PMAP_DISASSOCIATE_PROVISIONING_PROFILE_INDEX] = pmap_disassociate_provisioning_profile_internal,
1303 #endif
1304 	[PMAP_TRIM_INDEX] = pmap_trim_internal,
1305 	[PMAP_LEDGER_VERIFY_SIZE_INDEX] = pmap_ledger_verify_size_internal,
1306 	[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
1307 	[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
1308 #if HAS_APPLE_PAC
1309 	[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
1310 	[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
1311 #endif /* HAS_APPLE_PAC */
1312 #if __ARM_RANGE_TLBI__
1313 	[PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
1314 #endif /* __ARM_RANGE_TLBI__ */
1315 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
1316 	[PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
1317 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
1318 	[PMAP_NOP_INDEX] = pmap_nop_internal,
1319 
1320 #if DEVELOPMENT || DEBUG
1321 	[PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
1322 #endif /* DEVELOPMENT || DEBUG */
1323 
1324 };
1325 #endif
1326 
1327 #if XNU_MONITOR
1328 /**
1329  * A convenience function for setting protections on a single physical
1330  * aperture or static region mapping without invalidating the TLB.
1331  *
1332  * @note This function does not perform any TLB invalidations. That must be done
1333  *       separately to be able to safely use the updated mapping.
1334  *
1335  * @note This function understands the difference between the VM page size and
1336  *       the kernel page size and will update multiple PTEs if the sizes differ.
1337  *       In other words, enough PTEs will always get updated to change the
1338  *       permissions on a PAGE_SIZE amount of memory.
1339  *
1340  * @note The PVH lock for the physical page represented by this mapping must
1341  *       already be locked.
1342  *
1343  * @note This function assumes the caller has already verified that the PTE
1344  *       pointer does indeed point to a physical aperture or static region page
1345  *       table. Please validate your inputs before passing it along to this
1346  *       function.
1347  *
1348  * @param ptep Pointer to the physical aperture or static region page table to
1349  *             update with a new XPRR index.
1350  * @param expected_perm The XPRR index that is expected to already exist at the
1351  *                      current mapping. If the current index doesn't match this
1352  *                      then the system will panic.
1353  * @param new_perm The new XPRR index to update the mapping with.
1354  */
1355 MARK_AS_PMAP_TEXT static void
pmap_set_pte_xprr_perm(pt_entry_t * const ptep,unsigned int expected_perm,unsigned int new_perm)1356 pmap_set_pte_xprr_perm(
1357 	pt_entry_t * const ptep,
1358 	unsigned int expected_perm,
1359 	unsigned int new_perm)
1360 {
1361 	assert(ptep != NULL);
1362 
1363 	pt_entry_t spte = *ptep;
1364 	pvh_assert_locked(pa_index(pte_to_pa(spte)));
1365 
1366 	if (__improbable((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM))) {
1367 		panic_plain("%s: invalid XPRR index, ptep=%p, new_perm=%u, expected_perm=%u",
1368 		    __func__, ptep, new_perm, expected_perm);
1369 	}
1370 
1371 	/**
1372 	 * The PTE involved should be valid, should not have the hint bit set, and
1373 	 * should have the expected XPRR index.
1374 	 */
1375 	if (__improbable((spte & ARM_PTE_TYPE_MASK) == ARM_PTE_TYPE_FAULT)) {
1376 		panic_plain("%s: physical aperture or static region PTE is invalid, "
1377 		    "ptep=%p, spte=%#llx, new_perm=%u, expected_perm=%u",
1378 		    __func__, ptep, spte, new_perm, expected_perm);
1379 	}
1380 
1381 	if (__improbable(spte & ARM_PTE_HINT_MASK)) {
1382 		panic_plain("%s: physical aperture or static region PTE has hint bit "
1383 		    "set, ptep=%p, spte=0x%llx, new_perm=%u, expected_perm=%u",
1384 		    __func__, ptep, spte, new_perm, expected_perm);
1385 	}
1386 
1387 	if (__improbable(pte_to_xprr_perm(spte) != expected_perm)) {
1388 		panic("%s: perm=%llu does not match expected_perm, spte=0x%llx, "
1389 		    "ptep=%p, new_perm=%u, expected_perm=%u",
1390 		    __func__, pte_to_xprr_perm(spte), spte, ptep, new_perm, expected_perm);
1391 	}
1392 
1393 	pt_entry_t template = spte;
1394 	template &= ~ARM_PTE_XPRR_MASK;
1395 	template |= xprr_perm_to_pte(new_perm);
1396 
1397 	write_pte_strong(ptep, template);
1398 }
1399 
1400 /**
1401  * Update the protections on a single physical aperture mapping and invalidate
1402  * the TLB so the mapping can be used.
1403  *
1404  * @note The PVH lock for the physical page must already be locked.
1405  *
1406  * @param pai The physical address index of the page whose physical aperture
1407  *            mapping will be updated with new permissions.
1408  * @param expected_perm The XPRR index that is expected to already exist at the
1409  *                      current mapping. If the current index doesn't match this
1410  *                      then the system will panic.
1411  * @param new_perm The new XPRR index to update the mapping with.
1412  */
1413 MARK_AS_PMAP_TEXT void
pmap_set_xprr_perm(unsigned int pai,unsigned int expected_perm,unsigned int new_perm)1414 pmap_set_xprr_perm(
1415 	unsigned int pai,
1416 	unsigned int expected_perm,
1417 	unsigned int new_perm)
1418 {
1419 	pvh_assert_locked(pai);
1420 
1421 	const vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
1422 	pt_entry_t * const ptep = pmap_pte(kernel_pmap, kva);
1423 
1424 	pmap_set_pte_xprr_perm(ptep, expected_perm, new_perm);
1425 
1426 	native_pt_ops.flush_tlb_region_async(kva, PAGE_SIZE, kernel_pmap, true);
1427 	sync_tlb_flush();
1428 }
1429 
1430 /**
1431  * Update the protections on a range of physical aperture or static region
1432  * mappings and invalidate the TLB so the mappings can be used.
1433  *
1434  * @note Static region mappings can only be updated before machine_lockdown().
1435  *       Physical aperture mappings can be updated at any time.
1436  *
1437  * @param start The starting virtual address of the static region or physical
1438  *              aperture range whose permissions will be updated.
1439  * @param end The final (inclusive) virtual address of the static region or
1440  *            physical aperture range whose permissions will be updated.
1441  * @param expected_perm The XPRR index that is expected to already exist at the
1442  *                      current mappings. If the current indices don't match
1443  *                      this then the system will panic.
1444  * @param new_perm The new XPRR index to update the mappings with.
1445  */
1446 MARK_AS_PMAP_TEXT static void
pmap_set_range_xprr_perm(vm_address_t start,vm_address_t end,unsigned int expected_perm,unsigned int new_perm)1447 pmap_set_range_xprr_perm(
1448 	vm_address_t start,
1449 	vm_address_t end,
1450 	unsigned int expected_perm,
1451 	unsigned int new_perm)
1452 {
1453 	/**
1454 	 * Validate our arguments; any invalid argument will be grounds for a panic.
1455 	 */
1456 	if (__improbable((start | end) & ARM_PGMASK)) {
1457 		panic_plain("%s: start or end not page aligned, "
1458 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1459 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1460 	}
1461 
1462 	if (__improbable(start > end)) {
1463 		panic("%s: start > end, start=%p, end=%p, new_perm=%u, expected_perm=%u",
1464 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1465 	}
1466 
1467 	const bool in_physmap = (start >= physmap_base) && (end < physmap_end);
1468 	const bool in_static = (start >= gVirtBase) && (end < static_memory_end);
1469 
1470 	if (__improbable(!(in_physmap || in_static))) {
1471 		panic_plain("%s: address not in static region or physical aperture, "
1472 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1473 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1474 	}
1475 
1476 	if (__improbable((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM))) {
1477 		panic_plain("%s: invalid XPRR index, "
1478 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1479 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1480 	}
1481 
1482 	/*
1483 	 * Walk over the PTEs for the given range, and set the protections on those
1484 	 * PTEs. Each iteration of this loop will update all of the leaf PTEs within
1485 	 * one twig entry (whichever twig entry currently maps "va").
1486 	 */
1487 	vm_address_t va = start;
1488 	while (va < end) {
1489 		/**
1490 		 * Get the last VA that the twig entry for "va" maps. All of the leaf
1491 		 * PTEs from va to tte_va_end will have their permissions updated.
1492 		 */
1493 		vm_address_t tte_va_end =
1494 		    (va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr);
1495 
1496 		if (tte_va_end > end) {
1497 			tte_va_end = end;
1498 		}
1499 
1500 		tt_entry_t *ttep = pmap_tte(kernel_pmap, va);
1501 
1502 		if (ttep == NULL) {
1503 			panic_plain("%s: physical aperture or static region tte is NULL, "
1504 			    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1505 			    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1506 		}
1507 
1508 		tt_entry_t tte = *ttep;
1509 
1510 		if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1511 			panic_plain("%s: tte=0x%llx is not a table type entry, "
1512 			    "start=%p, end=%p, new_perm=%u, expected_perm=%u", __func__,
1513 			    tte, (void *)start, (void *)end, new_perm, expected_perm);
1514 		}
1515 
1516 		/* Walk over the given L3 page table page and update the PTEs. */
1517 		pt_entry_t * const ptep = (pt_entry_t *)ttetokv(tte);
1518 		pt_entry_t * const begin_ptep = &ptep[pte_index(native_pt_attr, va)];
1519 		const uint64_t num_ptes = (tte_va_end - va) >> pt_attr_leaf_shift(native_pt_attr);
1520 		pt_entry_t * const end_ptep = begin_ptep + num_ptes;
1521 
1522 		/**
1523 		 * The current PTE pointer is incremented by the page ratio (ratio of
1524 		 * VM page size to kernel hardware page size) because one call to
1525 		 * pmap_set_pte_xprr_perm() will update all PTE entries required to map
1526 		 * a PAGE_SIZE worth of hardware pages.
1527 		 */
1528 		for (pt_entry_t *cur_ptep = begin_ptep; cur_ptep < end_ptep;
1529 		    cur_ptep += PAGE_RATIO, va += PAGE_SIZE) {
1530 			unsigned int pai = pa_index(pte_to_pa(*cur_ptep));
1531 			pvh_lock(pai);
1532 			pmap_set_pte_xprr_perm(cur_ptep, expected_perm, new_perm);
1533 			pvh_unlock(pai);
1534 		}
1535 
1536 		va = tte_va_end;
1537 	}
1538 
1539 	PMAP_UPDATE_TLBS(kernel_pmap, start, end, false, true);
1540 }
1541 
1542 #endif /* XNU_MONITOR */
1543 
1544 static inline void
PMAP_ZINFO_PALLOC(pmap_t pmap,int bytes)1545 PMAP_ZINFO_PALLOC(
1546 	pmap_t pmap, int bytes)
1547 {
1548 	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
1549 }
1550 
1551 static inline void
PMAP_ZINFO_PFREE(pmap_t pmap,int bytes)1552 PMAP_ZINFO_PFREE(
1553 	pmap_t pmap,
1554 	int bytes)
1555 {
1556 	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1557 }
1558 
1559 void
pmap_tt_ledger_credit(pmap_t pmap,vm_size_t size)1560 pmap_tt_ledger_credit(
1561 	pmap_t          pmap,
1562 	vm_size_t       size)
1563 {
1564 	if (pmap != kernel_pmap) {
1565 		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
1566 		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
1567 	}
1568 }
1569 
1570 void
pmap_tt_ledger_debit(pmap_t pmap,vm_size_t size)1571 pmap_tt_ledger_debit(
1572 	pmap_t          pmap,
1573 	vm_size_t       size)
1574 {
1575 	if (pmap != kernel_pmap) {
1576 		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
1577 		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
1578 	}
1579 }
1580 
1581 static inline void
pmap_update_plru(uint16_t asid_index)1582 pmap_update_plru(uint16_t asid_index)
1583 {
1584 	if (__probable(pmap_asid_plru)) {
1585 		unsigned plru_index = asid_index >> 6;
1586 		if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
1587 			asid_plru_generation[plru_index] = ++asid_plru_gencount;
1588 			asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
1589 		}
1590 	}
1591 }
1592 
/*
 * Allocate a virtual ASID for "pmap" and derive its hardware/software ASID
 * pair (hw_asid = vasid % asid_chunk_size, sw_asid = vasid / asid_chunk_size).
 *
 * Returns true on success; false when every virtual ASID is already in use.
 */
static bool
alloc_asid(pmap_t pmap)
{
	int vasid = -1;
	uint16_t hw_asid;

	pmap_simple_lock(&asid_lock);

	if (__probable(pmap_asid_plru)) {
		/* PLRU pass: find the 64-ASID group with the lowest (oldest)
		 * generation number, then look for a free virtual ASID whose
		 * hardware slot falls in that group. */
		unsigned plru_index = 0;
		uint64_t lowest_gen = asid_plru_generation[0];
		uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
		for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
			if (asid_plru_generation[i] < lowest_gen) {
				plru_index = i;
				lowest_gen = asid_plru_generation[i];
				lowest_gen_bitmap = asid_plru_bitmap[i];
			}
		}

		/* Step through the free bitmap one hardware-ASID-space stride at a
		 * time, intersecting with the chosen group's PLRU availability. */
		for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
			uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
			if (temp_plru) {
				vasid = (plru_index << 6) + lsb_first(temp_plru);
#if DEVELOPMENT || DEBUG
				++pmap_asid_hits;
#endif
				break;
			}
		}
	}
	if (__improbable(vasid < 0)) {
		// bitmap_first() returns highest-order bits first, but a 0-based scheme works
		// slightly better with the collision detection scheme used by pmap_switch_internal().
		vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
#if DEVELOPMENT || DEBUG
		++pmap_asid_misses;
#endif
	}
	if (__improbable(vasid < 0)) {
		/* No virtual ASIDs left at all. */
		pmap_simple_unlock(&asid_lock);
		return false;
	}
	assert((uint32_t)vasid < pmap_max_asids);
	assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
	/* Claim the virtual ASID before dropping the lock. */
	bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
	pmap_simple_unlock(&asid_lock);
	/* Split the virtual ASID into its hardware and software components. */
	hw_asid = (uint16_t)(vasid % asid_chunk_size);
	pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
	if (__improbable(hw_asid == MAX_HW_ASIDS)) {
		/* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
		 * reassign to a reserved VASID. */
		assert(pmap->sw_asid < UINT8_MAX);
		pmap->sw_asid = UINT8_MAX;
		/* Allocate from the high end of the hardware ASID range to reduce the likelihood of
		 * aliasing with vital system processes, which are likely to have lower ASIDs. */
		hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
		assert(hw_asid < MAX_HW_ASIDS);
	}
	pmap_update_plru(hw_asid);
	hw_asid += 1;  // Account for ASID 0, which is reserved for the kernel
#if __ARM_KERNEL_PROTECT__
	hw_asid <<= 1;  // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
#endif
	pmap->hw_asid = hw_asid;
	return true;
}
1660 
1661 static void
free_asid(pmap_t pmap)1662 free_asid(pmap_t pmap)
1663 {
1664 	unsigned int vasid;
1665 	uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
1666 	if (__improbable(hw_asid == 0)) {
1667 		return;
1668 	}
1669 
1670 #if __ARM_KERNEL_PROTECT__
1671 	hw_asid >>= 1;
1672 #endif
1673 	hw_asid -= 1;
1674 
1675 	if (__improbable(pmap->sw_asid == UINT8_MAX)) {
1676 		vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
1677 	} else {
1678 		vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
1679 	}
1680 
1681 	if (__probable(pmap_asid_plru)) {
1682 		os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
1683 	}
1684 	pmap_simple_lock(&asid_lock);
1685 	assert(!bitmap_test(&asid_bitmap[0], vasid));
1686 	bitmap_set(&asid_bitmap[0], vasid);
1687 	pmap_simple_unlock(&asid_lock);
1688 }
1689 
1690 
1691 boolean_t
pmap_valid_address(pmap_paddr_t addr)1692 pmap_valid_address(
1693 	pmap_paddr_t addr)
1694 {
1695 	return pa_valid(addr);
1696 }
1697 
1698 
1699 
1700 
1701 
1702 
1703 /*
1704  *      Map memory at initialization.  The physical addresses being
1705  *      mapped are not managed and are never unmapped.
1706  *
1707  *      For now, VM is already on, we only need to map the
1708  *      specified memory.
1709  */
1710 vm_map_address_t
pmap_map(vm_map_address_t virt,vm_offset_t start,vm_offset_t end,vm_prot_t prot,unsigned int flags)1711 pmap_map(
1712 	vm_map_address_t virt,
1713 	vm_offset_t start,
1714 	vm_offset_t end,
1715 	vm_prot_t prot,
1716 	unsigned int flags)
1717 {
1718 	kern_return_t   kr;
1719 	vm_size_t       ps;
1720 
1721 	ps = PAGE_SIZE;
1722 	while (start < end) {
1723 		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
1724 		    prot, VM_PROT_NONE, flags, FALSE);
1725 
1726 		if (kr != KERN_SUCCESS) {
1727 			panic("%s: failed pmap_enter, "
1728 			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
1729 			    __FUNCTION__,
1730 			    (void *) virt, (void *) start, (void *) end, prot, flags);
1731 		}
1732 
1733 		virt += ps;
1734 		start += ps;
1735 	}
1736 	return virt;
1737 }
1738 
/*
 * Back-door mapping routine with selectable memory attributes.  Writes PTEs
 * directly (bypassing pmap_enter()), so it is only appropriate for early-boot
 * and device mappings in the kernel pmap.  The PMAP_MAP_BD_* option selects
 * the cacheability/shareability attributes; the default is uncached device
 * memory.  Returns the virtual address immediately past the last page mapped.
 */
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t      tmplate;
	pt_entry_t     *ptep;
	vm_map_address_t vaddr;
	vm_offset_t     paddr;
	pt_entry_t      mem_attr;

	/* Translate the mapping option into ARM PTE memory-attribute bits. */
	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	case PMAP_MAP_BD_POSTED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		break;
	case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		break;
	default:
		/* No recognized option: uncached device memory. */
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	/* Kernel-only template: never executable (NX|PNX), RW only if requested. */
	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		/* The leaf table must already exist; this routine never allocates. */
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("%s: no PTE for vaddr=%p, "
			    "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
			    __FUNCTION__, (void*)vaddr,
			    (void*)virt, (void*)start, (void*)end, prot, options);
		}

		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		write_pte_strong(ptep, tmplate);

		/* Advance the template's output address to the next physical page. */
		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	/* Invalidate any stale translations for the range just (re)written. */
	if (end >= start) {
		flush_mmu_tlb_region(virt, (unsigned)(end - start));
	}

	return vaddr;
}
1803 
1804 /*
1805  *      Back-door routine for mapping kernel VM at initialization.
1806  *      Useful for mapping memory outside the range
1807  *      [vm_first_phys, vm_last_phys] (i.e., devices).
1808  *      Otherwise like pmap_map.
1809  */
1810 vm_map_address_t
pmap_map_bd(vm_map_address_t virt,vm_offset_t start,vm_offset_t end,vm_prot_t prot)1811 pmap_map_bd(
1812 	vm_map_address_t virt,
1813 	vm_offset_t start,
1814 	vm_offset_t end,
1815 	vm_prot_t prot)
1816 {
1817 	pt_entry_t      tmplate;
1818 	pt_entry_t              *ptep;
1819 	vm_map_address_t vaddr;
1820 	vm_offset_t             paddr;
1821 
1822 	/* not cacheable and not buffered */
1823 	tmplate = pa_to_pte(start)
1824 	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
1825 	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
1826 	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
1827 #if __ARM_KERNEL_PROTECT__
1828 	tmplate |= ARM_PTE_NG;
1829 #endif /* __ARM_KERNEL_PROTECT__ */
1830 
1831 	vaddr = virt;
1832 	paddr = start;
1833 	while (paddr < end) {
1834 		ptep = pmap_pte(kernel_pmap, vaddr);
1835 		if (ptep == PT_ENTRY_NULL) {
1836 			panic("pmap_map_bd");
1837 		}
1838 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1839 		write_pte_strong(ptep, tmplate);
1840 
1841 		pte_increment_pa(tmplate);
1842 		vaddr += PAGE_SIZE;
1843 		paddr += PAGE_SIZE;
1844 	}
1845 
1846 	if (end >= start) {
1847 		flush_mmu_tlb_region(virt, (unsigned)(end - start));
1848 	}
1849 
1850 	return vaddr;
1851 }
1852 
1853 /*
1854  *      Back-door routine for mapping kernel VM at initialization.
1855  *      Useful for mapping memory specific physical addresses in early
1856  *      boot (i.e., before kernel_map is initialized).
1857  *
1858  *      Maps are in the VM_HIGH_KERNEL_WINDOW area.
1859  */
1860 
1861 vm_map_address_t
pmap_map_high_window_bd(vm_offset_t pa_start,vm_size_t len,vm_prot_t prot)1862 pmap_map_high_window_bd(
1863 	vm_offset_t pa_start,
1864 	vm_size_t len,
1865 	vm_prot_t prot)
1866 {
1867 	pt_entry_t              *ptep, pte;
1868 	vm_map_address_t        va_start = VREGION1_START;
1869 	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
1870 	vm_map_address_t        va_end;
1871 	vm_map_address_t        va;
1872 	vm_size_t               offset;
1873 
1874 	offset = pa_start & PAGE_MASK;
1875 	pa_start -= offset;
1876 	len += offset;
1877 
1878 	if (len > (va_max - va_start)) {
1879 		panic("%s: area too large, "
1880 		    "pa_start=%p, len=%p, prot=0x%x",
1881 		    __FUNCTION__,
1882 		    (void*)pa_start, (void*)len, prot);
1883 	}
1884 
1885 scan:
1886 	for (; va_start < va_max; va_start += PAGE_SIZE) {
1887 		ptep = pmap_pte(kernel_pmap, va_start);
1888 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1889 		if (*ptep == ARM_PTE_TYPE_FAULT) {
1890 			break;
1891 		}
1892 	}
1893 	if (va_start > va_max) {
1894 		panic("%s: insufficient pages, "
1895 		    "pa_start=%p, len=%p, prot=0x%x",
1896 		    __FUNCTION__,
1897 		    (void*)pa_start, (void*)len, prot);
1898 	}
1899 
1900 	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
1901 		ptep = pmap_pte(kernel_pmap, va_end);
1902 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1903 		if (*ptep != ARM_PTE_TYPE_FAULT) {
1904 			va_start = va_end + PAGE_SIZE;
1905 			goto scan;
1906 		}
1907 	}
1908 
1909 	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
1910 		ptep = pmap_pte(kernel_pmap, va);
1911 		pte = pa_to_pte(pa_start)
1912 		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
1913 		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
1914 		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
1915 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
1916 #if __ARM_KERNEL_PROTECT__
1917 		pte |= ARM_PTE_NG;
1918 #endif /* __ARM_KERNEL_PROTECT__ */
1919 		write_pte_strong(ptep, pte);
1920 	}
1921 	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false, true);
1922 #if KASAN
1923 	kasan_notify_address(va_start, len);
1924 #endif
1925 	return va_start;
1926 }
1927 
1928 static uint32_t
pmap_compute_max_asids(void)1929 pmap_compute_max_asids(void)
1930 {
1931 	DTEntry entry;
1932 	void const *prop = NULL;
1933 	uint32_t max_asids;
1934 	int err;
1935 	unsigned int prop_size;
1936 
1937 	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
1938 	assert(err == kSuccess);
1939 
1940 	if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
1941 		/* TODO: consider allowing maxproc limits to be scaled earlier so that
1942 		 * we can choose a more flexible default value here. */
1943 		return MAX_ASIDS;
1944 	}
1945 
1946 	if (prop_size != sizeof(max_asids)) {
1947 		panic("pmap-max-asids property is not a 32-bit integer");
1948 	}
1949 
1950 	max_asids = *((uint32_t const *)prop);
1951 	/* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
1952 	max_asids = (max_asids + 63) & ~63UL;
1953 
1954 	if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
1955 		/* currently capped by size of pmap->sw_asid */
1956 		panic("pmap-max-asids too large");
1957 	}
1958 	if (max_asids == 0) {
1959 		panic("pmap-max-asids cannot be zero");
1960 	}
1961 	return max_asids;
1962 }
1963 
#if __arm64__
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 * table AP/PXN/XN overrides of a pmap entry
 *
 * Walks the translation tables from the root level down to the block or
 * page entry covering `addr`, OR-ing together the hierarchical table
 * overrides encountered on the way, then composes them with the leaf
 * entry's own AP/XN/PXN bits.  Returns 0 if any level is invalid.
 */

uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	tt_entry_t tte = 0;
	unsigned int level = 0;
	uint64_t tte_type = 0;
	uint64_t effective_prot_bits = 0;
	uint64_t aggregate_tte = 0;
	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/* Walk from the root level toward the leaf, accumulating table overrides. */
	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
			/* Unmapped at this level: no effective protections. */
			return 0;
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	/* Extract the accumulated hierarchical override fields. */
	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
}
#endif /* __arm64__ */
2022 
2023 static void
pmap_set_srd_fusing()2024 pmap_set_srd_fusing()
2025 {
2026 	DTEntry entry;
2027 	uint32_t const *prop = NULL;
2028 	int err;
2029 	unsigned int prop_size = 0;
2030 
2031 	err = SecureDTLookupEntry(NULL, "/chosen", &entry);
2032 	if (err != kSuccess) {
2033 		panic("PMAP: no chosen DT node");
2034 	}
2035 
2036 	if (kSuccess == SecureDTGetProperty(entry, "research-enabled", (const void**)&prop, &prop_size)) {
2037 		if (prop_size == sizeof(uint32_t)) {
2038 			srd_fused = *prop;
2039 		}
2040 	}
2041 
2042 #if DEVELOPMENT || DEBUG
2043 	PE_parse_boot_argn("srd_fusing", &srd_fused, sizeof(srd_fused));
2044 #endif
2045 }
2046 
2047 /*
2048  *	Bootstrap the system enough to run with virtual memory.
2049  *
2050  *	The early VM initialization code has already allocated
2051  *	the first CPU's translation table and made entries for
2052  *	all the one-to-one mappings to be found there.
2053  *
2054  *	We must set up the kernel pmap structures, the
2055  *	physical-to-virtual translation lookup tables for the
2056  *	physical memory to be managed (between avail_start and
2057  *	avail_end).
2058  *
2059  *	Map the kernel's code and data, and allocate the system page table.
2060  *	Page_size must already be set.
2061  *
2062  *	Parameters:
2063  *	first_avail	first available physical page -
2064  *			   after kernel page tables
2065  *	avail_start	PA of first managed physical page
2066  *	avail_end	PA of last managed physical page
2067  */
2068 
2069 void
pmap_bootstrap(vm_offset_t vstart)2070 pmap_bootstrap(
2071 	vm_offset_t vstart)
2072 {
2073 	vm_map_offset_t maxoffset;
2074 
2075 	lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
2076 
2077 	pmap_set_srd_fusing();
2078 
2079 #if XNU_MONITOR
2080 
2081 #if DEVELOPMENT || DEBUG
2082 	PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
2083 #endif
2084 
2085 #if CONFIG_CSR_FROM_DT
2086 	if (csr_unsafe_kernel_text) {
2087 		pmap_ppl_disable = true;
2088 	}
2089 #endif /* CONFIG_CSR_FROM_DT */
2090 
2091 #endif /* XNU_MONITOR */
2092 
2093 #if DEVELOPMENT || DEBUG
2094 	if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
2095 		kprintf("Kernel traces for pmap operations enabled\n");
2096 	}
2097 #endif
2098 
2099 	/*
2100 	 *	Initialize the kernel pmap.
2101 	 */
2102 #if ARM_PARAMETERIZED_PMAP
2103 	kernel_pmap->pmap_pt_attr = native_pt_attr;
2104 #endif /* ARM_PARAMETERIZED_PMAP */
2105 #if HAS_APPLE_PAC
2106 	kernel_pmap->disable_jop = 0;
2107 #endif /* HAS_APPLE_PAC */
2108 	kernel_pmap->tte = cpu_tte;
2109 	kernel_pmap->ttep = cpu_ttep;
2110 	kernel_pmap->min = UINT64_MAX - (1ULL << (64 - T1SZ_BOOT)) + 1;
2111 	kernel_pmap->max = UINTPTR_MAX;
2112 	os_atomic_init(&kernel_pmap->ref_count, 1);
2113 #if XNU_MONITOR
2114 	os_atomic_init(&kernel_pmap->nested_count, 0);
2115 #endif
2116 	kernel_pmap->nx_enabled = TRUE;
2117 #ifdef  __arm64__
2118 	kernel_pmap->is_64bit = TRUE;
2119 #else
2120 	kernel_pmap->is_64bit = FALSE;
2121 #endif
2122 #if CONFIG_ROSETTA
2123 	kernel_pmap->is_rosetta = FALSE;
2124 #endif
2125 
2126 #if ARM_PARAMETERIZED_PMAP
2127 	kernel_pmap->pmap_pt_attr = native_pt_attr;
2128 #endif /* ARM_PARAMETERIZED_PMAP */
2129 
2130 	kernel_pmap->nested_region_addr = 0x0ULL;
2131 	kernel_pmap->nested_region_size = 0x0ULL;
2132 	kernel_pmap->nested_region_asid_bitmap = NULL;
2133 	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
2134 	kernel_pmap->type = PMAP_TYPE_KERNEL;
2135 
2136 	kernel_pmap->hw_asid = 0;
2137 	kernel_pmap->sw_asid = 0;
2138 
2139 	pmap_lock_init(kernel_pmap);
2140 
2141 	pmap_max_asids = pmap_compute_max_asids();
2142 	pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
2143 	PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
2144 	/* Align the range of available hardware ASIDs to a multiple of 64 to enable the
2145 	 * masking used by the PLRU scheme.  This means we must handle the case in which
2146 	 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
2147 	_Static_assert(sizeof(asid_plru_bitmap[0] == sizeof(uint64_t)), "bitmap_t is not a 64-bit integer");
2148 	_Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
2149 	asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);
2150 
2151 	const vm_size_t asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);
2152 
2153 	/**
2154 	 * Bootstrap the core pmap data structures (e.g., pv_head_table,
2155 	 * pp_attr_table, etc). This function will use `avail_start` to allocate
2156 	 * space for these data structures.
2157 	 */
2158 	pmap_data_bootstrap();
2159 
2160 	/**
2161 	 * Bootstrap any necessary UAT data structures and values needed from the device tree.
2162 	 */
2163 	uat_bootstrap();
2164 
2165 
2166 	/**
2167 	 * Bootstrap any necessary SART data structures and values needed from the device tree.
2168 	 */
2169 	sart_bootstrap();
2170 
2171 	/**
2172 	 * Don't make any assumptions about the alignment of avail_start before this
2173 	 * point (i.e., pmap_data_bootstrap() performs allocations).
2174 	 */
2175 	avail_start = PMAP_ALIGN(avail_start, __alignof(bitmap_t));
2176 
2177 	const pmap_paddr_t pmap_struct_start = avail_start;
2178 
2179 	asid_bitmap = (bitmap_t*)phystokv(avail_start);
2180 	avail_start = round_page(avail_start + asid_table_size);
2181 
2182 	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
2183 
2184 	vm_first_phys = gPhysBase;
2185 	vm_last_phys = trunc_page(avail_end);
2186 
2187 	queue_init(&map_pmap_list);
2188 	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
2189 	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
2190 	free_page_size_tt_count = 0;
2191 	free_page_size_tt_max = 0;
2192 	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
2193 	free_two_page_size_tt_count = 0;
2194 	free_two_page_size_tt_max = 0;
2195 	free_tt_list = TT_FREE_ENTRY_NULL;
2196 	free_tt_count = 0;
2197 	free_tt_max = 0;
2198 
2199 	virtual_space_start = vstart;
2200 	virtual_space_end = VM_MAX_KERNEL_ADDRESS;
2201 
2202 	bitmap_full(&asid_bitmap[0], pmap_max_asids);
2203 	bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
2204 	// Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
2205 	asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);
2206 
2207 
2208 
2209 	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
2210 		maxoffset = trunc_page(maxoffset);
2211 		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
2212 		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
2213 			arm_pmap_max_offset_default = maxoffset;
2214 		}
2215 	}
2216 #if defined(__arm64__)
2217 	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
2218 		maxoffset = trunc_page(maxoffset);
2219 		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
2220 		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
2221 			arm64_pmap_max_offset_default = maxoffset;
2222 		}
2223 	}
2224 #endif
2225 
2226 	PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));
2227 
2228 
2229 #if PMAP_CS_PPL_MONITOR
2230 	/* Initialize the PPL trust cache read-write lock */
2231 	lck_rw_init(&ppl_trust_cache_rt_lock, &pmap_lck_grp, 0);
2232 	ppl_trust_cache_rt_lock.lck_rw_can_sleep = FALSE;
2233 #endif
2234 
2235 #if MACH_ASSERT
2236 	PE_parse_boot_argn("vm_footprint_suspend_allowed",
2237 	    &vm_footprint_suspend_allowed,
2238 	    sizeof(vm_footprint_suspend_allowed));
2239 #endif /* MACH_ASSERT */
2240 
2241 #if KASAN
2242 	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
2243 	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
2244 #endif /* KASAN */
2245 
2246 	/**
2247 	 * Ensure that avail_start is always left on a page boundary. The calling
2248 	 * code might not perform any alignment before allocating page tables so
2249 	 * this is important.
2250 	 */
2251 	avail_start = round_page(avail_start);
2252 }
2253 
2254 #if XNU_MONITOR
2255 
2256 static inline void
pa_set_range_monitor(pmap_paddr_t start_pa,pmap_paddr_t end_pa)2257 pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
2258 {
2259 	pmap_paddr_t cur_pa;
2260 	for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
2261 		assert(pa_valid(cur_pa));
2262 		ppattr_pa_set_monitor(cur_pa);
2263 	}
2264 }
2265 
2266 void
pa_set_range_xprr_perm(pmap_paddr_t start_pa,pmap_paddr_t end_pa,unsigned int expected_perm,unsigned int new_perm)2267 pa_set_range_xprr_perm(pmap_paddr_t start_pa,
2268     pmap_paddr_t end_pa,
2269     unsigned int expected_perm,
2270     unsigned int new_perm)
2271 {
2272 	vm_offset_t start_va = phystokv(start_pa);
2273 	vm_offset_t end_va = start_va + (end_pa - start_pa);
2274 
2275 	pa_set_range_monitor(start_pa, end_pa);
2276 	pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
2277 }
2278 
/*
 * Flag every physical page backing the kernelcache as "locked down"
 * (PVH_FLAG_LOCKDOWN_KC) so it cannot be remapped or repurposed later.
 * Pages whose physical address does not map linearly back into the
 * kernelcache virtual range are skipped: they will be reclaimed by the OS.
 * Panics if any page is already locked down.
 */
static void
pmap_lockdown_kc(void)
{
	extern vm_offset_t vm_kernelcache_base;
	extern vm_offset_t vm_kernelcache_top;
	pmap_paddr_t start_pa = kvtophys_nofail(vm_kernelcache_base);
	pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
	pmap_paddr_t cur_pa = start_pa;
	vm_offset_t cur_va = vm_kernelcache_base;
	while (cur_pa < end_pa) {
		/* phystokv_range() may shrink range_size to the contiguous subrange it translated. */
		vm_size_t range_size = end_pa - cur_pa;
		vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
		if (ptov_va != cur_va) {
			/*
			 * If the physical address maps back to a virtual address that is non-linear
			 * w.r.t. the kernelcache, that means it corresponds to memory that will be
			 * reclaimed by the OS and should therefore not be locked down.
			 */
			cur_pa += range_size;
			cur_va += range_size;
			continue;
		}
		unsigned int pai = pa_index(cur_pa);
		pv_entry_t **pv_h  = pai_to_pvh(pai);

		vm_offset_t pvh_flags = pvh_get_flags(pv_h);

		if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN_MASK)) {
			panic("pai %d already locked down", pai);
		}

		pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN_KC);
		cur_pa += ARM_PGBYTES;
		cur_va += ARM_PGBYTES;
	}
#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
	/* The CTRR test pages are deliberately written by XNUPOST; exempt them from lockdown. */
	extern uint64_t ctrr_ro_test;
	extern uint64_t ctrr_nx_test;
	pmap_paddr_t exclude_pages[] = {kvtophys_nofail((vm_offset_t)&ctrr_ro_test), kvtophys_nofail((vm_offset_t)&ctrr_nx_test)};
	for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
		pv_entry_t **pv_h  = pai_to_pvh(pa_index(exclude_pages[i]));
		pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN_KC);
	}
#endif
}
2324 
/*
 * Called once all static (bootstrap) allocations have been made.  Transfers
 * ownership of the bootstrap page tables and other early allocations to the
 * PPL and applies the appropriate xPRR permissions, then locks down the
 * kernelcache.  The ordering of the transitions below is deliberate.
 */
void
pmap_static_allocations_done(void)
{
	pmap_paddr_t monitor_start_pa;
	pmap_paddr_t monitor_end_pa;

	/*
	 * Protect the bootstrap (V=P and V->P) page tables.
	 *
	 * These bootstrap allocations will be used primarily for page tables.
	 * If we wish to secure the page tables, we need to start by marking
	 * these bootstrap allocations as pages that we want to protect.
	 */
	monitor_start_pa = kvtophys_nofail((vm_offset_t)&bootstrap_pagetables);
	monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;

	/* The bootstrap page tables are mapped RW at bootstrap. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);

	/*
	 * We use avail_start as a pointer to the first address that has not
	 * been reserved for bootstrap, so we know which pages to give to the
	 * virtual memory layer.
	 */
	monitor_start_pa = BootArgs->topOfKernelData;
	monitor_end_pa = avail_start;

	/* The other bootstrap allocations are mapped RW at bootstrap. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	/*
	 * The RO page tables are mapped RW in arm_vm_init() and later restricted
	 * to RO in arm_vm_prot_finalize(), which is called after this function.
	 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
	 * they can't be allocated for other uses.  We don't need a special xPRR
	 * protection index, as there is no PPL_RO index, and these pages are ultimately
	 * protected by KTRR/CTRR.  Furthermore, use of PPL_RW for these pages would
	 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
	 * lookup table index to USER_XO before APRR is applied, leading the hardware
	 * to believe we are dealing with an user XO page upon performing a translation.
	 */
	monitor_start_pa = kvtophys_nofail((vm_offset_t)&ropagetable_begin);
	monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
	pa_set_range_monitor(monitor_start_pa, monitor_end_pa);

	monitor_start_pa = kvtophys_nofail(segPPLDATAB);
	monitor_end_pa = monitor_start_pa + segSizePPLDATA;

	/* PPL data is RW for the PPL, RO for the kernel. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	monitor_start_pa = kvtophys_nofail(segPPLTEXTB);
	monitor_end_pa = monitor_start_pa + segSizePPLTEXT;

	/* PPL text is RX for the PPL, RO for the kernel. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);


	/*
	 * In order to support DTrace, the save areas for the PPL must be
	 * writable.  This is due to the fact that DTrace will try to update
	 * register state.
	 */
	if (pmap_ppl_disable) {
		vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
		vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);

		pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
	}


	/* PPL const data keeps its permission; this transition only marks the pages PPL-owned. */
	if (segSizePPLDATACONST > 0) {
		monitor_start_pa = kvtophys_nofail(segPPLDATACONSTB);
		monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;

		pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
	}

	/*
	 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
	 * precaution.  The real RW mappings are at a different location with guard pages.
	 */
	pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);

	/* Prevent remapping of the kernelcache */
	pmap_lockdown_kc();
}
2412 
/*
 * Finalize PPL lockdown: lock down the shared-region commpage data (and
 * text, if present) so they can no longer be remapped.
 */
void
pmap_lockdown_ppl(void)
{
	/* Mark the PPL as being locked down. */

	mp_disable_preemption(); // for _nopreempt locking operations
	pmap_ppl_lockdown_page(sharedpage_ro_data_kva, PVH_FLAG_LOCKDOWN_KC, false);
	if (sharedpage_text_kva != 0) {
		pmap_ppl_lockdown_page_with_prot(sharedpage_text_kva, PVH_FLAG_LOCKDOWN_KC,
		    false, VM_PROT_READ | VM_PROT_EXECUTE);
	}
	mp_enable_preemption();

	/* Write-protect the kernel RO commpage. */
	/* NOTE(review): this unconditional #error appears to be residue of an
	 * elided #if/#elif configuration chain in this extract — confirm the
	 * surrounding conditionals against the full source tree. */
#error "XPRR configuration error"
}
2429 #endif /* XNU_MONITOR */
2430 
2431 void
pmap_virtual_space(vm_offset_t * startp,vm_offset_t * endp)2432 pmap_virtual_space(
2433 	vm_offset_t *startp,
2434 	vm_offset_t *endp
2435 	)
2436 {
2437 	*startp = virtual_space_start;
2438 	*endp = virtual_space_end;
2439 }
2440 
2441 
/*
 * Enumerate the reserved kernel virtual regions.  The caller iterates over
 * region_select values starting from 0; returns TRUE and fills *startp/*size
 * while a region exists for the given index, FALSE otherwise.  Which regions
 * exist depends on the KTRR/CTRR and ARM_LARGE_MEMORY configuration.
 */
boolean_t
pmap_virtual_region(
	unsigned int region_select,
	vm_map_offset_t *startp,
	vm_map_size_t *size
	)
{
	boolean_t       ret = FALSE;
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	if (region_select == 0) {
		/*
		 * In this config, the bootstrap mappings should occupy their own L2
		 * TTs, as they should be immutable after boot.  Having the associated
		 * TTEs and PTEs in their own pages allows us to lock down those pages,
		 * while allowing the rest of the kernel address range to be remapped.
		 */
		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#if defined(ARM_LARGE_MEMORY)
		*size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
#else
		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
#endif
		ret = TRUE;
	}

#if defined(ARM_LARGE_MEMORY)
	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
		ret = TRUE;
	}
#endif
#else /* !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)) */
#if defined(ARM_LARGE_MEMORY)
	/* For large memory systems with no KTRR/CTRR such as virtual machines */
	*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
	if (region_select == 0) {
		*size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
		ret = TRUE;
	}
#else /* !defined(ARM_LARGE_MEMORY) */
	unsigned long low_global_vr_mask = 0;
	vm_map_size_t low_global_vr_size = 0;

	if (region_select == 0) {
		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
		if (!TEST_PAGE_SIZE_4K) {
			/* 16K pages: 32MB (L2 block) alignment mask. */
			*startp = gVirtBase & 0xFFFFFFFFFE000000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
		} else {
			/* 4K pages: 8MB alignment mask. */
			*startp = gVirtBase & 0xFFFFFFFFFF800000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
		}
		ret = TRUE;
	}
	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
		ret = TRUE;
	}
	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
	if (!TEST_PAGE_SIZE_4K) {
		low_global_vr_mask = 0xFFFFFFFFFE000000;
		low_global_vr_size = 0x2000000;
	} else {
		low_global_vr_mask = 0xFFFFFFFFFF800000;
		low_global_vr_size = 0x800000;
	}

	/* Only report the low-globals region if region 0 did not already cover it. */
	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
		*startp = LOW_GLOBAL_BASE_ADDRESS;
		*size = low_global_vr_size;
		ret = TRUE;
	}

	if (region_select == 3) {
		/* In this config, we allow the bootstrap mappings to occupy the same
		 * page table pages as the heap.
		 */
		*startp = VM_MIN_KERNEL_ADDRESS;
		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
		ret = TRUE;
	}
#endif /* defined(ARM_LARGE_MEMORY) */
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
	return ret;
}
2529 
2530 /*
2531  * Routines to track and allocate physical pages during early boot.
2532  * On most systems that memory runs from first_avail through to avail_end
2533  * with no gaps.
2534  *
2535  * If the system supports ECC and ecc_bad_pages_count > 0, we
2536  * need to skip those pages.
2537  */
2538 
/* Number of physical pages still available for early-boot allocation
 * (the span first_avail .. avail_end); computed by initialize_ram_ranges(). */
static unsigned int avail_page_count = 0;
/* One-shot flag: avail_page_count is computed lazily on first query. */
static bool need_ram_ranges_init = true;
2541 
2542 
2543 /**
2544  * Checks to see if a given page is in
2545  * the array of known bad pages
2546  *
2547  * @param ppn page number to check
2548  */
2549 bool
pmap_is_bad_ram(__unused ppnum_t ppn)2550 pmap_is_bad_ram(__unused ppnum_t ppn)
2551 {
2552 	return false;
2553 }
2554 
2555 /**
2556  * Prepare bad ram pages to be skipped.
2557  */
2558 
2559 /*
2560  * Initialize the count of available pages. No lock needed here,
2561  * as this code is called while kernel boot up is single threaded.
2562  */
2563 static void
initialize_ram_ranges(void)2564 initialize_ram_ranges(void)
2565 {
2566 	pmap_paddr_t first = first_avail;
2567 	pmap_paddr_t end = avail_end;
2568 
2569 	assert(first <= end);
2570 	assert(first == (first & ~PAGE_MASK));
2571 	assert(end == (end & ~PAGE_MASK));
2572 	avail_page_count = atop(end - first);
2573 
2574 	need_ram_ranges_init = false;
2575 }
2576 
2577 unsigned int
pmap_free_pages(void)2578 pmap_free_pages(
2579 	void)
2580 {
2581 	if (need_ram_ranges_init) {
2582 		initialize_ram_ranges();
2583 	}
2584 	return avail_page_count;
2585 }
2586 
2587 unsigned int
pmap_free_pages_span(void)2588 pmap_free_pages_span(
2589 	void)
2590 {
2591 	if (need_ram_ranges_init) {
2592 		initialize_ram_ranges();
2593 	}
2594 	return (unsigned int)atop(avail_end - first_avail);
2595 }
2596 
2597 
/*
 * Allocate the next early-boot physical page, preferring "high" memory.
 * This configuration has no separate high-memory pool, so it simply
 * defers to pmap_next_page(); might_free is unused.
 */
boolean_t
pmap_next_page_hi(
	ppnum_t            * pnum,
	__unused boolean_t might_free)
{
	return pmap_next_page(pnum);
}
2605 
2606 
2607 boolean_t
pmap_next_page(ppnum_t * pnum)2608 pmap_next_page(
2609 	ppnum_t *pnum)
2610 {
2611 	if (need_ram_ranges_init) {
2612 		initialize_ram_ranges();
2613 	}
2614 
2615 
2616 	if (first_avail != avail_end) {
2617 		*pnum = (ppnum_t)atop(first_avail);
2618 		first_avail += PAGE_SIZE;
2619 		assert(avail_page_count > 0);
2620 		--avail_page_count;
2621 		return TRUE;
2622 	}
2623 	assert(avail_page_count == 0);
2624 	return FALSE;
2625 }
2626 
2627 
2628 /*
2629  *	Initialize the pmap module.
2630  *	Called by vm_init, to initialize any structures that the pmap
2631  *	system needs to map virtual memory.
2632  */
void
pmap_init(
	void)
{
	/*
	 *	Protect page zero in the kernel map.
	 *	(can be overruled by permanent translation
	 *	table entries at page zero - see arm_vm_init).
	 */
	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

	pmap_initialized = TRUE;

	/*
	 *	Create the zone of physical maps
	 *	and the physical-to-virtual entries.
	 */
	pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
	    ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);


	/*
	 *	Initialize the pmap object (for tracking the vm_page_t
	 *	structures for pages we allocate to be page tables in
	 *	pmap_expand()).
	 */
	_vm_object_allocate(mem_size, pmap_object);
	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * The values of [hard_]maxproc may have been scaled, make sure
	 * they are still less than the value of pmap_max_asids.
	 * Each process needs its own ASID, so the process limits cannot
	 * exceed the number of available ASIDs.
	 */
	if ((uint32_t)maxproc > pmap_max_asids) {
		maxproc = pmap_max_asids;
	}
	if ((uint32_t)hard_maxproc > pmap_max_asids) {
		hard_maxproc = pmap_max_asids;
	}
}
2673 
2674 /**
2675  * Verify that a given physical page contains no mappings (outside of the
2676  * default physical aperture mapping).
2677  *
2678  * @param ppnum Physical page number to check there are no mappings to.
2679  *
2680  * @return True if there are no mappings, false otherwise or if the page is not
2681  *         kernel-managed.
2682  */
2683 bool
pmap_verify_free(ppnum_t ppnum)2684 pmap_verify_free(ppnum_t ppnum)
2685 {
2686 	const pmap_paddr_t pa = ptoa(ppnum);
2687 
2688 	assert(pa != vm_page_fictitious_addr);
2689 
2690 	/* Only mappings to kernel-managed physical memory are tracked. */
2691 	if (!pa_valid(pa)) {
2692 		return false;
2693 	}
2694 
2695 	const unsigned int pai = pa_index(pa);
2696 	pv_entry_t **pvh = pai_to_pvh(pai);
2697 
2698 	return pvh_test_type(pvh, PVH_TYPE_NULL);
2699 }
2700 
2701 #if MACH_ASSERT
2702 /**
2703  * Verify that a given physical page contains no mappings (outside of the
2704  * default physical aperture mapping) and if it does, then panic.
2705  *
2706  * @note It's recommended to use pmap_verify_free() directly when operating in
2707  *       the PPL since the PVH lock isn't getting grabbed here (due to this code
2708  *       normally being called from outside of the PPL, and the pv_head_table
2709  *       can't be modified outside of the PPL).
2710  *
2711  * @param ppnum Physical page number to check there are no mappings to.
2712  */
void
pmap_assert_free(ppnum_t ppnum)
{
	const pmap_paddr_t pa = ptoa(ppnum);

	/*
	 * Fast path: page is not kernel-managed (so untracked) or really has
	 * no mappings. Either way there is nothing to report.
	 */
	if (__probable(!pa_valid(pa) || pmap_verify_free(ppnum))) {
		return;
	}

	const unsigned int pai = pa_index(pa);
	pv_entry_t **pvh = pai_to_pvh(pai);

	/**
	 * This function is always called from outside of the PPL. Because of this,
	 * the PVH entry can't be locked. This function is generally only called
	 * before the VM reclaims a physical page and shouldn't be creating new
	 * mappings. Even if a new mapping is created while parsing the hierarchy,
	 * the worst case is that the system will panic in another way, and we were
	 * already about to panic anyway.
	 */

	/**
	 * Since pmap_verify_free() returned false, that means there is at least one
	 * mapping left. Let's get some extra info on the first mapping we find to
	 * dump in the panic string (the common case is that there is one spare
	 * mapping that was never unmapped).
	 */
	pt_entry_t *first_ptep = PT_ENTRY_NULL;

	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
		/* Single-mapping case: the PVH points directly at the PTE. */
		first_ptep = pvh_ptep(pvh);
	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
		/* Multi-mapping case: the PVH points at a list of PV entries. */
		pv_entry_t *pvep = pvh_pve_list(pvh);

		/* Each PVE can contain multiple PTEs. Let's find the first one. */
		for (int pve_ptep_idx = 0; pve_ptep_idx < PTE_PER_PVE; pve_ptep_idx++) {
			first_ptep = pve_get_ptep(pvep, pve_ptep_idx);
			if (first_ptep != PT_ENTRY_NULL) {
				break;
			}
		}

		/* The PVE should have at least one valid PTE. */
		assert(first_ptep != PT_ENTRY_NULL);
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		panic("%s: Physical page is being used as a page table at PVH %p (pai: %d)",
		    __func__, pvh, pai);
	} else {
		/**
		 * The mapping disappeared between here and the pmap_verify_free() call.
		 * The only way that can happen is if the VM was racing this call with
		 * a call that unmaps PTEs. Operations on this page should not be
		 * occurring at the same time as this check, and unfortunately we can't
		 * lock the PVH entry to prevent it, so just panic instead.
		 */
		panic("%s: Mapping was detected but is now gone. Is the VM racing this "
		    "call with an operation that unmaps PTEs? PVH %p (pai: %d)",
		    __func__, pvh, pai);
	}

	/* Panic with a unique string identifying the first bad mapping and owner. */
	{
		/* First PTE is mapped by the main CPUs. */
		pmap_t pmap = ptep_get_pmap(first_ptep);
		const char *type = (pmap == kernel_pmap) ? "Kernel" : "User";

		panic("%s: Found at least one mapping to %#llx. First PTEP (%p) is a "
		    "%s CPU mapping (pmap: %p)",
		    __func__, (uint64_t)pa, first_ptep, type, pmap);
	}
}
2785 #endif
2786 
2787 
2788 static vm_size_t
pmap_root_alloc_size(pmap_t pmap)2789 pmap_root_alloc_size(pmap_t pmap)
2790 {
2791 #pragma unused(pmap)
2792 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2793 	unsigned int root_level = pt_attr_root_level(pt_attr);
2794 	return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
2795 }
2796 
2797 
2798 /*
2799  *	Create and return a physical map.
2800  *
2801  *	If the size specified for the map
2802  *	is zero, the map is an actual physical
2803  *	map, and may be referenced by the
2804  *	hardware.
2805  *
2806  *	If the size specified is non-zero,
2807  *	the map will be used in software only, and
2808  *	is bounded by that size.
2809  */
/*
 * Internal (PPL-side on monitor configurations) worker for pmap creation.
 * Allocates and initializes a user pmap: struct, ASID, root translation
 * table. On failure, writes the error into *kr and returns PMAP_NULL.
 */
MARK_AS_PMAP_TEXT pmap_t
pmap_create_options_internal(
	ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags,
	kern_return_t *kr)
{
	unsigned        i;
	unsigned        tte_index_max;
	pmap_t          p;
	bool is_64bit = flags & PMAP_CREATE_64BIT;
#if defined(HAS_APPLE_PAC)
	bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */
	kern_return_t   local_kr = KERN_SUCCESS;

	if (size != 0) {
		{
			// Size parameter should only be set for stage 2.
			return PMAP_NULL;
		}
	}

	/* Reject flags this configuration does not recognize. */
	if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
		return PMAP_NULL;
	}

#if XNU_MONITOR
	if ((local_kr = pmap_alloc_pmap(&p)) != KERN_SUCCESS) {
		goto pmap_create_fail;
	}

	assert(p != PMAP_NULL);

	if (ledger) {
		pmap_ledger_validate(ledger);
		pmap_ledger_retain(ledger);
	}
#else
	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
		local_kr = KERN_RESOURCE_SHORTAGE;
		goto pmap_create_fail;
	}
#endif

	p->ledger = ledger;


	p->pmap_vm_map_cs_enforced = false;
	p->min = 0;


#if CONFIG_ROSETTA
	/* Flag address spaces created for Rosetta translation. */
	if (flags & PMAP_CREATE_ROSETTA) {
		p->is_rosetta = TRUE;
	} else {
		p->is_rosetta = FALSE;
	}
#endif /* CONFIG_ROSETTA */

#if defined(HAS_APPLE_PAC)
	p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */

	/* True nested bounds are established later, if/when this pmap nests. */
	p->nested_region_true_start = 0;
	p->nested_region_true_end = ~0;

	p->nx_enabled = true;
	p->is_64bit = is_64bit;
	p->nested_pmap = PMAP_NULL;
	p->type = PMAP_TYPE_USER;

#if ARM_PARAMETERIZED_PMAP
	/* Default to the native pt_attr */
	p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */
#if __ARM_MIXED_PAGE_SIZE__
	if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
		p->pmap_pt_attr = &pmap_pt_attr_4k;
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	p->max = pmap_user_va_size(p);

	/* Reserve a hardware address-space identifier for this pmap. */
	if (!pmap_get_pt_ops(p)->alloc_id(p)) {
		local_kr = KERN_NO_SPACE;
		goto id_alloc_fail;
	}

	pmap_lock_init(p);

	p->tt_entry_free = (tt_entry_t *)0;
	tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));


#if XNU_MONITOR
	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
#else
	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
#endif
	if (!(p->tte)) {
		local_kr = KERN_RESOURCE_SHORTAGE;
		goto tt1_alloc_fail;
	}

	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
	PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

	/* nullify the translation table */
	for (i = 0; i < tte_index_max; i++) {
		p->tte[i] = ARM_TTE_TYPE_FAULT;
	}

	FLUSH_PTE();

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

	p->nested_has_no_bounds_ref = false;
	p->nested_no_bounds_refcnt = 0;
	p->nested_bounds_set = false;


#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	p->pmap_pid = 0;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

#if XNU_MONITOR
	os_atomic_init(&p->nested_count, 0);
	assert(os_atomic_load(&p->ref_count, relaxed) == 0);
	/* Ensure prior updates to the new pmap are visible before the non-zero ref_count is visible */
	os_atomic_thread_fence(release);
#endif
	os_atomic_init(&p->ref_count, 1);
	/* Publish the fully-initialized pmap on the global pmap list. */
	pmap_simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	return p;

tt1_alloc_fail:
	pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
#if XNU_MONITOR
	pmap_free_pmap(p);

	if (ledger) {
		pmap_ledger_release(ledger);
	}
#else
	zfree(pmap_zone, p);
#endif
pmap_create_fail:
#if XNU_MONITOR
	/* The caller's kr lives outside the PPL; pin it around the write. */
	pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
#endif
	*kr = local_kr;
#if XNU_MONITOR
	pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
#endif
	return PMAP_NULL;
}
2985 
2986 pmap_t
pmap_create_options(ledger_t ledger,vm_map_size_t size,unsigned int flags)2987 pmap_create_options(
2988 	ledger_t ledger,
2989 	vm_map_size_t size,
2990 	unsigned int flags)
2991 {
2992 	pmap_t pmap;
2993 	kern_return_t kr = KERN_SUCCESS;
2994 
2995 	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
2996 
2997 	ledger_reference(ledger);
2998 
2999 #if XNU_MONITOR
3000 	for (;;) {
3001 		pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
3002 		if (kr != KERN_RESOURCE_SHORTAGE) {
3003 			break;
3004 		}
3005 		assert(pmap == PMAP_NULL);
3006 		pmap_alloc_page_for_ppl(0);
3007 		kr = KERN_SUCCESS;
3008 	}
3009 #else
3010 	pmap = pmap_create_options_internal(ledger, size, flags, &kr);
3011 #endif
3012 
3013 	if (pmap == PMAP_NULL) {
3014 		ledger_dereference(ledger);
3015 	}
3016 
3017 	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
3018 
3019 	return pmap;
3020 }
3021 
3022 #if XNU_MONITOR
3023 /*
3024  * This symbol remains in place when the PPL is enabled so that the dispatch
3025  * table does not change from development to release configurations.
3026  */
3027 #endif
3028 #if MACH_ASSERT || XNU_MONITOR
3029 MARK_AS_PMAP_TEXT void
pmap_set_process_internal(__unused pmap_t pmap,__unused int pid,__unused char * procname)3030 pmap_set_process_internal(
3031 	__unused pmap_t pmap,
3032 	__unused int pid,
3033 	__unused char *procname)
3034 {
3035 #if MACH_ASSERT
3036 	if (pmap == NULL || pmap->pmap_pid == -1) {
3037 		return;
3038 	}
3039 
3040 	validate_pmap_mutable(pmap);
3041 
3042 	pmap->pmap_pid = pid;
3043 	strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
3044 	if (pmap_ledgers_panic_leeway) {
3045 		/*
3046 		 * XXX FBDP
3047 		 * Some processes somehow trigger some issues that make
3048 		 * the pmap stats and ledgers go off track, causing
3049 		 * some assertion failures and ledger panics.
3050 		 * Turn off the sanity checks if we allow some ledger leeway
3051 		 * because of that.  We'll still do a final check in
3052 		 * pmap_check_ledgers() for discrepancies larger than the
3053 		 * allowed leeway after the address space has been fully
3054 		 * cleaned up.
3055 		 */
3056 		pmap->pmap_stats_assert = FALSE;
3057 		ledger_disable_panic_on_negative(pmap->ledger,
3058 		    task_ledgers.phys_footprint);
3059 		ledger_disable_panic_on_negative(pmap->ledger,
3060 		    task_ledgers.internal);
3061 		ledger_disable_panic_on_negative(pmap->ledger,
3062 		    task_ledgers.internal_compressed);
3063 		ledger_disable_panic_on_negative(pmap->ledger,
3064 		    task_ledgers.iokit_mapped);
3065 		ledger_disable_panic_on_negative(pmap->ledger,
3066 		    task_ledgers.alternate_accounting);
3067 		ledger_disable_panic_on_negative(pmap->ledger,
3068 		    task_ledgers.alternate_accounting_compressed);
3069 	}
3070 #endif /* MACH_ASSERT */
3071 }
3072 #endif /* MACH_ASSERT || XNU_MONITOR */
3073 
3074 #if MACH_ASSERT
/*
 * Public wrapper for pmap_set_process_internal(); dispatches into the PPL
 * on monitor-enabled configurations.
 */
void
pmap_set_process(
	pmap_t pmap,
	int pid,
	char *procname)
{
#if XNU_MONITOR
	pmap_set_process_ppl(pmap, pid, procname);
#else
	pmap_set_process_internal(pmap, pid, procname);
#endif
}
3087 #endif /* MACH_ASSERT */
3088 
3089 /*
3090  * pmap_deallocate_all_leaf_tts:
3091  *
3092  * Recursive function for deallocating all leaf TTEs.  Walks the given TT,
3093  * removing and deallocating all TTEs.
3094  */
/*
 * pmap_deallocate_all_leaf_tts:
 *
 * Recursive function for deallocating all leaf TTEs.  Walks the given TT,
 * removing and deallocating all TTEs.
 *
 * @param pmap the pmap whose tables are being torn down
 * @param first_ttep base of the translation table to walk at this level
 * @param level translation level of first_ttep (must be above leaf level)
 */
MARK_AS_PMAP_TEXT static void
pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
{
	tt_entry_t tte = ARM_TTE_EMPTY;
	tt_entry_t * ttep = NULL;
	tt_entry_t * last_ttep = NULL;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	assert(level < pt_attr_leaf_level(pt_attr));

	/* Index with the maximum VA (~0) to find the last entry of this table. */
	last_ttep = &first_ttep[ttn_index(pt_attr, ~0, level)];

	for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
		tte = *ttep;

		if (!(tte & ARM_TTE_VALID)) {
			continue;
		}

		/* Block mappings are not expected in tables being torn down here. */
		if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
			panic("%s: found block mapping, ttep=%p, tte=%p, "
			    "pmap=%p, first_ttep=%p, level=%u",
			    __FUNCTION__, ttep, (void *)tte,
			    pmap, first_ttep, level);
		}

		/* Must be valid, type table */
		if (level < pt_attr_twig_level(pt_attr)) {
			/* If we haven't reached the twig level, recurse to the next level. */
			pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
		}

		/* Remove the TTE. */
		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
		/* NOTE(review): no matching unlock here — pmap_tte_deallocate()
		 * appears to drop the pmap lock on behalf of the caller; confirm. */
		pmap_tte_deallocate(pmap, 0, 0, false, ttep, level);
	}
}
3133 
3134 /*
3135  * We maintain stats and ledgers so that a task's physical footprint is:
3136  * phys_footprint = ((internal - alternate_accounting)
3137  *                   + (internal_compressed - alternate_accounting_compressed)
3138  *                   + iokit_mapped
3139  *                   + purgeable_nonvolatile
3140  *                   + purgeable_nonvolatile_compressed
3141  *                   + page_table)
3142  * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3143  */
3144 
3145 /*
3146  *	Retire the given physical map from service.
3147  *	Should only be called if the map contains
3148  *	no valid mappings.
3149  */
MARK_AS_PMAP_TEXT void
pmap_destroy_internal(
	pmap_t pmap)
{
	if (pmap == PMAP_NULL) {
		return;
	}

	validate_pmap(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/* Drop one reference; only the last reference tears the pmap down. */
	int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
	if (ref_count > 0) {
		return;
	} else if (__improbable(ref_count < 0)) {
		panic("pmap %p: refcount underflow", pmap);
	} else if (__improbable(pmap == kernel_pmap)) {
		panic("pmap %p: attempt to destroy kernel pmap", pmap);
	} else if (__improbable(pmap->type == PMAP_TYPE_COMMPAGE)) {
		panic("pmap %p: attempt to destroy commpage pmap", pmap);
	}

#if XNU_MONITOR
	/*
	 * Issue a store-load barrier to ensure the checks of nested_count and the per-CPU
	 * pmaps below will not be speculated ahead of the decrement of ref_count above.
	 * That ensures that if the pmap is currently in use elsewhere, this path will
	 * either observe it in use and panic, or PMAP_VALIDATE_MUTABLE will observe a
	 * ref_count of 0 and panic.
	 */
	os_atomic_thread_fence(seq_cst);
	if (__improbable(os_atomic_load(&pmap->nested_count, relaxed) != 0)) {
		panic("pmap %p: attempt to destroy while nested", pmap);
	}
	/* A pmap still active or in-flight on any CPU must not be destroyed. */
	const int max_cpu = ml_get_max_cpu_number();
	for (unsigned int i = 0; i <= max_cpu; ++i) {
		const pmap_cpu_data_t *cpu_data = pmap_get_remote_cpu_data(i);
		if (cpu_data == NULL) {
			continue;
		}
		if (__improbable(os_atomic_load(&cpu_data->inflight_pmap, relaxed) == pmap)) {
			panic("pmap %p: attempting to destroy while in-flight on cpu %llu", pmap, (uint64_t)i);
		} else if (__improbable(os_atomic_load(&cpu_data->active_pmap, relaxed) == pmap)) {
			panic("pmap %p: attempting to destroy while active on cpu %llu", pmap, (uint64_t)i);
		}
	}
#endif
	pmap_unmap_sharedpage(pmap);

	/* Remove the pmap from the global list before freeing anything. */
	pmap_simple_lock(&pmaps_lock);
	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	pmap_trim_self(pmap);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));



	if (pmap->tte) {
		pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
		pmap->tte = (tt_entry_t *) NULL;
		pmap->ttep = 0;
	}

	assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);

	/* Flush any stale TLB entries for this address space. */
	if (__improbable(pmap->type == PMAP_TYPE_NESTED)) {
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap, false);
		sync_tlb_flush();
	} else {
		pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
		sync_tlb_flush();
		/* return its asid to the pool */
		pmap_get_pt_ops(pmap)->free_id(pmap);
		if (pmap->nested_pmap != NULL) {
#if XNU_MONITOR
			os_atomic_dec(&pmap->nested_pmap->nested_count, relaxed);
#endif
			/* release the reference we hold on the nested pmap */
			pmap_destroy_internal(pmap->nested_pmap);
		}
	}

	pmap_check_ledgers(pmap);

	if (pmap->nested_region_asid_bitmap) {
#if XNU_MONITOR
		pmap_pages_free(kvtophys_nofail((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
#else
		kfree_data(pmap->nested_region_asid_bitmap,
		    pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
	}

#if XNU_MONITOR
	if (pmap->ledger) {
		pmap_ledger_release(pmap->ledger);
	}

	pmap_lock_destroy(pmap);
	pmap_free_pmap(pmap);
#else
	pmap_lock_destroy(pmap);
	zfree(pmap_zone, pmap);
#endif
}
3262 
/*
 * Public wrapper for pmap destruction: tears down the pmap (via the PPL on
 * monitor configurations) and drops the ledger reference taken at creation.
 */
void
pmap_destroy(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

	/*
	 * Capture the ledger before teardown: on the final reference the pmap
	 * structure itself is freed by the destroy call below.
	 */
	ledger_t ledger = pmap->ledger;

#if XNU_MONITOR
	pmap_destroy_ppl(pmap);

	pmap_ledger_check_balance(pmap);
#else
	pmap_destroy_internal(pmap);
#endif

	/* Drop the reference taken by pmap_create_options(). */
	ledger_dereference(ledger);

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}
3283 
3284 
3285 /*
3286  *	Add a reference to the specified pmap.
3287  */
3288 MARK_AS_PMAP_TEXT void
pmap_reference_internal(pmap_t pmap)3289 pmap_reference_internal(
3290 	pmap_t pmap)
3291 {
3292 	if (pmap != PMAP_NULL) {
3293 		validate_pmap_mutable(pmap);
3294 		os_atomic_inc(&pmap->ref_count, relaxed);
3295 	}
3296 }
3297 
/*
 * Public wrapper for pmap_reference_internal(); dispatches into the PPL on
 * monitor-enabled configurations.
 */
void
pmap_reference(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_reference_ppl(pmap);
#else
	pmap_reference_internal(pmap);
#endif
}
3308 
/*
 * Allocate a root (TT1) translation table of the given size.
 *
 * Size classes are kept on dedicated free lists (sub-page, one page, two
 * pages) protected by tt1_lock; a fresh page is carved up when the lists
 * are empty. Returns NULL only on resource shortage when
 * PMAP_TT_ALLOCATE_NOWAIT is set.
 */
static tt_entry_t *
pmap_tt1_allocate(
	pmap_t          pmap,
	vm_size_t       size,
	unsigned        option)
{
	tt_entry_t      *tt1 = NULL;
	tt_free_entry_t *tt1_free;
	pmap_paddr_t    pa;
	vm_address_t    va;
	vm_address_t    va_end;
	kern_return_t   ret;

	/* Sub-page sizes other than the root size are rounded up to a page. */
	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
		size = PAGE_SIZE;
	}

	/* First try the per-size-class free lists. */
	pmap_simple_lock(&tt1_lock);
	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
		free_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
		free_two_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
		free_tt_count--;
		tt1 = (tt_entry_t *)free_tt_list;
		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
	}

	pmap_simple_unlock(&tt1_lock);

	if (tt1 != NULL) {
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}

	/* Free lists empty: allocate fresh zeroed page(s). */
	ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

	if (ret == KERN_RESOURCE_SHORTAGE) {
		return (tt_entry_t *)0;
	}

#if XNU_MONITOR
	assert(pa);
#endif

	/*
	 * For sub-page root sizes, the first chunk of the page is returned to
	 * the caller and the remaining chunks are chained onto free_tt_list.
	 */
	if (size < PAGE_SIZE) {
		va = phystokv(pa) + size;
		tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
		tt_free_entry_t *next_free = NULL;
		for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
			tt1_free = (tt_free_entry_t *)va;
			tt1_free->next = next_free;
			next_free = tt1_free;
		}
		pmap_simple_lock(&tt1_lock);
		local_free_list->next = free_tt_list;
		free_tt_list = next_free;
		free_tt_count += ((PAGE_SIZE / size) - 1);
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		pmap_simple_unlock(&tt1_lock);
	}

	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
	 * Depending on the device, this can vary between 512b and 16K. */
	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
	pmap_tt_ledger_credit(pmap, size);

	return (tt_entry_t *) phystokv(pa);
}
3385 
/*
 * Return a root (TT1) translation table to the per-size-class free lists,
 * then (unless PMAP_TT_DEALLOCATE_NOBLOCK) trim the page-sized lists back
 * down to their high-water limits, freeing surplus pages to the VM.
 */
static void
pmap_tt1_deallocate(
	pmap_t pmap,
	tt_entry_t *tt,
	vm_size_t size,
	unsigned option)
{
	tt_free_entry_t *tt_entry;

	/* Mirror the rounding done in pmap_tt1_allocate(). */
	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
		size = PAGE_SIZE;
	}

	tt_entry = (tt_free_entry_t *)tt;
	assert(not_in_kdp);
	pmap_simple_lock(&tt1_lock);

	/* Push the table onto the free list matching its size class. */
	if (size < PAGE_SIZE) {
		free_tt_count++;
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		tt_entry->next = free_tt_list;
		free_tt_list = tt_entry;
	}

	if (size == PAGE_SIZE) {
		free_page_size_tt_count++;
		if (free_page_size_tt_count > free_page_size_tt_max) {
			free_page_size_tt_max = free_page_size_tt_count;
		}
		tt_entry->next = free_page_size_tt_list;
		free_page_size_tt_list = tt_entry;
	}

	if (size == 2 * PAGE_SIZE) {
		free_two_page_size_tt_count++;
		if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
			free_two_page_size_tt_max = free_two_page_size_tt_count;
		}
		tt_entry->next = free_two_page_size_tt_list;
		free_two_page_size_tt_list = tt_entry;
	}

	/* Caller can't block: skip the trimming below (which may free pages). */
	if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
		pmap_simple_unlock(&tt1_lock);
		pmap_tt_ledger_debit(pmap, size);
		return;
	}

	/* Trim surplus single-page tables; the lock is dropped around each free. */
	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
		free_page_size_tt_count--;
		tt = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}

	/* Likewise for surplus two-page tables. */
	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
		free_two_page_size_tt_count--;
		tt = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);

		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}
	pmap_simple_unlock(&tt1_lock);
	pmap_tt_ledger_debit(pmap, size);
}
3466 
/*
 * Allocate a non-root translation table for the given pmap and level.
 *
 * Reuses a chunk from the pmap's private tt_entry_free list when possible;
 * otherwise allocates a fresh page, registers it as a page-table descriptor
 * page (PVH_TYPE_PTDP), and — when the pmap's page size is smaller than the
 * kernel page size — chains the unused sub-page chunks onto tt_entry_free.
 *
 * Returns KERN_RESOURCE_SHORTAGE only when PMAP_OPTIONS_NOWAIT is set;
 * otherwise blocks (VM_PAGE_WAIT) until memory is available.
 */
MARK_AS_PMAP_TEXT static kern_return_t
pmap_tt_allocate(
	pmap_t pmap,
	tt_entry_t **ttp,
	unsigned int level,
	unsigned int options)
{
	pmap_paddr_t pa;
	*ttp = NULL;

	/* Fast path: pop a chunk from this pmap's private free list. */
	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
		tt_free_entry_t *tt_free_cur, *tt_free_next;

		tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
		tt_free_next = tt_free_cur->next;
		tt_free_cur->next = NULL;
		*ttp = (tt_entry_t *)tt_free_cur;
		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
	}
	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);

	if (*ttp == NULL) {
		pt_desc_t       *ptdp;

		/*
		 *  Allocate a VM page for the level x page table entries.
		 */
		while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		/* Allocate the descriptor tracking this page table page. */
		while ((ptdp = ptd_alloc(pmap)) == NULL) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				pmap_pages_free(pa, PAGE_SIZE);
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		/* Account the page as a TTE page (non-leaf) or PTE page (leaf). */
		if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
			OSAddAtomic64(1, &alloc_ttepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic64(1, &alloc_ptepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}

		pmap_tt_ledger_credit(pmap, PAGE_SIZE);

		PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

		pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
		/* Clear all PVH flags when using a page for a PTD to avoid tripping unexpected page flag usage checks. */
		pvh_set_flags(pai_to_pvh(pa_index(pa)), 0);

		/*
		 * If the pmap's translation page size is smaller than the kernel
		 * page size, chain the remaining sub-page chunks onto the pmap's
		 * private free list for later pmap_tt_allocate() calls.
		 */
		uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
		if (PAGE_SIZE > pmap_page_size) {
			vm_address_t    va;
			vm_address_t    va_end;

			pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);

			for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
				((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
				pmap->tt_entry_free = (tt_entry_t *)va;
			}
			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
		}

		*ttp = (tt_entry_t *)phystokv(pa);
	}

#if XNU_MONITOR
	assert(*ttp);
#endif

	return KERN_SUCCESS;
}
3549 
3550 
/*
 * Return a page-table page previously obtained from pmap_tt_allocate() to the
 * pmap's translation-table free list, and release the backing native page to
 * the VM once every pmap-page-sized chunk of that native page is free.
 *
 * @param pmap  The pmap that owns the table being freed.
 * @param ttp   KVA of the table being freed.
 * @param level Level of the table being freed (leaf tables must have refcnt 0;
 *              non-leaf tables carry the PT_DESC_REFCOUNT sentinel).
 */
static void
pmap_tt_deallocate(
	pmap_t pmap,
	tt_entry_t *ttp,
	unsigned int level)
{
	pt_desc_t *ptdp;
	ptd_info_t *ptd_info;
	unsigned pt_acc_cnt;
	unsigned i;
	vm_offset_t     free_page = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	/* Number of pmap pages (and ptd_info[] slots) packed into one native PAGE_SIZE page. */
	unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);

	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);

	ptdp = ptep_get_ptd(ttp);
	ptd_info = ptd_get_info(ptdp, ttp);

	/* Mark this table's VA slot in the descriptor as unused. */
	ptdp->va[ptd_get_index(ptdp, ttp)] = (vm_offset_t)-1;

	/* Non-leaf tables use the sentinel refcount; clear it so the check below passes. */
	if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
		ptd_info->refcnt = 0;
	}

	if (__improbable(ptd_info->refcnt != 0)) {
		panic("pmap_tt_deallocate(): ptdp %p, count %d", ptdp, ptd_info->refcnt);
	}

	/* Sum the refcounts of every pmap page sharing this native page. */
	for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
		pt_acc_cnt += ptdp->ptd_info[i].refcnt;
	}

	if (pt_acc_cnt == 0) {
		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
		unsigned pt_free_entry_cnt = 1;

		/*
		 * Count how many sibling chunks of this native page already sit on the
		 * free list (starting at 1 to account for the chunk being freed now).
		 */
		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
			tt_free_entry_t *tt_free_list_next;

			tt_free_list_next = tt_free_list->next;
			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
				pt_free_entry_cnt++;
			}
			tt_free_list = tt_free_list_next;
		}
		if (pt_free_entry_cnt == max_pt_index) {
			/*
			 * Every chunk of the native page is now free: unlink all of its
			 * entries from the free list so the whole page can be released below.
			 */
			tt_free_entry_t *tt_free_list_cur;

			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

			while (tt_free_list_cur) {
				tt_free_entry_t *tt_free_list_next;

				tt_free_list_next = tt_free_list_cur->next;
				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
					/* Entry belongs to the page being released: splice it out. */
					tt_free_list->next = tt_free_list_next->next;
				} else {
					tt_free_list = tt_free_list_next;
				}
				tt_free_list_cur = tt_free_list_next;
			}
		} else {
			/* Other chunks of this native page remain in use; just stash this one. */
			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = ttp;
		}
	} else {
		/* Sibling tables on this native page still hold references; keep the page. */
		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = ttp;
	}

	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);

	if (free_page != 0) {
		/* Whole native page is free: drop its descriptor, clear the PVH entry,
		 * return the page, and undo the accounting done at allocation time. */
		ptd_deallocate(ptep_get_ptd((pt_entry_t*)free_page));
		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
		if (level < pt_attr_leaf_level(pt_attr)) {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
	}
}
3639 
3640 /**
3641  * Safely clear out a translation table entry.
3642  *
3643  * @note If the TTE to clear out points to a leaf table, then that leaf table
3644  *       must have a refcnt of zero before the TTE can be removed.
3645  * @note This function expects to be called with pmap locked exclusive, and will
3646  *       return with pmap unlocked.
3647  *
3648  * @param pmap The pmap containing the page table whose TTE is being removed.
3649  * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
3650  * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
3651  * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
3652  * @param ttep Pointer to the TTE that should be cleared out.
3653  * @param level The level of the page table that contains the TTE to be removed.
3654  */
static void
pmap_tte_remove(
	pmap_t pmap,
	vm_offset_t va_start,
	vm_offset_t va_end,
	bool need_strong_sync,
	tt_entry_t *ttep,
	unsigned int level)
{
	pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const tt_entry_t tte = *ttep;

	if (__improbable(tte == ARM_TTE_EMPTY)) {
		panic("%s: L%d TTE is already empty. Potential double unmap or memory "
		    "stomper? pmap=%p ttep=%p", __func__, level, pmap, ttep);
	}

	/* Clear the TTE; the strong flush makes the store visible to the table walker. */
	*ttep = (tt_entry_t) 0;
	FLUSH_PTE_STRONG();
	// If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
	if (va_end > va_start) {
		PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync, false);
	}

	/* Per the contract, the pmap lock is dropped here and not re-taken. */
	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);

	/**
	 * Remember, the passed in "level" parameter refers to the level above the
	 * table that's getting removed (e.g., removing an L2 TTE will unmap an L3
	 * page table).
	 */
	const bool remove_leaf_table = (level == pt_attr_twig_level(pt_attr));

	/**
	 * Non-leaf pagetables don't track active references in the PTD and instead
	 * use a sentinel refcount.  If we're removing a leaf pagetable, we'll load
	 * the real refcount below.
	 */
	unsigned short refcnt = PT_DESC_REFCOUNT;

	/*
	 * It's possible that a concurrent pmap_disconnect() operation may need to reference
	 * a PTE on the pagetable page to be removed.  A full disconnect() may have cleared
	 * one or more PTEs on this page but not yet dropped the refcount, which would cause
	 * us to panic in this function on a non-zero refcount.  Moreover, it's possible for
	 * a disconnect-to-compress operation to set the compressed marker on a PTE, and
	 * for pmap_remove_range_options() to concurrently observe that marker, clear it, and
	 * drop the pagetable refcount accordingly, without taking any PVH locks that could
	 * synchronize it against the disconnect operation.  If that removal caused the
	 * refcount to reach zero, the pagetable page could be freed before the disconnect
	 * operation is finished using the relevant pagetable descriptor.
	 * Address these cases by waiting until all CPUs have been observed to not be
	 * executing pmap_disconnect().
	 */
	if (remove_leaf_table) {
		/* One bit per possible CPU; a set bit means that CPU has not yet been seen idle. */
		bitmap_t active_disconnects[BITMAP_LEN(MAX_CPUS)];
		const int max_cpu = ml_get_max_cpu_number();
		bitmap_full(&active_disconnects[0], max_cpu + 1);
		bool inflight_disconnect;

		/*
		 * Ensure the ensuing load of per-CPU inflight_disconnect is not speculated
		 * ahead of any prior PTE load which may have observed the effect of a
		 * concurrent disconnect operation.  An acquire fence is required for this;
		 * a load-acquire operation is insufficient.
		 */
		os_atomic_thread_fence(acquire);
		do {
			inflight_disconnect = false;
			for (int i = bitmap_first(&active_disconnects[0], max_cpu + 1);
			    i >= 0;
			    i = bitmap_next(&active_disconnects[0], i)) {
				const pmap_cpu_data_t *cpu_data = pmap_get_remote_cpu_data(i);
				if (cpu_data == NULL) {
					continue;
				}
				if (os_atomic_load_exclusive(&cpu_data->inflight_disconnect, relaxed)) {
					/* Exclusive-monitor armed: WFE sleeps until the flag's line is written. */
					__builtin_arm_wfe();
					inflight_disconnect = true;
					continue;
				}
				os_atomic_clear_exclusive();
				/* This CPU observed idle: never poll it again in subsequent passes. */
				bitmap_clear(&active_disconnects[0], (unsigned int)i);
			}
		} while (inflight_disconnect);
		/* Ensure the refcount is observed after any observation of inflight_disconnect */
		os_atomic_thread_fence(acquire);
		refcnt = os_atomic_load(&(ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt), relaxed);
	}

#if MACH_ASSERT
	/**
	 * On internal devices, always do the page table consistency check
	 * regardless of page table level or the actual refcnt value.
	 */
	{
#else /* MACH_ASSERT */
	/**
	 * Only perform the page table consistency check when deleting leaf page
	 * tables and it seems like there might be valid/compressed mappings
	 * leftover.
	 */
	if (__improbable(remove_leaf_table && refcnt != 0)) {
#endif /* MACH_ASSERT */

		/**
		 * There are multiple problems that can arise as a non-zero refcnt:
		 * 1. A bug in the refcnt management logic.
		 * 2. A memory stomper or hardware failure.
		 * 3. The VM forgetting to unmap all of the valid mappings in an address
		 *    space before destroying a pmap.
		 *
		 * By looping over the page table and determining how many valid or
		 * compressed entries there actually are, we can narrow down which of
		 * these three cases is causing this panic. If the expected refcnt
		 * (valid + compressed) and the actual refcnt don't match then the
		 * problem is probably either a memory corruption issue (if the
		 * non-empty entries don't match valid+compressed, that could also be a
		 * sign of corruption) or refcnt management bug. Otherwise, there
		 * actually are leftover mappings and the higher layers of xnu are
		 * probably at fault.
		 */
		const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
		/* Base of the page table that the (already cleared) TTE pointed at. */
		pt_entry_t *bpte = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));

		pt_entry_t *ptep = bpte;
		unsigned short non_empty = 0, valid = 0, comp = 0;
		for (unsigned int i = 0; i < (pmap_page_size / sizeof(*ptep)); i++, ptep++) {
			/* Keep track of all non-empty entries to detect memory corruption. */
			if (__improbable(*ptep != ARM_PTE_EMPTY)) {
				non_empty++;
			}

			if (__improbable(ARM_PTE_IS_COMPRESSED(*ptep, ptep))) {
				comp++;
			} else if (__improbable((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE)) {
				valid++;
			}
		}

#if MACH_ASSERT
		/**
		 * On internal machines, panic whenever a page table getting deleted has
		 * leftover mappings (valid or otherwise) or a leaf page table has a
		 * non-zero refcnt.
		 */
		if (__improbable((non_empty != 0) || (remove_leaf_table && refcnt != 0))) {
#else /* MACH_ASSERT */
		/* We already know the leaf page-table has a non-zero refcnt, so panic. */
		{
#endif /* MACH_ASSERT */
			panic("%s: Found inconsistent state in soon to be deleted L%d table: %d valid, "
			    "%d compressed, %d non-empty, refcnt=%d, L%d tte=%#llx, pmap=%p, bpte=%p", __func__,
			    level + 1, valid, comp, non_empty, refcnt, level, (uint64_t)tte, pmap, bpte);
		}
	}
}
3814 
3815 /**
3816  * Given a pointer to an entry within a `level` page table, delete the
3817  * page table at `level` + 1 that is represented by that entry. For instance,
3818  * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
3819  * contains the PA of the L3 table, and `level` would be "2".
3820  *
3821  * @note If the table getting deallocated is a leaf table, then that leaf table
3822  *       must have a refcnt of zero before getting deallocated. All other levels
3823  *       must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
3824  * @note This function expects to be called with pmap locked exclusive and will
3825  *       return with pmap unlocked.
3826  *
3827  * @param pmap The pmap that owns the page table to be deallocated.
3828  * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
3829  * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
3830  * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
3831  * @param ttep Pointer to the `level` TTE to remove.
3832  * @param level The level of the table that contains an entry pointing to the
3833  *              table to be removed. The deallocated page table will be a
3834  *              `level` + 1 table (so if `level` is 2, then an L3 table will be
3835  *              deleted).
3836  */
3837 void
3838 pmap_tte_deallocate(
3839 	pmap_t pmap,
3840 	vm_offset_t va_start,
3841 	vm_offset_t va_end,
3842 	bool need_strong_sync,
3843 	tt_entry_t *ttep,
3844 	unsigned int level)
3845 {
3846 	tt_entry_t tte;
3847 
3848 	pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);
3849 
3850 	tte = *ttep;
3851 
3852 	if (tte_get_ptd(tte)->pmap != pmap) {
3853 		panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
3854 		    __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
3855 	}
3856 
3857 	assertf((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE, "%s: invalid TTE %p (0x%llx)",
3858 	    __func__, ttep, (unsigned long long)tte);
3859 
3860 	/* pmap_tte_remove() will drop the pmap lock */
3861 	pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);
3862 
3863 	pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(tte_to_pa(tte)), level + 1);
3864 }
3865 
3866 /*
3867  *	Remove a range of hardware page-table entries.
3868  *	The entries given are the first (inclusive)
3869  *	and last (exclusive) entries for the VM pages.
3870  *	The virtual address is the va for the first pte.
3871  *
3872  *	The pmap must be locked.
3873  *	If the pmap is not the kernel pmap, the range must lie
3874  *	entirely within one pte-page.  This is NOT checked.
3875  *	Assumes that the pte-page exists.
3876  *
3877  *	Returns the number of PTE changed
3878  */
3879 MARK_AS_PMAP_TEXT static int
3880 pmap_remove_range(
3881 	pmap_t pmap,
3882 	vm_map_address_t va,
3883 	pt_entry_t *bpte,
3884 	pt_entry_t *epte)
3885 {
3886 	bool need_strong_sync = false;
3887 	int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
3888 	    &need_strong_sync, PMAP_OPTIONS_REMOVE);
3889 	if (num_changed > 0) {
3890 		PMAP_UPDATE_TLBS(pmap, va,
3891 		    va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync, true);
3892 	}
3893 	return num_changed;
3894 }
3895 
3896 
3897 #ifdef PVH_FLAG_EXEC
3898 
3899 /*
3900  *	Update the access protection bits of the physical aperture mapping for a page.
 *	This is useful, for example, in guaranteeing that a verified executable page
3902  *	has no writable mappings anywhere in the system, including the physical
3903  *	aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
3904  *	synchronization overhead in cases where the call to this function is
3905  *	guaranteed to be followed by other TLB operations.
3906  */
3907 void
3908 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
3909 {
3910 #if __ARM_PTE_PHYSMAP__
3911 	pvh_assert_locked(pai);
3912 	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
3913 	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
3914 
3915 	pt_entry_t tmplate = *pte_p;
3916 	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
3917 		return;
3918 	}
3919 	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
3920 	if (tmplate & ARM_PTE_HINT_MASK) {
3921 		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
3922 		    __func__, pte_p, (void *)kva, tmplate);
3923 	}
3924 	write_pte_strong(pte_p, tmplate);
3925 	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap, true);
3926 	if (!flush_tlb_async) {
3927 		sync_tlb_flush();
3928 	}
3929 #endif
3930 }
3931 #endif /* defined(PVH_FLAG_EXEC) */
3932 
3933 
3934 
/*
 * Core PTE-removal loop for [bpte, epte) starting at VA `va`.
 *
 * If `eva` is non-NULL, the loop checks for pending preemption on each
 * iteration and may stop early, storing the VA it stopped at through `eva`.
 * TLB invalidation is NOT performed here; callers flush based on the return
 * value.  Returns the number of PTEs actually changed.
 */
MARK_AS_PMAP_TEXT int
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	vm_map_address_t *eva,
	bool *need_strong_sync __unused,
	int options)
{
	pt_entry_t     *cpte;
	size_t          npages = 0;
	int             num_removed, num_unwired;
	int             num_pte_changed;
	unsigned int    pai = 0;
	pmap_paddr_t    pa;
	int             num_external, num_internal, num_reusable;
	int             num_alt_internal;
	uint64_t        num_compressed, num_alt_compressed;
	/* Net refcount delta to apply to the pagetable's PTD at the end. */
	int16_t         refcnt = 0;

	pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);

	/* The entire range must reside within a single leaf page table. */
	if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
		panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
	}

	if (__improbable(pmap->type == PMAP_TYPE_COMMPAGE)) {
		panic("%s: attempt to remove mappings from commpage pmap %p", __func__, pmap);
	}

	num_removed = 0;
	num_unwired = 0;
	num_pte_changed = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	num_alt_internal = 0;
	num_alt_compressed = 0;

#if XNU_MONITOR
	bool ro_va = false;
	if (__improbable((pmap == kernel_pmap) && (eva != NULL) && zone_spans_ro_va(va, *eva))) {
		ro_va = true;
	}
#endif
	for (cpte = bpte; cpte < epte;
	    cpte += PAGE_RATIO, va += pmap_page_size) {
		pt_entry_t      spte;
		boolean_t       managed = FALSE;

		/*
		 * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
		 * so we need to be as aggressive as possible in checking for preemption when we can.
		 */
		if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
			*eva = va;
			break;
		}

		spte = *((volatile pt_entry_t*)cpte);

		/* Loop until we have a stable PTE snapshot with the matching PVH lock held. */
		while (!managed) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
				/*
				 * "pmap" must be locked at this point,
				 * so this should not race with another
				 * pmap_remove_range() or pmap_enter().
				 */

				/* one less "compressed"... */
				num_compressed++;
				if (spte & ARM_PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					num_alt_compressed++;
				}

				/* clear marker */
				write_pte_fast(cpte, ARM_PTE_TYPE_FAULT);
				/*
				 * "refcnt" also accounts for
				 * our "compressed" markers,
				 * so let's update it here.
				 */
				--refcnt;
				spte = *((volatile pt_entry_t*)cpte);
			}
			/*
			 * It may be possible for the pte to transition from managed
			 * to unmanaged in this timeframe; for now, elide the assert.
			 * We should break out as a consequence of checking pa_valid.
			 */
			//assert(!ARM_PTE_IS_COMPRESSED(spte));
			pa = pte_to_pa(spte);
			if (!pa_valid(pa)) {
#if XNU_MONITOR
				unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
#endif
#if XNU_MONITOR
				if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
				    (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
					panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
					    __func__, (uint64_t)pa);
				}
#endif
				break;
			}
			pai = pa_index(pa);
			pvh_lock(pai);
			/* Re-read under the PVH lock; retry if the PTE was retargeted meanwhile. */
			spte = *((volatile pt_entry_t*)cpte);
			pa = pte_to_pa(spte);
			if (pai == pa_index(pa)) {
				managed = TRUE;
				break; // Leave pai locked as we will unlock it after we free the PV entry
			}
			pvh_unlock(pai);
		}

		if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
			/*
			 * There used to be a valid mapping here but it
			 * has already been removed when the page was
			 * sent to the VM compressor, so nothing left to
			 * remove now...
			 */
			continue;
		}

		/* remove the translation, do not flush the TLB */
		if (*cpte != ARM_PTE_TYPE_FAULT) {
			assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
			assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
#if MACH_ASSERT
			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
				panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
				    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
			}
#endif
			write_pte_fast(cpte, ARM_PTE_TYPE_FAULT);
			num_pte_changed++;
		}

		/* Only user pagetables track per-PTE refcounts in the PTD. */
		if ((spte != ARM_PTE_TYPE_FAULT) &&
		    (pmap != kernel_pmap)) {
			assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
			assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
			--refcnt;
		}

		if (pte_is_wired(spte)) {
			pte_set_wired(pmap, cpte, 0);
			num_unwired++;
		}
		/*
		 * if not managed, we're done
		 */
		if (!managed) {
			continue;
		}

#if XNU_MONITOR
		if (__improbable(ro_va)) {
			pmap_ppl_unlockdown_page_locked(pai, PVH_FLAG_LOCKDOWN_RO, true);
		}
#endif

		/*
		 * find and remove the mapping from the chain for this
		 * physical address.
		 */
		bool is_internal, is_altacct;
		pmap_remove_pv(pmap, cpte, pai, true, &is_internal, &is_altacct);

		/* Classify the removed mapping for the ledger updates below. */
		if (is_altacct) {
			assert(is_internal);
			num_internal++;
			num_alt_internal++;
			if (!pvh_test_type(pai_to_pvh(pai), PVH_TYPE_PTEP)) {
				ppattr_clear_altacct(pai);
				ppattr_clear_internal(pai);
			}
		} else if (is_internal) {
			if (ppattr_test_reusable(pai)) {
				num_reusable++;
			} else {
				num_internal++;
			}
			if (!pvh_test_type(pai_to_pvh(pai), PVH_TYPE_PTEP)) {
				ppattr_clear_internal(pai);
			}
		} else {
			num_external++;
		}
		pvh_unlock(pai);
		num_removed++;
	}

	/*
	 *	Update the counts
	 */
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);

	if (pmap != kernel_pmap) {
		/* Apply the accumulated refcount delta once; going to or below zero here is fatal. */
		if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
			panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
		}

		/* update ledgers */
		pmap_ledger_debit(pmap, task_ledgers.external, (num_external) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.reusable, (num_reusable) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
		/* make needed adjustments to phys_footprint */
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
		    ((num_internal -
		    num_alt_internal) +
		    (num_compressed -
		    num_alt_compressed)) * pmap_page_size);
	}

	/* flush the ptable entries we have written */
	if (num_pte_changed > 0) {
		FLUSH_PTE_STRONG();
	}

	return num_pte_changed;
}
4171 
4172 
4173 /*
4174  *	Remove the given range of addresses
4175  *	from the specified map.
4176  *
4177  *	It is assumed that the start and end are properly
4178  *	rounded to the hardware page size.
4179  */
4180 void
4181 pmap_remove(
4182 	pmap_t pmap,
4183 	vm_map_address_t start,
4184 	vm_map_address_t end)
4185 {
4186 	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4187 }
4188 
/*
 * Remove mappings in [start, end) from pmap.  Callers split the range at
 * twig (leaf-table) boundaries — see pmap_remove_options() — so the whole
 * range is served by a single leaf page table.  Returns the VA actually
 * reached, which may be less than `end` if the removal stopped early for
 * pending preemption.
 */
MARK_AS_PMAP_TEXT vm_map_address_t
pmap_remove_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	vm_map_address_t eva = end;
	pt_entry_t     *bpte, *epte;
	pt_entry_t     *pte_p;
	tt_entry_t     *tte_p;
	int             remove_count = 0;
	bool            need_strong_sync = false;
	/* Tracks whether we still hold the pmap lock at `done` (see tte_deallocate below). */
	bool            unlock = true;

	if (__improbable(end < start)) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	validate_pmap_mutable(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);

	tte_p = pmap_tte(pmap, start);

	/* No twig entry means nothing is mapped in this range. */
	if (tte_p == (tt_entry_t *) NULL) {
		goto done;
	}

	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pt_attr, start)];
		epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));

		/*
		 * This check is really intended to ensure that mappings in a nested pmap can't be removed
		 * through a top-level user pmap, although it's also a useful sanity check for other pmap types.
		 * Note that kernel page tables may not have PTDs, so we can't use the check there.
		 */
		if (__improbable((pmap->type != PMAP_TYPE_KERNEL) && (ptep_get_pmap(bpte) != pmap))) {
			panic("%s: attempt to remove mappings owned by pmap %p through pmap %p, starting at pte %p",
			    __func__, ptep_get_pmap(bpte), pmap, bpte);
		}

		remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
		    &need_strong_sync, options);

		/* If the leaf table is now empty, tear it down as well. */
		if ((pmap->type == PMAP_TYPE_USER) && (ptep_get_info(pte_p)->refcnt == 0)) {
			pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
			remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
			unlock = false; // pmap_tte_deallocate() has dropped the lock
		}
	}

done:
	if (unlock) {
		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
	}

	if (remove_count > 0) {
		PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync, true);
	}
	return eva;
}
4255 
/*
 * Public entry point for removing the mappings in [start, end) from `pmap`.
 * The range is processed one twig-level (leaf-table) chunk at a time so each
 * inner call stays within a single page table.
 */
void
pmap_remove_options(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		return;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

#if MACH_ASSERT
	/* Both endpoints must be aligned to the pmap's page size and lie within its VA bounds. */
	if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
#endif

	/*
	 * We allow single-page requests to execute non-preemptibly,
	 * as it doesn't make sense to sample AST_URGENT for a single-page
	 * operation, and there are a couple of special use cases that
	 * require a non-preemptible single-page operation.
	 */
	if ((end - start) > (pt_attr_page_size(pt_attr) * PAGE_RATIO)) {
		pmap_verify_preemptible();
	}

	/*
	 *      Invalidate the translation buffer first
	 */
	va = start;
	while (va < end) {
		vm_map_address_t l;

		/* Advance at most to the next twig (leaf-table) boundary. */
		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
		if (l > end) {
			l = end;
		}

#if XNU_MONITOR
		/* PPL configuration: the removal runs inside the monitor. */
		va = pmap_remove_options_ppl(pmap, va, l, options);

		pmap_ledger_check_balance(pmap);
#else
		/* The call may return early (va < l) on pending preemption; just resume. */
		va = pmap_remove_options_internal(pmap, va, l, options);
#endif
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
4319 
4320 
4321 /*
4322  *	Remove phys addr if mapped in specified map
4323  */
4324 void
4325 pmap_remove_some_phys(
4326 	__unused pmap_t map,
4327 	__unused ppnum_t pn)
4328 {
4329 	/* Implement to support working set code */
4330 }
4331 
4332 /*
4333  * Implementation of PMAP_SWITCH_USER that Mach VM uses to
4334  * switch a thread onto a new vm_map.
4335  */
4336 void
4337 pmap_switch_user(thread_t thread, vm_map_t new_map)
4338 {
4339 	pmap_t new_pmap = new_map->pmap;
4340 
4341 
4342 	thread->map = new_map;
4343 	pmap_set_pmap(new_pmap, thread);
4344 
4345 }
4346 
/*
 * Activate `pmap` on the current CPU on behalf of `thread`.  On
 * __ARM_USER_PROTECT__ configurations the thread's cached user TTB and ASID
 * are also refreshed; otherwise the thread parameter is unused.
 */
void
pmap_set_pmap(
	pmap_t pmap,
#if     !__ARM_USER_PROTECT__
	__unused
#endif
	thread_t        thread)
{
	pmap_switch(pmap);
#if __ARM_USER_PROTECT__
	/* Cache the user translation-table base and ASID in the thread's machine state. */
	thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	thread->machine.asid = pmap->hw_asid;
#endif
}
4361 
4362 static void
4363 pmap_flush_core_tlb_asid_async(pmap_t pmap)
4364 {
4365 	flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
4366 }
4367 
4368 static inline bool
4369 pmap_user_ttb_is_clear(void)
4370 {
4371 	return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
4372 }
4373 
4374 MARK_AS_PMAP_TEXT void
4375 pmap_switch_internal(
4376 	pmap_t pmap)
4377 {
4378 	pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
4379 #if XNU_MONITOR
4380 	os_atomic_store(&cpu_data_ptr->active_pmap, pmap, relaxed);
4381 #endif
4382 	validate_pmap_mutable(pmap);
4383 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4384 	uint16_t asid_index = pmap->hw_asid;
4385 	bool do_asid_flush = false;
4386 	bool do_commpage_flush = false;
4387 
4388 	if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
4389 		panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
4390 	}
4391 #if __ARM_KERNEL_PROTECT__
4392 	asid_index >>= 1;
4393 #endif
4394 
4395 	pmap_t                    last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
4396 	__unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
4397 	__unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
4398 	__unused vm_map_offset_t  last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
4399 	bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
4400 	bool break_before_make = do_shared_region_flush;
4401 
4402 	if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
4403 		asid_index -= 1;
4404 		pmap_update_plru(asid_index);
4405 
4406 		/* Paranoia. */
4407 		assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));
4408 
4409 		/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
4410 		uint8_t new_sw_asid = pmap->sw_asid;
4411 		uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];
4412 
4413 		if (new_sw_asid != last_sw_asid) {
4414 			/*
4415 			 * If the virtual ASID of the new pmap does not match the virtual ASID
4416 			 * last seen on this CPU for the physical ASID (that was a mouthful),
4417 			 * then this switch runs the risk of aliasing.  We need to flush the
	 * TLB for this physical ASID in this case.
4419 			 */
4420 			cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
4421 			do_asid_flush = true;
4422 			break_before_make = true;
4423 		}
4424 	}
4425 
4426 #if __ARM_MIXED_PAGE_SIZE__
4427 	if (pt_attr->pta_tcr_value != get_tcr()) {
4428 		break_before_make = true;
4429 	}
4430 #endif
4431 #if __ARM_MIXED_PAGE_SIZE__
4432 	/*
4433 	 * For mixed page size configurations, we need to flush the global commpage mappings from
4434 	 * the TLB when transitioning between address spaces with different page sizes.  Otherwise
	 * it's possible for a TLB fill against the incoming commpage to produce a TLB entry
	 * which partially overlaps a TLB entry from the outgoing commpage, leading to a TLB
4437 	 * conflict abort or other unpredictable behavior.
4438 	 */
4439 	if (pt_attr_leaf_shift(pt_attr) != cpu_data_ptr->commpage_page_shift) {
4440 		do_commpage_flush = true;
4441 	}
4442 	if (do_commpage_flush) {
4443 		break_before_make = true;
4444 	}
4445 #endif
4446 	if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
4447 		PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
4448 		pmap_clear_user_ttb_internal();
4449 	}
4450 
4451 	/* If we're switching to a different nested pmap (i.e. shared region), we'll need
4452 	 * to flush the userspace mappings for that region.  Those mappings are global
4453 	 * and will not be protected by the ASID.  It should also be cheaper to flush the
4454 	 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
4455 	if (__improbable(do_shared_region_flush)) {
4456 #if __ARM_RANGE_TLBI__
4457 		uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
4458 		vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;
4459 
4460 		/* NOTE: here we flush the global TLB entries for the previous nested region only.
4461 		 * There may still be non-global entries that overlap with the incoming pmap's
4462 		 * nested region.  On Apple SoCs at least, this is acceptable.  Those non-global entries
4463 		 * must necessarily belong to a different ASID than the incoming pmap, or they would
4464 		 * be flushed in the do_asid_flush case below.  This will prevent them from conflicting
4465 		 * with the incoming pmap's nested region.  However, the ARMv8 ARM is not crystal clear
4466 		 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
4467 		 * to consider additional invalidation here in the future. */
4468 		if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
4469 			flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
4470 		} else {
4471 			do_asid_flush = false;
4472 			flush_core_tlb_async();
4473 		}
4474 #else
4475 		do_asid_flush = false;
4476 		flush_core_tlb_async();
4477 #endif // __ARM_RANGE_TLBI__
4478 	}
4479 
4480 #if __ARM_MIXED_PAGE_SIZE__
4481 	if (__improbable(do_commpage_flush)) {
4482 		const uint64_t commpage_shift = cpu_data_ptr->commpage_page_shift;
4483 		const uint64_t rtlbi_param = generate_rtlbi_param((ppnum_t)_COMM_PAGE64_NESTING_SIZE >> commpage_shift,
4484 		    0, _COMM_PAGE64_NESTING_START, commpage_shift);
4485 		flush_core_tlb_allrange_async(rtlbi_param);
4486 	}
4487 #endif
4488 	if (__improbable(do_asid_flush)) {
4489 		pmap_flush_core_tlb_asid_async(pmap);
4490 #if DEVELOPMENT || DEBUG
4491 		os_atomic_inc(&pmap_asid_flushes, relaxed);
4492 #endif
4493 	}
4494 	if (__improbable(do_asid_flush || do_shared_region_flush || do_commpage_flush)) {
4495 		sync_tlb_flush_local();
4496 	}
4497 
4498 	pmap_switch_user_ttb(pmap, cpu_data_ptr);
4499 }
4500 
/*
 * Switch the current CPU's user address space to the given pmap.
 * Emits begin/end tracepoints around the switch.  On XNU_MONITOR (PPL)
 * configurations the actual switch must be carried out inside the PPL;
 * otherwise the internal implementation is called directly.
 */
void
pmap_switch(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
#if XNU_MONITOR
	pmap_switch_ppl(pmap);
#else
	pmap_switch_internal(pmap);
#endif
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
4513 
/*
 * Lower the protection on all mappings of physical page [ppnum] to [prot].
 * Convenience wrapper around pmap_page_protect_options() with no options
 * and no caller-supplied argument.
 */
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}
4521 
4522 /*
4523  *	Routine:	pmap_page_protect_options
4524  *
4525  *	Function:
4526  *		Lower the permission for all mappings to a given
4527  *		page.
4528  */
/*
 * Lower the permission of every CPU mapping of physical page [ppnum] to
 * [prot], removing the mappings entirely when [prot] is anything other
 * than VM_PROT_ALL / READ / READ|EXECUTE.  IOMMU mappings are preserved.
 *
 * Operates in two passes over the page's PV list: pass 1 updates PTEs and
 * all ledger/accounting state; pass 2 issues TLB invalidations and (on the
 * remove path) unlinks the CPU mappings from the PV list.  The two passes
 * must agree on the number of updated mappings (pass1_updated vs.
 * pass2_updated) or we panic.
 *
 * @param ppnum       physical page whose mappings are downgraded
 * @param prot        the new, never-stronger protection
 * @param options     PMAP_OPTIONS_* flags; PMAP_OPTIONS_NOFLUSH is force-
 *                    cleared on the remove path per the PPL security model
 * @param flush_range if non-NULL, the caller will perform a ranged TLB
 *                    flush of [ptfr_start, ptfr_end) on ptfr_pmap, so
 *                    per-VA flushes falling inside that range are elided
 */
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_with_flush_range(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_paddr_t    phys = ptoa(ppnum);
	pv_entry_t    **pv_h;
	pv_entry_t     *pve_p, *orig_pve_p;
	pv_entry_t     *pveh_p;
	pv_entry_t     *pvet_p;
	pt_entry_t     *pte_p, *orig_pte_p;
	pv_entry_t     *new_pve_p;
	pt_entry_t     *new_pte_p;
	vm_offset_t     pvh_flags;
	unsigned int    pai;
	bool            remove;
	bool            set_NX;
	unsigned int    pvh_cnt = 0;
	unsigned int    pass1_updated = 0;
	unsigned int    pass2_updated = 0;

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_ALL:
		return;         /* nothing to do */
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = false;
		break;
	default:
		/* PPL security model requires that we flush TLBs before we exit if the page may be recycled. */
		options = options & ~PMAP_OPTIONS_NOFLUSH;
		remove = true;
		break;
	}

	pmap_cpu_data_t *pmap_cpu_data = NULL;
	if (remove) {
#if !XNU_MONITOR
		mp_disable_preemption();
#endif
		pmap_cpu_data = pmap_get_cpu_data();
		os_atomic_store(&pmap_cpu_data->inflight_disconnect, true, relaxed);
		/*
		 * Ensure the store to inflight_disconnect will be observed before any of the
		 * ensuing PTE/refcount stores in this function.  This flag is used to avoid
		 * a race in which the VM may clear a pmap's mappings and destroy the pmap on
		 * another CPU, in between this function's clearing a PTE and dropping the
		 * corresponding pagetable refcount.  That can lead to a panic if the
		 * destroying thread observes a non-zero refcount.  For this we need a store-
		 * store barrier; a store-release operation would not be sufficient.
		 */
		os_atomic_thread_fence(release);
	}

	pai = pa_index(phys);
	pvh_lock(pai);
	pv_h = pai_to_pvh(pai);
	pvh_flags = pvh_get_flags(pv_h);

#if XNU_MONITOR
	if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN_MASK))) {
		panic("%d is locked down (%#llx), cannot remove", pai, (uint64_t)pvh_get_flags(pv_h));
	}
	if (__improbable(ppattr_pa_test_monitor(phys))) {
		panic("%s: PA 0x%llx belongs to PPL.", __func__, (uint64_t)phys);
	}
#endif


	orig_pte_p = pte_p = PT_ENTRY_NULL;
	orig_pve_p = pve_p = PV_ENTRY_NULL;
	pveh_p = PV_ENTRY_NULL;
	pvet_p = PV_ENTRY_NULL;
	new_pve_p = PV_ENTRY_NULL;
	new_pte_p = PT_ENTRY_NULL;


	/* The PV head points at a single PTE, a PVE list, or nothing; anything else is corruption. */
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		orig_pte_p = pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		orig_pve_p = pve_p = pvh_pve_list(pv_h);
		pveh_p = pve_p;
	} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
		panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)phys);
	}

	/* Pass 1: Update all CPU PTEs and accounting info as necessary */
	int pve_ptep_idx = 0;

	/*
	 * issue_tlbi is used to indicate that this function will need to issue at least one TLB
	 * invalidation during pass 2.  tlb_flush_needed only indicates that PTE permissions have
	 * changed and that a TLB flush will be needed *at some point*, so we'll need to call
	 * FLUSH_PTE_STRONG() to synchronize prior PTE updates.  In the case of a flush_range
	 * operation, TLB invalidation may be handled by the caller so it's possible for
	 * tlb_flush_needed to be true while issue_tlbi is false.
	 */
	bool issue_tlbi = false;
	bool tlb_flush_needed = false;
	const bool compress = (options & PMAP_OPTIONS_COMPRESSOR);
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
		bool update = false;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto protect_skip_pve_pass1;
			}
		}

#ifdef PVH_FLAG_IOMMU
		if (pvh_ptep_is_iommu(pte_p)) {
#if XNU_MONITOR
			if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN_MASK)) {
				panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu %p, pve_p=%p",
				    ppnum, ptep_get_iommu(pte_p), pve_p);
			}
#endif
			if (remove && (options & PMAP_OPTIONS_COMPRESSOR)) {
				panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu %p, pve_p=%p",
				    ppnum, ptep_get_iommu(pte_p), pve_p);
			}
			goto protect_skip_pve_pass1;
		}
#endif
		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);

		if (__improbable((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum))) {
#if MACH_ASSERT
			if ((pmap != NULL) && (pve_p != PV_ENTRY_NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
				/* Temporarily set PTEP to NULL so that the logic below doesn't pick it up as duplicate. */
				pt_entry_t *temp_ptep = pve_get_ptep(pve_p, pve_ptep_idx);
				pve_set_ptep(pve_p, pve_ptep_idx, PT_ENTRY_NULL);

				pv_entry_t *check_pvep = pve_p;

				do {
					if (pve_find_ptep_index(check_pvep, pte_p) != -1) {
						panic_plain("%s: duplicate pve entry ptep=%p pmap=%p, pvh=%p, "
						    "pvep=%p, pai=0x%x", __func__, pte_p, pmap, pv_h, pve_p, pai);
					}
				} while ((check_pvep = pve_next(check_pvep)) != PV_ENTRY_NULL);

				/* Restore previous PTEP value. */
				pve_set_ptep(pve_p, pve_ptep_idx, temp_ptep);
			}
#endif
			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x",
			    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
		}

#if DEVELOPMENT || DEBUG
		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
		if ((prot & VM_PROT_EXECUTE))
#endif
		{
			set_NX = false;
		} else {
			set_NX = true;
		}

		/* Remove the mapping if new protection is NONE */
		if (remove) {
			const bool is_internal = ppattr_pve_is_internal(pai, pve_p, pve_ptep_idx);
			const bool is_altacct = ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx);
			const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
			pt_entry_t spte = *pte_p;

			if (pte_is_wired(spte)) {
				pte_set_wired(pmap, pte_p, 0);
				spte = *pte_p;
				if (pmap != kernel_pmap) {
					pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
				}
			}

			assertf(atop(pte_to_pa(spte)) == ppnum, "unexpected value 0x%llx for pte %p mapping ppnum 0x%x",
			    (uint64_t)spte, pte_p, ppnum);

			if (compress && is_internal && (pmap != kernel_pmap)) {
				assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
				/* mark this PTE as having been "compressed" */
				tmplate = ARM_PTE_COMPRESSED;
				if (is_altacct) {
					tmplate |= ARM_PTE_COMPRESSED_ALT;
				}
			} else {
				tmplate = ARM_PTE_TYPE_FAULT;
			}

			assert(spte != tmplate);
			write_pte_fast(pte_p, tmplate);
			update = true;
			++pass1_updated;

			pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);

			/* Keep the internal/external/reusable/alternate-accounting ledgers consistent. */
			if (pmap != kernel_pmap) {
				if (ppattr_test_reusable(pai) &&
				    is_internal &&
				    !is_altacct) {
					pmap_ledger_debit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
				} else if (!is_internal) {
					pmap_ledger_debit(pmap, task_ledgers.external, pt_attr_page_size(pt_attr) * PAGE_RATIO);
				}

				if (is_altacct) {
					assert(is_internal);
					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}
					ppattr_pve_clr_internal(pai, pve_p, pve_ptep_idx);
					ppattr_pve_clr_altacct(pai, pve_p, pve_ptep_idx);
				} else if (ppattr_test_reusable(pai)) {
					assert(is_internal);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}
					ppattr_pve_clr_internal(pai, pve_p, pve_ptep_idx);
				} else if (is_internal) {
					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);

					/*
					 * Update all stats related to physical footprint, which only
					 * deals with internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being done so we can send this page to
						 * the compressor; therefore it mustn't affect total task footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					} else {
						/*
						 * This internal page isn't going to the compressor, so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}
					ppattr_pve_clr_internal(pai, pve_p, pve_ptep_idx);
				} else {
					/* external page: no impact on ledgers */
				}
			}
			assert((pve_p == PV_ENTRY_NULL) || !pve_get_altacct(pve_p, pve_ptep_idx));
		} else {
			pt_entry_t spte = *pte_p;
			const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);

			if (pmap == kernel_pmap) {
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
			} else {
				tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
			}

			/*
			 * While the naive implementation of this would serve to add execute
			 * permission, this is not how the VM uses this interface, or how
			 * x86_64 implements it.  So ignore requests to add execute permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			}


			assert(spte != ARM_PTE_TYPE_FAULT);
			assert(!ARM_PTE_IS_COMPRESSED(spte, pte_p));

			if (spte != tmplate) {
				/*
				 * Mark the PTE so that we'll know this mapping requires a TLB flush in pass 2.
				 * This allows us to avoid unnecessary flushing e.g. for COW aliases that didn't
				 * require permission updates.  We use the ARM_PTE_WRITEABLE bit as that bit
				 * should always be cleared by this function.
				 */
				pte_set_was_writeable(tmplate, true);
				write_pte_fast(pte_p, tmplate);
				update = true;
				++pass1_updated;
			} else if (pte_was_writeable(tmplate)) {
				/*
				 * We didn't change any of the relevant permission bits in the PTE, so we don't need
				 * to flush the TLB, but we do want to clear the "was_writeable" flag.  When revoking
				 * write access to a page, this function should always at least clear that flag for
				 * all PTEs, as the VM is effectively requesting that subsequent write accesses to
				 * these mappings go through vm_fault().  We therefore don't want those accesses to
				 * be handled through arm_fast_fault().
				 */
				pte_set_was_writeable(tmplate, false);
				write_pte_fast(pte_p, tmplate);
			}
		}

		if (!issue_tlbi && update && !(options & PMAP_OPTIONS_NOFLUSH)) {
			tlb_flush_needed = true;
			if (remove || !flush_range || (flush_range->ptfr_pmap != pmap) ||
			    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
				issue_tlbi = true;
			}
		}
protect_skip_pve_pass1:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
		}
	}

	if (tlb_flush_needed) {
		FLUSH_PTE_STRONG();
	}

	if (!remove && !issue_tlbi) {
		goto protect_finish;
	}

	/* Pass 2: Invalidate TLBs and update the list to remove CPU mappings */
	pv_entry_t **pve_pp = pv_h;
	pve_p = orig_pve_p;
	pte_p = orig_pte_p;
	pve_ptep_idx = 0;

	/*
	 * We need to keep track of whether a particular PVE list contains IOMMU
	 * mappings when removing entries, because we should only remove CPU
	 * mappings. If a PVE list contains at least one IOMMU mapping, we keep
	 * it around.
	 */
	bool iommu_mapping_in_pve = false;
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto protect_skip_pve_pass2;
			}
		}

#ifdef PVH_FLAG_IOMMU
		if (pvh_ptep_is_iommu(pte_p)) {
			iommu_mapping_in_pve = true;
			if (remove && (pve_p == PV_ENTRY_NULL)) {
				/*
				 * We've found an IOMMU entry and it's the only entry in the PV list.
				 * We don't discard IOMMU entries, so simply set up the new PV list to
				 * contain the single IOMMU PTE and exit the loop.
				 */
				new_pte_p = pte_p;
				break;
			}
			goto protect_skip_pve_pass2;
		}
#endif
		pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);

		if (remove) {
			if (!compress && (pmap != kernel_pmap)) {
				/*
				 * We must wait to decrement the refcount until we're completely finished using the PTE
				 * on this path.  Otherwise, if we happened to drop the refcount to zero, a concurrent
				 * pmap_remove() call might observe the zero refcount and free the pagetable out from
				 * under us.
				 */
				if (OSAddAtomic16(-1, (SInt16 *) &(ptd_get_info(ptdp, pte_p)->refcnt)) <= 0) {
					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
				}
			}
			/* Remove this CPU mapping from PVE list. */
			if (pve_p != PV_ENTRY_NULL) {
				pve_set_ptep(pve_p, pve_ptep_idx, PT_ENTRY_NULL);
			}
		} else {
			pt_entry_t spte = *pte_p;
			if (pte_was_writeable(spte)) {
				pte_set_was_writeable(spte, false);
				write_pte_fast(pte_p, spte);
			} else {
				goto protect_skip_pve_pass2;
			}
		}
		++pass2_updated;
		if (remove || !flush_range || (flush_range->ptfr_pmap != pmap) ||
		    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
			    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
		}

protect_skip_pve_pass2:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;

			if (remove) {
				/**
				 * If there are any IOMMU mappings in the PVE list, preserve
				 * those mappings in a new PVE list (new_pve_p) which will later
				 * become the new PVH entry. Keep track of the CPU mappings in
				 * pveh_p/pvet_p so they can be deallocated later.
				 */
				if (iommu_mapping_in_pve) {
					iommu_mapping_in_pve = false;
					pv_entry_t *temp_pve_p = pve_next(pve_p);
					pve_remove(pv_h, pve_pp, pve_p);
					pveh_p = pvh_pve_list(pv_h);
					pve_p->pve_next = new_pve_p;
					new_pve_p = pve_p;
					pve_p = temp_pve_p;
					continue;
				} else {
					pvet_p = pve_p;
					pvh_cnt++;
				}
			}

			pve_pp = pve_next_ptr(pve_p);
			pve_p = pve_next(pve_p);
			iommu_mapping_in_pve = false;
		}
	}

protect_finish:

#ifdef PVH_FLAG_EXEC
	if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
		pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
	}
#endif
	if (__improbable(pass1_updated != pass2_updated)) {
		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
		    __func__, pass1_updated, pass2_updated);
	}
	/* if we removed a bunch of entries, take care of them now */
	if (remove) {
		if (new_pve_p != PV_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else if (new_pte_p != PT_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else {
			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
		}
	}

	/* On the non-remove path, defer the flush to the caller's ranged flush if one was supplied. */
	if (flush_range && tlb_flush_needed) {
		if (!remove) {
			flush_range->ptfr_flush_needed = true;
			tlb_flush_needed = false;
		}
	}

	/*
	 * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
	 * lock to allow the backing pages to be repurposed.  This is a security precaution, aimed
	 * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
	 * a page to be repurposed while it is still live in the TLBs.
	 */
	if (remove && tlb_flush_needed) {
		sync_tlb_flush();
	}

	pvh_unlock(pai);

	if (remove) {
		os_atomic_store(&pmap_cpu_data->inflight_disconnect, false, release);
#if !XNU_MONITOR
		mp_enable_preemption();
#endif
	}

	/* Non-remove path: the flush can safely complete after the PVH lock is dropped. */
	if (!remove && tlb_flush_needed) {
		sync_tlb_flush();
	}

	if (remove && (pvet_p != PV_ENTRY_NULL)) {
		pv_list_free(pveh_p, pvet_p, pvh_cnt);
	}
}
5029 
5030 MARK_AS_PMAP_TEXT void
5031 pmap_page_protect_options_internal(
5032 	ppnum_t ppnum,
5033 	vm_prot_t prot,
5034 	unsigned int options,
5035 	void *arg)
5036 {
5037 	if (arg != NULL) {
5038 		/*
5039 		 * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
5040 		 * ultimately be flushed.  The nature of ARM TLB maintenance is such that we can flush the
5041 		 * TLBs much more precisely if we do so inline with the pagetable updates, and PPL security
5042 		 * model requires that we not exit the PPL without performing required TLB flushes anyway.
5043 		 * In that case, force the flush to take place.
5044 		 */
5045 		options &= ~PMAP_OPTIONS_NOFLUSH;
5046 	}
5047 	pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
5048 }
5049 
5050 void
5051 pmap_page_protect_options(
5052 	ppnum_t ppnum,
5053 	vm_prot_t prot,
5054 	unsigned int options,
5055 	void *arg)
5056 {
5057 	pmap_paddr_t    phys = ptoa(ppnum);
5058 
5059 	assert(ppnum != vm_page_fictitious_addr);
5060 
5061 	/* Only work with managed pages. */
5062 	if (!pa_valid(phys)) {
5063 		return;
5064 	}
5065 
5066 	/*
5067 	 * Determine the new protection.
5068 	 */
5069 	if (prot == VM_PROT_ALL) {
5070 		return;         /* nothing to do */
5071 	}
5072 
5073 	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5074 
5075 #if XNU_MONITOR
5076 	pmap_page_protect_options_ppl(ppnum, prot, options, arg);
5077 #else
5078 	pmap_page_protect_options_internal(ppnum, prot, options, arg);
5079 #endif
5080 
5081 	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5082 }
5083 
5084 
5085 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
/*
 * Permanently disable user-mode JOP (pointer authentication of user
 * pointers) for the given pmap by setting its disable_jop flag.
 * Panics if called on the kernel pmap; the pmap is validated as a
 * mutable user pmap before being modified.
 */
MARK_AS_PMAP_TEXT void
pmap_disable_user_jop_internal(pmap_t pmap)
{
	if (pmap == kernel_pmap) {
		panic("%s: called with kernel_pmap", __func__);
	}
	validate_pmap_mutable(pmap);
	pmap->disable_jop = true;
}
5095 
/*
 * Public wrapper for pmap_disable_user_jop_internal(); on XNU_MONITOR
 * (PPL) configurations the flag must be set from inside the PPL.
 */
void
pmap_disable_user_jop(pmap_t pmap)
{
#if XNU_MONITOR
	pmap_disable_user_jop_ppl(pmap);
#else
	pmap_disable_user_jop_internal(pmap);
#endif
}
5105 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
5106 
5107 /*
5108  * Indicates if the pmap layer enforces some additional restrictions on the
5109  * given set of protections.
5110  */
/*
 * Indicates whether the pmap layer imposes additional restrictions on the
 * given protections.  The ARM pmap enforces no such policy, so this always
 * returns false; all parameters are intentionally unused.
 */
bool
pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
	return false;
}
5116 
5117 /*
5118  *	Set the physical protection on the
5119  *	specified range of this map as requested.
5120  *	VERY IMPORTANT: Will not increase permissions.
5121  *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5122  */
/*
 * Set the protection on [b, e) in the given pmap to [prot], never
 * increasing permissions (only pmap_enter() may grant permissions).
 * Convenience wrapper around pmap_protect_options() with no options.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
5132 
/*
 * Apply [prot] to the PTEs covering [start, end) in [pmap], never
 * increasing permissions.  The range must fall within a single twig
 * (last-level pagetable); larger ranges are broken up by the caller.
 *
 * Returns the VA up to which the update was applied.  This may be less
 * than [end] if a pending preemption caused an early exit, in which case
 * the caller is expected to resume from the returned address.
 */
MARK_AS_PMAP_TEXT vm_map_address_t
pmap_protect_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	tt_entry_t      *tte_p;
	pt_entry_t      *bpte_p, *epte_p;
	pt_entry_t      *pte_p;
	boolean_t        set_NX = TRUE;
	boolean_t        set_XO = FALSE;
	boolean_t        should_have_removed = FALSE;
	bool             need_strong_sync = false;

	/* Validate the pmap input before accessing its data. */
	validate_pmap_mutable(pmap);

	const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);

	/* Reject reversed ranges and ranges that cross a twig boundary. */
	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			should_have_removed = TRUE;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
			set_XO = TRUE;
			OS_FALLTHROUGH;
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return end;         /* nothing to do */
		default:
			should_have_removed = TRUE;
		}
	}

	/* Removal (VM_PROT_NONE) must go through pmap_remove(), not here. */
	if (should_have_removed) {
		panic("%s: should have been a remove operation, "
		    "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
		    __FUNCTION__,
		    pmap, (void *)start, (void *)end, prot, options, args);
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

	const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
	vm_map_address_t va = start;
	unsigned int npages = 0;

	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);

	tte_p = pmap_tte(pmap, start);

	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte_p = &bpte_p[pte_index(pt_attr, start)];
		epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
		pte_p = bpte_p;

		for (pte_p = bpte_p;
		    pte_p < epte_p;
		    pte_p += PAGE_RATIO, va += pmap_page_size) {
			++npages;
			/* Periodically bail out (returning va < end) if preemption is pending. */
			if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
			    pmap_pending_preemption())) {
				break;
			}
			pt_entry_t spte;
#if DEVELOPMENT || DEBUG
			boolean_t  force_write = FALSE;
#endif

			spte = *((volatile pt_entry_t*)pte_p);

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			pmap_paddr_t    pa;
			unsigned int    pai = 0;
			boolean_t       managed = FALSE;

			/*
			 * Lock the PVH for the page the PTE currently maps, re-reading the
			 * PTE under the lock to guard against a concurrent remap.
			 */
			while (!managed) {
				/*
				 * It may be possible for the pte to transition from managed
				 * to unmanaged in this timeframe; for now, elide the assert.
				 * We should break out as a consequence of checking pa_valid.
				 */
				// assert(!ARM_PTE_IS_COMPRESSED(spte));
				pa = pte_to_pa(spte);
				if (!pa_valid(pa)) {
					break;
				}
				pai = pa_index(pa);
				pvh_lock(pai);
				spte = *((volatile pt_entry_t*)pte_p);
				pa = pte_to_pa(spte);
				if (pai == pa_index(pa)) {
					managed = TRUE;
					break; // Leave the PVH locked as we will unlock it after we free the PTE
				}
				pvh_unlock(pai);
			}

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			pt_entry_t      tmplate;

			if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
				}
			} else {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					assert(pmap->type != PMAP_TYPE_NESTED);
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
				}
			}

			/*
			 * XXX Removing "NX" would
			 * grant "execute" access
			 * immediately, bypassing any
			 * checks VM might want to do
			 * in its soft fault path.
			 * pmap_protect() and co. are
			 * not allowed to increase
			 * access permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			} else {
				if (pmap == kernel_pmap) {
					/* do NOT clear "PNX"! */
					tmplate |= ARM_PTE_NX;
				} else {
					/* do NOT clear "NX"! */
					tmplate |= pt_attr_leaf_x(pt_attr);
					if (set_XO) {
						tmplate &= ~ARM_PTE_APMASK;
						tmplate |= pt_attr_leaf_rona(pt_attr);
					}
				}
			}

#if DEVELOPMENT || DEBUG
			if (force_write) {
				/*
				 * TODO: Run CS/Monitor checks here.
				 */
				if (managed) {
					/*
					 * We are marking the page as writable,
					 * so we consider it to be modified and
					 * referenced.
					 */
					ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
					tmplate |= ARM_PTE_AF;

					if (ppattr_test_reffault(pai)) {
						ppattr_clear_reffault(pai);
					}

					if (ppattr_test_modfault(pai)) {
						ppattr_clear_modfault(pai);
					}
				}
			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
				/*
				 * An immediate request for anything other than
				 * write should still mark the page as
				 * referenced if managed.
				 */
				if (managed) {
					ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
					tmplate |= ARM_PTE_AF;

					if (ppattr_test_reffault(pai)) {
						ppattr_clear_reffault(pai);
					}
				}
			}
#endif

			/* We do not expect to write fast fault the entry. */
			pte_set_was_writeable(tmplate, false);

			write_pte_fast(pte_p, tmplate);

			if (managed) {
				pvh_assert_locked(pai);
				pvh_unlock(pai);
			}
		}
		FLUSH_PTE_STRONG();
		PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync, true);
	} else {
		va = end;
	}

	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
	return va;
}
5375 
5376 void
5377 pmap_protect_options(
5378 	pmap_t pmap,
5379 	vm_map_address_t b,
5380 	vm_map_address_t e,
5381 	vm_prot_t prot,
5382 	unsigned int options,
5383 	__unused void *args)
5384 {
5385 	vm_map_address_t l, beg;
5386 
5387 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5388 
5389 	if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
5390 		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx",
5391 		    pmap, (uint64_t)b, (uint64_t)e);
5392 	}
5393 
5394 	/*
5395 	 * We allow single-page requests to execute non-preemptibly,
5396 	 * as it doesn't make sense to sample AST_URGENT for a single-page
5397 	 * operation, and there are a couple of special use cases that
5398 	 * require a non-preemptible single-page operation.
5399 	 */
5400 	if ((e - b) > (pt_attr_page_size(pt_attr) * PAGE_RATIO)) {
5401 		pmap_verify_preemptible();
5402 	}
5403 
5404 #if DEVELOPMENT || DEBUG
5405 	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5406 		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5407 			pmap_remove_options(pmap, b, e, options);
5408 			return;
5409 		}
5410 	} else
5411 #endif
5412 	{
5413 		/* Determine the new protection. */
5414 		switch (prot) {
5415 		case VM_PROT_EXECUTE:
5416 		case VM_PROT_READ:
5417 		case VM_PROT_READ | VM_PROT_EXECUTE:
5418 			break;
5419 		case VM_PROT_READ | VM_PROT_WRITE:
5420 		case VM_PROT_ALL:
5421 			return;         /* nothing to do */
5422 		default:
5423 			pmap_remove_options(pmap, b, e, options);
5424 			return;
5425 		}
5426 	}
5427 
5428 	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5429 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5430 	    VM_KERNEL_ADDRHIDE(e));
5431 
5432 	beg = b;
5433 
5434 	while (beg < e) {
5435 		l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5436 
5437 		if (l > e) {
5438 			l = e;
5439 		}
5440 
5441 #if XNU_MONITOR
5442 		beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
5443 #else
5444 		beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5445 #endif
5446 	}
5447 
5448 	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5449 }
5450 
5451 /**
5452  * Inserts an arbitrary number of physical pages ("block") in a pmap.
5453  *
5454  * @param pmap pmap to insert the pages into.
5455  * @param va virtual address to map the pages into.
5456  * @param pa page number of the first physical page to map.
5457  * @param size block size, in number of pages.
5458  * @param prot mapping protection attributes.
5459  * @param attr flags to pass to pmap_enter().
5460  *
5461  * @return KERN_SUCCESS.
5462  */
5463 kern_return_t
5464 pmap_map_block(
5465 	pmap_t pmap,
5466 	addr64_t va,
5467 	ppnum_t pa,
5468 	uint32_t size,
5469 	vm_prot_t prot,
5470 	int attr,
5471 	unsigned int flags)
5472 {
5473 	return pmap_map_block_addr(pmap, va, ((pmap_paddr_t)pa) << PAGE_SHIFT, size, prot, attr, flags);
5474 }
5475 
5476 /**
5477  * Inserts an arbitrary number of physical pages ("block") in a pmap.
5478  * As opposed to pmap_map_block(), this function takes
5479  * a physical address as an input and operates using the
5480  * page size associated with the input pmap.
5481  *
5482  * @param pmap pmap to insert the pages into.
5483  * @param va virtual address to map the pages into.
5484  * @param pa physical address of the first physical page to map.
5485  * @param size block size, in number of pages.
5486  * @param prot mapping protection attributes.
5487  * @param attr flags to pass to pmap_enter().
5488  *
5489  * @return KERN_SUCCESS.
5490  */
5491 kern_return_t
5492 pmap_map_block_addr(
5493 	pmap_t pmap,
5494 	addr64_t va,
5495 	pmap_paddr_t pa,
5496 	uint32_t size,
5497 	vm_prot_t prot,
5498 	int attr,
5499 	unsigned int flags)
5500 {
5501 #if __ARM_MIXED_PAGE_SIZE__
5502 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5503 	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
5504 #else
5505 	const uint64_t pmap_page_size = PAGE_SIZE;
5506 #endif
5507 
5508 	for (ppnum_t page = 0; page < size; page++) {
5509 		if (pmap_enter_addr(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE) != KERN_SUCCESS) {
5510 			panic("%s: failed pmap_enter_addr, "
5511 			    "pmap=%p, va=%#llx, pa=%llu, size=%u, prot=%#x, flags=%#x",
5512 			    __FUNCTION__,
5513 			    pmap, va, (uint64_t)pa, size, prot, flags);
5514 		}
5515 
5516 		va += pmap_page_size;
5517 		pa += pmap_page_size;
5518 	}
5519 
5520 	return KERN_SUCCESS;
5521 }
5522 
5523 kern_return_t
5524 pmap_enter_addr(
5525 	pmap_t pmap,
5526 	vm_map_address_t v,
5527 	pmap_paddr_t pa,
5528 	vm_prot_t prot,
5529 	vm_prot_t fault_type,
5530 	unsigned int flags,
5531 	boolean_t wired)
5532 {
5533 	return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
5534 }
5535 
5536 /*
5537  *	Insert the given physical page (p) at
5538  *	the specified virtual address (v) in the
5539  *	target physical map with the protection requested.
5540  *
5541  *	If specified, the page will be wired down, meaning
5542  *	that the related pte can not be reclaimed.
5543  *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (it must make
 *	forward progress eventually).
5548  */
5549 kern_return_t
5550 pmap_enter(
5551 	pmap_t pmap,
5552 	vm_map_address_t v,
5553 	ppnum_t pn,
5554 	vm_prot_t prot,
5555 	vm_prot_t fault_type,
5556 	unsigned int flags,
5557 	boolean_t wired)
5558 {
5559 	return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
5560 }
5561 
5562 /*
5563  * Attempt to commit the pte.
5564  * Succeeds iff able to change *pte_p from old_pte to new_pte.
5565  * Performs no page table or accounting writes on failures.
5566  */
5567 static inline bool
5568 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t *old_pte, pt_entry_t new_pte, vm_map_address_t v)
5569 {
5570 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5571 	bool success = false, changed_wiring = false;
5572 
5573 	__unreachable_ok_push
5574 	if (TEST_PAGE_RATIO_4) {
5575 		/*
5576 		 * 16K virtual pages w/ 4K hw pages.
5577 		 * We actually need to update 4 ptes here which can't easily be done atomically.
5578 		 * As a result we require the exclusive pmap lock.
5579 		 */
5580 		pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);
5581 		*old_pte = *pte_p;
5582 		if (*old_pte == new_pte) {
5583 			/* Another thread completed this operation. Nothing to do here. */
5584 			success = true;
5585 		} else if (pa_valid(pte_to_pa(new_pte)) && pte_to_pa(*old_pte) != pte_to_pa(new_pte) &&
5586 		    (*old_pte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE) {
5587 			/* pte has been modified by another thread and we hold the wrong PVH lock. Retry. */
5588 			success = false;
5589 		} else {
5590 			write_pte_fast(pte_p, new_pte);
5591 			success = true;
5592 		}
5593 	} else {
5594 		success = os_atomic_cmpxchgv(pte_p, *old_pte, new_pte, old_pte, acq_rel);
5595 	}
5596 	__unreachable_ok_pop
5597 
5598 	if (success && *old_pte != new_pte) {
5599 		if ((*old_pte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE) {
5600 			FLUSH_PTE_STRONG();
5601 			PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false, true);
5602 		} else {
5603 			FLUSH_PTE();
5604 			__builtin_arm_isb(ISB_SY);
5605 		}
5606 		changed_wiring = ARM_PTE_IS_COMPRESSED(*old_pte, pte_p) ?
5607 		    (new_pte & ARM_PTE_WIRED) != 0 :
5608 		    (new_pte & ARM_PTE_WIRED) != (*old_pte & ARM_PTE_WIRED);
5609 
5610 		if (pmap != kernel_pmap && changed_wiring) {
5611 			SInt16  *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
5612 			if (new_pte & ARM_PTE_WIRED) {
5613 				OSAddAtomic16(1, ptd_wiredcnt_ptr);
5614 				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
5615 			} else {
5616 				OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5617 				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
5618 			}
5619 		}
5620 
5621 		PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
5622 		    VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), new_pte);
5623 	}
5624 	return success;
5625 }
5626 
5627 MARK_AS_PMAP_TEXT static pt_entry_t
5628 wimg_to_pte(unsigned int wimg, __unused pmap_paddr_t pa)
5629 {
5630 	pt_entry_t pte;
5631 
5632 	switch (wimg & (VM_WIMG_MASK)) {
5633 	case VM_WIMG_IO:
5634 		// Map DRAM addresses with VM_WIMG_IO as Device-GRE instead of
5635 		// Device-nGnRnE. On H14+, accesses to them can be reordered by
5636 		// AP, while preserving the security benefits of using device
5637 		// mapping against side-channel attacks. On pre-H14 platforms,
5638 		// the accesses will still be strongly ordered.
5639 		if (is_dram_addr(pa)) {
5640 			pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
5641 		} else {
5642 			pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5643 		}
5644 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5645 		break;
5646 	case VM_WIMG_RT:
5647 #if HAS_UCNORMAL_MEM
5648 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5649 #else
5650 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5651 #endif
5652 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5653 		break;
5654 	case VM_WIMG_POSTED:
5655 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5656 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5657 		break;
5658 	case VM_WIMG_POSTED_REORDERED:
5659 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
5660 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5661 		break;
5662 	case VM_WIMG_POSTED_COMBINED_REORDERED:
5663 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
5664 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5665 		break;
5666 	case VM_WIMG_WCOMB:
5667 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5668 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5669 		break;
5670 	case VM_WIMG_WTHRU:
5671 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5672 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5673 		break;
5674 	case VM_WIMG_COPYBACK:
5675 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5676 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5677 		break;
5678 	case VM_WIMG_INNERWBACK:
5679 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5680 		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5681 		break;
5682 	default:
5683 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5684 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5685 	}
5686 
5687 	return pte;
5688 }
5689 
5690 
5691 /*
5692  * Construct a PTE (and the physical page attributes) for the given virtual to
5693  * physical mapping.
5694  *
5695  * This function has no side effects and is safe to call so that it is safe to
5696  * call while attempting a pmap_enter transaction.
5697  */
5698 MARK_AS_PMAP_TEXT static pt_entry_t
5699 pmap_construct_pte(
5700 	const pmap_t pmap,
5701 	vm_map_address_t va,
5702 	pmap_paddr_t pa,
5703 	vm_prot_t prot,
5704 	vm_prot_t fault_type,
5705 	boolean_t wired,
5706 	const pt_attr_t* const pt_attr,
5707 	uint16_t *pp_attr_bits /* OUTPUT */
5708 	)
5709 {
5710 	bool set_NX = false, set_XO = false;
5711 	pt_entry_t pte = pa_to_pte(pa) | ARM_PTE_TYPE;
5712 	assert(pp_attr_bits != NULL);
5713 	*pp_attr_bits = 0;
5714 
5715 	if (wired) {
5716 		pte |= ARM_PTE_WIRED;
5717 	}
5718 
5719 #if DEVELOPMENT || DEBUG
5720 	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5721 #else
5722 	if ((prot & VM_PROT_EXECUTE))
5723 #endif
5724 	{
5725 		set_NX = false;
5726 	} else {
5727 		set_NX = true;
5728 	}
5729 
5730 	if (prot == VM_PROT_EXECUTE) {
5731 		set_XO = true;
5732 	}
5733 
5734 	if (set_NX) {
5735 		pte |= pt_attr_leaf_xn(pt_attr);
5736 	} else {
5737 		if (pmap == kernel_pmap) {
5738 			pte |= ARM_PTE_NX;
5739 		} else {
5740 			pte |= pt_attr_leaf_x(pt_attr);
5741 		}
5742 	}
5743 
5744 	if (pmap == kernel_pmap) {
5745 #if __ARM_KERNEL_PROTECT__
5746 		pte |= ARM_PTE_NG;
5747 #endif /* __ARM_KERNEL_PROTECT__ */
5748 		if (prot & VM_PROT_WRITE) {
5749 			pte |= ARM_PTE_AP(AP_RWNA);
5750 			*pp_attr_bits |= PP_ATTR_MODIFIED | PP_ATTR_REFERENCED;
5751 		} else {
5752 			pte |= ARM_PTE_AP(AP_RONA);
5753 			*pp_attr_bits |= PP_ATTR_REFERENCED;
5754 		}
5755 	} else {
5756 		if (pmap->type != PMAP_TYPE_NESTED) {
5757 			pte |= ARM_PTE_NG;
5758 		} else if ((pmap->nested_region_asid_bitmap)
5759 		    && (va >= pmap->nested_region_addr)
5760 		    && (va < (pmap->nested_region_addr + pmap->nested_region_size))) {
5761 			unsigned int index = (unsigned int)((va - pmap->nested_region_addr)  >> pt_attr_twig_shift(pt_attr));
5762 
5763 			if ((pmap->nested_region_asid_bitmap)
5764 			    && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
5765 				pte |= ARM_PTE_NG;
5766 			}
5767 		}
5768 		if (prot & VM_PROT_WRITE) {
5769 			assert(pmap->type != PMAP_TYPE_NESTED);
5770 			if (pa_valid(pa) && (!ppattr_pa_test_bits(pa, PP_ATTR_MODIFIED))) {
5771 				if (fault_type & VM_PROT_WRITE) {
5772 					if (set_XO) {
5773 						pte |= pt_attr_leaf_rwna(pt_attr);
5774 					} else {
5775 						pte |= pt_attr_leaf_rw(pt_attr);
5776 					}
5777 					*pp_attr_bits |= PP_ATTR_REFERENCED | PP_ATTR_MODIFIED;
5778 				} else {
5779 					if (set_XO) {
5780 						pte |= pt_attr_leaf_rona(pt_attr);
5781 					} else {
5782 						pte |= pt_attr_leaf_ro(pt_attr);
5783 					}
5784 					/*
5785 					 * Mark the page as MODFAULT so that a subsequent write
5786 					 * may be handled through arm_fast_fault().
5787 					 */
5788 					*pp_attr_bits |= PP_ATTR_REFERENCED | PP_ATTR_MODFAULT;
5789 					pte_set_was_writeable(pte, true);
5790 				}
5791 			} else {
5792 				if (set_XO) {
5793 					pte |= pt_attr_leaf_rwna(pt_attr);
5794 				} else {
5795 					pte |= pt_attr_leaf_rw(pt_attr);
5796 				}
5797 				*pp_attr_bits |= PP_ATTR_REFERENCED;
5798 			}
5799 		} else {
5800 			if (set_XO) {
5801 				pte |= pt_attr_leaf_rona(pt_attr);
5802 			} else {
5803 				pte |= pt_attr_leaf_ro(pt_attr);
5804 			}
5805 			*pp_attr_bits |= PP_ATTR_REFERENCED;
5806 		}
5807 	}
5808 
5809 	pte |= ARM_PTE_AF;
5810 	return pte;
5811 }
5812 
5813 MARK_AS_PMAP_TEXT kern_return_t
5814 pmap_enter_options_internal(
5815 	pmap_t pmap,
5816 	vm_map_address_t v,
5817 	pmap_paddr_t pa,
5818 	vm_prot_t prot,
5819 	vm_prot_t fault_type,
5820 	unsigned int flags,
5821 	boolean_t wired,
5822 	unsigned int options)
5823 {
5824 	ppnum_t         pn = (ppnum_t)atop(pa);
5825 	pt_entry_t      pte;
5826 	pt_entry_t      spte;
5827 	pt_entry_t      *pte_p;
5828 	bool            refcnt_updated;
5829 	bool            wiredcnt_updated;
5830 	bool            ro_va = false;
5831 	unsigned int    wimg_bits;
5832 	bool            committed = false, drop_refcnt = false, had_valid_mapping = false, skip_footprint_debit = false;
5833 	pmap_lock_mode_t lock_mode = PMAP_LOCK_SHARED;
5834 	kern_return_t   kr = KERN_SUCCESS;
5835 	uint16_t pp_attr_bits;
5836 	volatile uint16_t *refcnt;
5837 	volatile uint16_t *wiredcnt;
5838 	pv_free_list_t *local_pv_free;
5839 
5840 	validate_pmap_mutable(pmap);
5841 
5842 #if XNU_MONITOR
5843 	if (__improbable((options & PMAP_OPTIONS_NOWAIT) == 0)) {
5844 		panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
5845 	}
5846 #endif
5847 
5848 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5849 
5850 	if ((v) & pt_attr_leaf_offmask(pt_attr)) {
5851 		panic("pmap_enter_options() pmap %p v 0x%llx",
5852 		    pmap, (uint64_t)v);
5853 	}
5854 
5855 	if ((pa) & pt_attr_leaf_offmask(pt_attr)) {
5856 		panic("pmap_enter_options() pmap %p pa 0x%llx",
5857 		    pmap, (uint64_t)pa);
5858 	}
5859 
5860 	/* The PA should not extend beyond the architected physical address space */
5861 	pa &= ARM_PTE_PAGE_MASK;
5862 
5863 	if ((prot & VM_PROT_EXECUTE) && (pmap == kernel_pmap)) {
5864 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
5865 		extern vm_offset_t ctrr_test_page;
5866 		if (__probable(v != ctrr_test_page))
5867 #endif
5868 		panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
5869 	}
5870 	if (__improbable((pmap == kernel_pmap) && zone_spans_ro_va(v, v + pt_attr_page_size(pt_attr)))) {
5871 		if (__improbable(prot != VM_PROT_READ)) {
5872 			panic("%s: attempt to map RO zone VA 0x%llx with prot 0x%x",
5873 			    __func__, (unsigned long long)v, prot);
5874 		}
5875 		ro_va = true;
5876 	}
5877 	assert(pn != vm_page_fictitious_addr);
5878 
5879 	refcnt_updated = false;
5880 	wiredcnt_updated = false;
5881 
5882 	if ((prot & VM_PROT_EXECUTE) || TEST_PAGE_RATIO_4) {
5883 		/*
5884 		 * We need to take the lock exclusive here because of SPLAY_FIND in pmap_cs_enforce.
5885 		 *
5886 		 * See rdar://problem/59655632 for thoughts on synchronization and the splay tree
5887 		 */
5888 		lock_mode = PMAP_LOCK_EXCLUSIVE;
5889 	}
5890 
5891 	if (!pmap_lock_preempt(pmap, lock_mode)) {
5892 		return KERN_ABORTED;
5893 	}
5894 
5895 	/*
5896 	 *	Expand pmap to include this pte.  Assume that
5897 	 *	pmap is always expanded to include enough hardware
5898 	 *	pages to map one VM page.
5899 	 */
5900 	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
5901 		/* Must unlock to expand the pmap. */
5902 		pmap_unlock(pmap, lock_mode);
5903 
5904 		kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
5905 
5906 		if (kr != KERN_SUCCESS) {
5907 			return kr;
5908 		}
5909 
5910 		if (!pmap_lock_preempt(pmap, lock_mode)) {
5911 			return KERN_ABORTED;
5912 		}
5913 	}
5914 
5915 	if (options & PMAP_OPTIONS_NOENTER) {
5916 		pmap_unlock(pmap, lock_mode);
5917 		return KERN_SUCCESS;
5918 	}
5919 
5920 	/*
5921 	 * Since we may not hold the pmap lock exclusive, updating the pte is
5922 	 * done via a cmpxchg loop.
5923 	 * We need to be careful about modifying non-local data structures before commiting
5924 	 * the new pte since we may need to re-do the transaction.
5925 	 */
5926 	spte = os_atomic_load(pte_p, relaxed);
5927 	while (!committed) {
5928 		refcnt = NULL;
5929 		wiredcnt = NULL;
5930 		pv_alloc_return_t pv_status = PV_ALLOC_SUCCESS;
5931 		had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
5932 
5933 		if (pmap != kernel_pmap) {
5934 			ptd_info_t *ptd_info = ptep_get_info(pte_p);
5935 			refcnt = &ptd_info->refcnt;
5936 			wiredcnt = &ptd_info->wiredcnt;
5937 			/*
5938 			 * This check is really intended to ensure that mappings in a nested pmap can't be inserted
5939 			 * through a top-level user pmap, which would allow a non-global mapping to be inserted into a shared
5940 			 * region pmap and leveraged into a TLB-based write gadget (rdar://91504354).
5941 			 * It's also a useful sanity check for other pmap types, but note that kernel page tables may not
5942 			 * have PTDs, so we can't use the check there.
5943 			 */
5944 			if (__improbable(ptep_get_pmap(pte_p) != pmap)) {
5945 				panic("%s: attempt to enter mapping at pte %p owned by pmap %p through pmap %p",
5946 				    __func__, pte_p, ptep_get_pmap(pte_p), pmap);
5947 			}
5948 			/*
5949 			 * Bump the wired count to keep the PTE page from being reclaimed.  We need this because
5950 			 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
5951 			 * or acquire the pmap lock exclusive.
5952 			 */
5953 			if (!wiredcnt_updated) {
5954 				OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
5955 				wiredcnt_updated = true;
5956 			}
5957 			if (!refcnt_updated) {
5958 				OSAddAtomic16(1, (volatile int16_t*)refcnt);
5959 				refcnt_updated = true;
5960 				drop_refcnt = true;
5961 			}
5962 		}
5963 
5964 		if (had_valid_mapping && (pte_to_pa(spte) != pa)) {
5965 			/*
5966 			 * There is already a mapping here & it's for a different physical page.
5967 			 * First remove that mapping.
5968 			 *
5969 			 * This requires that we take the pmap lock exclusive in order to call pmap_remove_range.
5970 			 */
5971 			if (lock_mode == PMAP_LOCK_SHARED) {
5972 				if (pmap_lock_shared_to_exclusive(pmap)) {
5973 					lock_mode = PMAP_LOCK_EXCLUSIVE;
5974 				} else {
5975 					/*
5976 					 * We failed to upgrade to an exclusive lock.
5977 					 * As a result we no longer hold the lock at all,
5978 					 * so we need to re-acquire it and restart the transaction.
5979 					 */
5980 					pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
5981 					lock_mode = PMAP_LOCK_EXCLUSIVE;
5982 					/* pmap might have changed after we dropped the lock. Try again. */
5983 					spte = os_atomic_load(pte_p, relaxed);
5984 					continue;
5985 				}
5986 			}
5987 			pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO);
5988 			spte = ARM_PTE_TYPE_FAULT;
5989 			assert(os_atomic_load(pte_p, acquire) == ARM_PTE_TYPE_FAULT);
5990 		}
5991 
5992 		/*
5993 		 * The XO index is used for TPRO mappings. To avoid exposing them as --x,
5994 		 * the VM code tracks VM_MAP_TPRO requests and couples them with the proper
5995 		 * read-write protection. The PMAP layer though still needs to use the right
5996 		 * index, which is the older XO-now-TPRO one and that is specially selected
5997 		 * here thanks to PMAP_OPTIONS_MAP_TPRO.
5998 		 */
5999 		if (options & PMAP_OPTIONS_MAP_TPRO) {
6000 			pte = pmap_construct_pte(pmap, v, pa, VM_PROT_RORW_TP, fault_type, wired, pt_attr, &pp_attr_bits);
6001 		} else {
6002 			pte = pmap_construct_pte(pmap, v, pa, prot, fault_type, wired, pt_attr, &pp_attr_bits);
6003 		}
6004 
6005 		if (pa_valid(pa)) {
6006 			unsigned int pai;
6007 			boolean_t   is_altacct = FALSE, is_internal = FALSE, is_reusable = FALSE, is_external = FALSE;
6008 
6009 			is_internal = FALSE;
6010 			is_altacct = FALSE;
6011 
6012 			pai = pa_index(pa);
6013 
6014 			pvh_lock(pai);
6015 
6016 			/*
6017 			 * Make sure that the current per-cpu PV free list has
6018 			 * enough entries (2 in the worst-case scenario) to handle the enter_pv
6019 			 * if the transaction succeeds. We're either in the
6020 			 * PPL (which can't be preempted) or we've explicitly disabled preemptions.
6021 			 * Note that we can still be interrupted, but a primary
6022 			 * interrupt handler can never enter the pmap.
6023 			 */
6024 #if !XNU_MONITOR
6025 			assert(get_preemption_level() > 0);
6026 #endif
6027 			local_pv_free = &pmap_get_cpu_data()->pv_free;
6028 			pv_entry_t **pv_h = pai_to_pvh(pai);
6029 			const bool allocation_required = !pvh_test_type(pv_h, PVH_TYPE_NULL) &&
6030 			    !(pvh_test_type(pv_h, PVH_TYPE_PTEP) && pvh_ptep(pv_h) == pte_p);
6031 
6032 			if (__improbable(allocation_required && (local_pv_free->count < 2))) {
6033 				pv_entry_t *new_pve_p[2] = {PV_ENTRY_NULL};
6034 				int new_allocated_pves = 0;
6035 
6036 				while (new_allocated_pves < 2) {
6037 					local_pv_free = &pmap_get_cpu_data()->pv_free;
6038 					pv_status = pv_alloc(pmap, pai, lock_mode, options, &new_pve_p[new_allocated_pves]);
6039 					if (pv_status == PV_ALLOC_FAIL) {
6040 						break;
6041 					} else if (pv_status == PV_ALLOC_RETRY) {
6042 						/*
6043 						 * In the case that pv_alloc() had to grab a new page of PVEs,
6044 						 * it will have dropped the pmap lock while doing so.
6045 						 * On non-PPL devices, dropping the lock re-enables preemption so we may
6046 						 * be on a different CPU now.
6047 						 */
6048 						local_pv_free = &pmap_get_cpu_data()->pv_free;
6049 					} else {
6050 						/* If we've gotten this far then a node should've been allocated. */
6051 						assert(new_pve_p[new_allocated_pves] != PV_ENTRY_NULL);
6052 
6053 						new_allocated_pves++;
6054 					}
6055 				}
6056 
6057 				for (int i = 0; i < new_allocated_pves; i++) {
6058 					pv_free(new_pve_p[i]);
6059 				}
6060 			}
6061 
6062 			if (pv_status == PV_ALLOC_FAIL) {
6063 				pvh_unlock(pai);
6064 				kr = KERN_RESOURCE_SHORTAGE;
6065 				break;
6066 			} else if (pv_status == PV_ALLOC_RETRY) {
6067 				pvh_unlock(pai);
6068 				/* We dropped the pmap and PVH locks to allocate. Retry transaction. */
6069 				spte = os_atomic_load(pte_p, relaxed);
6070 				continue;
6071 			}
6072 
6073 			if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6074 				wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6075 			} else {
6076 				wimg_bits = pmap_cache_attributes(pn);
6077 			}
6078 
6079 			/* We may be retrying this operation after dropping the PVH lock.
6080 			 * Cache attributes for the physical page may have changed while the lock
6081 			 * was dropped, so clear any cache attributes we may have previously set
6082 			 * in the PTE template. */
6083 			pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6084 			pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits, pa);
6085 
6086 #if XNU_MONITOR
6087 			/* The regular old kernel is not allowed to remap PPL pages. */
6088 			if (__improbable(ppattr_pa_test_monitor(pa))) {
6089 				panic("%s: page belongs to PPL, "
6090 				    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
6091 				    __FUNCTION__,
6092 				    pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
6093 			}
6094 
6095 			if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN_MASK)) {
6096 				panic("%s: page locked down, "
6097 				    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
6098 				    __FUNCTION__,
6099 				    pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
6100 			}
6101 #endif
6102 
6103 
6104 
6105 			committed = pmap_enter_pte(pmap, pte_p, &spte, pte, v);
6106 			if (!committed) {
6107 				pvh_unlock(pai);
6108 				continue;
6109 			}
6110 			had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
6111 			/* End of transaction. Commit pv changes, pa bits, and memory accounting. */
6112 
6113 			assert(!had_valid_mapping || (pte_to_pa(spte) == pa));
6114 			/*
6115 			 * If there was already a valid pte here then we reuse its reference
6116 			 * on the ptd and drop the one that we took above.
6117 			 */
6118 			drop_refcnt = had_valid_mapping;
6119 
6120 			if (!had_valid_mapping) {
6121 				pv_entry_t *new_pve_p = PV_ENTRY_NULL;
6122 				int pve_ptep_idx = 0;
6123 				pv_status = pmap_enter_pv(pmap, pte_p, pai, options, lock_mode, &new_pve_p, &pve_ptep_idx);
6124 				/* We did all the allocations up top. So this shouldn't be able to fail. */
6125 				if (pv_status != PV_ALLOC_SUCCESS) {
6126 					panic("%s: unexpected pmap_enter_pv ret code: %d. new_pve_p=%p pmap=%p",
6127 					    __func__, pv_status, new_pve_p, pmap);
6128 				}
6129 
6130 				if (pmap != kernel_pmap) {
6131 					if (options & PMAP_OPTIONS_INTERNAL) {
6132 						ppattr_pve_set_internal(pai, new_pve_p, pve_ptep_idx);
6133 						if ((options & PMAP_OPTIONS_ALT_ACCT) ||
6134 						    PMAP_FOOTPRINT_SUSPENDED(pmap)) {
6135 							/*
6136 							 * Make a note to ourselves that this
6137 							 * mapping is using alternative
6138 							 * accounting. We'll need this in order
6139 							 * to know which ledger to debit when
6140 							 * the mapping is removed.
6141 							 *
6142 							 * The altacct bit must be set while
6143 							 * the pv head is locked. Defer the
6144 							 * ledger accounting until after we've
6145 							 * dropped the lock.
6146 							 */
6147 							ppattr_pve_set_altacct(pai, new_pve_p, pve_ptep_idx);
6148 							is_altacct = TRUE;
6149 						}
6150 					}
6151 					if (ppattr_test_reusable(pai) &&
6152 					    !is_altacct) {
6153 						is_reusable = TRUE;
6154 					} else if (options & PMAP_OPTIONS_INTERNAL) {
6155 						is_internal = TRUE;
6156 					} else {
6157 						is_external = TRUE;
6158 					}
6159 				}
6160 			}
6161 
6162 			pvh_unlock(pai);
6163 
6164 			if (pp_attr_bits != 0) {
6165 				ppattr_pa_set_bits(pa, pp_attr_bits);
6166 			}
6167 
6168 			if (!had_valid_mapping && (pmap != kernel_pmap)) {
6169 				pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6170 
6171 				if (is_internal) {
6172 					/*
6173 					 * Make corresponding adjustments to
6174 					 * phys_footprint statistics.
6175 					 */
6176 					pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6177 					if (is_altacct) {
6178 						/*
6179 						 * If this page is internal and
6180 						 * in an IOKit region, credit
6181 						 * the task's total count of
6182 						 * dirty, internal IOKit pages.
6183 						 * It should *not* count towards
6184 						 * the task's total physical
6185 						 * memory footprint, because
6186 						 * this entire region was
6187 						 * already billed to the task
6188 						 * at the time the mapping was
6189 						 * created.
6190 						 *
6191 						 * Put another way, this is
6192 						 * internal++ and
6193 						 * alternate_accounting++, so
6194 						 * net effect on phys_footprint
6195 						 * is 0. That means: don't
6196 						 * touch phys_footprint here.
6197 						 */
6198 						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6199 					} else {
6200 						if (ARM_PTE_IS_COMPRESSED(spte, pte_p) && !(spte & ARM_PTE_COMPRESSED_ALT)) {
6201 							/* Replacing a compressed page (with internal accounting). No change to phys_footprint. */
6202 							skip_footprint_debit = true;
6203 						} else {
6204 							pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6205 						}
6206 					}
6207 				}
6208 				if (is_reusable) {
6209 					pmap_ledger_credit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6210 				} else if (is_external) {
6211 					pmap_ledger_credit(pmap, task_ledgers.external, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6212 				}
6213 			}
6214 		} else {
6215 			if (prot & VM_PROT_EXECUTE) {
6216 				kr = KERN_FAILURE;
6217 				break;
6218 			}
6219 
6220 			wimg_bits = pmap_cache_attributes(pn);
6221 			if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6222 				wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6223 			}
6224 
6225 			pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits, pa);
6226 
6227 #if XNU_MONITOR
6228 			if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
6229 				uint64_t xprr_perm = pte_to_xprr_perm(pte);
6230 				switch (xprr_perm) {
6231 				case XPRR_KERN_RO_PERM:
6232 					break;
6233 				case XPRR_KERN_RW_PERM:
6234 					pte &= ~ARM_PTE_XPRR_MASK;
6235 					pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
6236 					break;
6237 				default:
6238 					panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
6239 				}
6240 			}
6241 #endif
6242 			committed = pmap_enter_pte(pmap, pte_p, &spte, pte, v);
6243 			if (committed) {
6244 				had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
6245 				assert(!had_valid_mapping || (pte_to_pa(spte) == pa));
6246 
6247 				/**
6248 				 * If there was already a valid pte here then we reuse its
6249 				 * reference on the ptd and drop the one that we took above.
6250 				 */
6251 				drop_refcnt = had_valid_mapping;
6252 			}
6253 		}
6254 		if (committed) {
6255 			if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
6256 				assert(pmap != kernel_pmap);
6257 
6258 				/* One less "compressed" */
6259 				pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
6260 				    pt_attr_page_size(pt_attr) * PAGE_RATIO);
6261 
6262 				if (spte & ARM_PTE_COMPRESSED_ALT) {
6263 					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6264 				} else if (!skip_footprint_debit) {
6265 					/* Was part of the footprint */
6266 					pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6267 				}
6268 				/* The old entry held a reference so drop the extra one that we took above. */
6269 				drop_refcnt = true;
6270 			}
6271 		}
6272 	}
6273 
6274 	if (drop_refcnt && refcnt != NULL) {
6275 		assert(refcnt_updated);
6276 		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
6277 			panic("pmap_enter(): over-release of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
6278 		}
6279 	}
6280 
6281 	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
6282 		panic("pmap_enter(): over-unwire of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
6283 	}
6284 
6285 	pmap_unlock(pmap, lock_mode);
6286 
6287 	if (__improbable(ro_va && kr == KERN_SUCCESS)) {
6288 		pmap_phys_write_disable(v);
6289 	}
6290 
6291 	return kr;
6292 }
6293 
/*
 *	Routine:	pmap_enter_options_addr
 *	Function:	Enter a mapping for physical address 'pa' at virtual
 *			address 'v' in the given pmap, retrying on transient
 *			failures from the underlying implementation.
 *
 *	KERN_RESOURCE_SHORTAGE means a page-table page (or, under
 *	XNU_MONITOR, a PPL page) could not be allocated; unless the caller
 *	passed PMAP_OPTIONS_NOWAIT we replenish and retry.  KERN_ABORTED
 *	means the operation was preempted partway and must be retried.
 */
kern_return_t
pmap_enter_options_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void   *arg)
{
	kern_return_t kr = KERN_FAILURE;


	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);


	const bool nowait_requested = (options & PMAP_OPTIONS_NOWAIT) != 0;
	do {
#if XNU_MONITOR
		/*
		 * Force NOWAIT into the PPL: page allocation cannot happen inside
		 * the PPL, so on shortage we drop back out here to refill.
		 */
		kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT);
#else
		kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
#endif

		if (kr == KERN_RESOURCE_SHORTAGE) {
#if XNU_MONITOR
			/* Refill the PPL's page free list, then retry (unless NOWAIT). */
			pmap_alloc_page_for_ppl(nowait_requested ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
#endif
			if (nowait_requested) {
				break;
			}
		}
	} while (kr == KERN_RESOURCE_SHORTAGE || kr == KERN_ABORTED);

#if XNU_MONITOR
	pmap_ledger_check_balance(pmap);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

	return kr;
}
6339 
6340 kern_return_t
6341 pmap_enter_options(
6342 	pmap_t pmap,
6343 	vm_map_address_t v,
6344 	ppnum_t pn,
6345 	vm_prot_t prot,
6346 	vm_prot_t fault_type,
6347 	unsigned int flags,
6348 	boolean_t wired,
6349 	unsigned int options,
6350 	__unused void   *arg)
6351 {
6352 	return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
6353 }
6354 
6355 /*
6356  *	Routine:	pmap_change_wiring
6357  *	Function:	Change the wiring attribute for a map/virtual-address
6358  *			pair.
6359  *	In/out conditions:
6360  *			The mapping must already exist in the pmap.
6361  */
6362 MARK_AS_PMAP_TEXT void
6363 pmap_change_wiring_internal(
6364 	pmap_t pmap,
6365 	vm_map_address_t v,
6366 	boolean_t wired)
6367 {
6368 	pt_entry_t     *pte_p;
6369 	pmap_paddr_t    pa;
6370 
6371 	validate_pmap_mutable(pmap);
6372 
6373 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6374 
6375 	const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
6376 
6377 	pte_p = pmap_pte(pmap, v);
6378 	if (pte_p == PT_ENTRY_NULL) {
6379 		if (!wired) {
6380 			/*
6381 			 * The PTE may have already been cleared by a disconnect/remove operation, and the L3 table
6382 			 * may have been freed by a remove operation.
6383 			 */
6384 			goto pmap_change_wiring_return;
6385 		} else {
6386 			panic("%s: Attempt to wire nonexistent PTE for pmap %p", __func__, pmap);
6387 		}
6388 	}
6389 	/*
6390 	 * Use volatile loads to prevent the compiler from collapsing references to 'pa' back to loads of pte_p
6391 	 * until we've grabbed the final PVH lock; PTE contents may change during this time.
6392 	 */
6393 	pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
6394 
6395 	while (pa_valid(pa)) {
6396 		pmap_paddr_t new_pa;
6397 
6398 		pvh_lock(pa_index(pa));
6399 		new_pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
6400 
6401 		if (pa == new_pa) {
6402 			break;
6403 		}
6404 
6405 		pvh_unlock(pa_index(pa));
6406 		pa = new_pa;
6407 	}
6408 
6409 	/* PTE checks must be performed after acquiring the PVH lock (if applicable for the PA) */
6410 	if ((*pte_p == ARM_PTE_EMPTY) || (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
6411 		if (!wired) {
6412 			/* PTE cleared by prior remove/disconnect operation */
6413 			goto pmap_change_wiring_cleanup;
6414 		} else {
6415 			panic("%s: Attempt to wire empty/compressed PTE %p (=0x%llx) for pmap %p",
6416 			    __func__, pte_p, (uint64_t)*pte_p, pmap);
6417 		}
6418 	}
6419 
6420 	assertf((*pte_p & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", pte_p, (uint64_t)*pte_p);
6421 	if (wired != pte_is_wired(*pte_p)) {
6422 		pte_set_wired(pmap, pte_p, wired);
6423 		if (pmap != kernel_pmap) {
6424 			if (wired) {
6425 				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6426 			} else if (!wired) {
6427 				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6428 			}
6429 		}
6430 	}
6431 
6432 pmap_change_wiring_cleanup:
6433 	if (pa_valid(pa)) {
6434 		pvh_unlock(pa_index(pa));
6435 	}
6436 
6437 pmap_change_wiring_return:
6438 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6439 }
6440 
/*
 * Public entry point for changing the wired attribute of an existing
 * mapping.  Under XNU_MONITOR the work is done inside the PPL.
 */
void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
#if XNU_MONITOR
	pmap_change_wiring_ppl(pmap, v, wired);

	/* Wiring changes touch ledgers; cross-check them after the PPL call. */
	pmap_ledger_check_balance(pmap);
#else
	pmap_change_wiring_internal(pmap, v, wired);
#endif
}
6455 
6456 MARK_AS_PMAP_TEXT pmap_paddr_t
6457 pmap_find_pa_internal(
6458 	pmap_t pmap,
6459 	addr64_t va)
6460 {
6461 	pmap_paddr_t    pa = 0;
6462 
6463 	validate_pmap(pmap);
6464 
6465 	if (pmap != kernel_pmap) {
6466 		pmap_lock(pmap, PMAP_LOCK_SHARED);
6467 	}
6468 
6469 	pa = pmap_vtophys(pmap, va);
6470 
6471 	if (pmap != kernel_pmap) {
6472 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
6473 	}
6474 
6475 	return pa;
6476 }
6477 
6478 pmap_paddr_t
6479 pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
6480 {
6481 	pmap_paddr_t pa = 0;
6482 
6483 	if (pmap == kernel_pmap) {
6484 		pa = mmu_kvtop(va);
6485 	} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
6486 		/*
6487 		 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
6488 		 * translation even if PAN would prevent kernel access through the translation.
6489 		 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
6490 		 */
6491 		pa = mmu_uvtop(va);
6492 	}
6493 	return pa;
6494 }
6495 
6496 pmap_paddr_t
6497 pmap_find_pa(
6498 	pmap_t pmap,
6499 	addr64_t va)
6500 {
6501 	pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);
6502 
6503 	if (pa != 0) {
6504 		return pa;
6505 	}
6506 
6507 	if (not_in_kdp) {
6508 #if XNU_MONITOR
6509 		return pmap_find_pa_ppl(pmap, va);
6510 #else
6511 		return pmap_find_pa_internal(pmap, va);
6512 #endif
6513 	} else {
6514 		return pmap_vtophys(pmap, va);
6515 	}
6516 }
6517 
6518 ppnum_t
6519 pmap_find_phys_nofault(
6520 	pmap_t pmap,
6521 	addr64_t va)
6522 {
6523 	ppnum_t ppn;
6524 	ppn = atop(pmap_find_pa_nofault(pmap, va));
6525 	return ppn;
6526 }
6527 
6528 ppnum_t
6529 pmap_find_phys(
6530 	pmap_t pmap,
6531 	addr64_t va)
6532 {
6533 	ppnum_t ppn;
6534 	ppn = atop(pmap_find_pa(pmap, va));
6535 	return ppn;
6536 }
6537 
6538 /**
6539  * Translate a kernel virtual address into a physical address.
6540  *
6541  * @param va The kernel virtual address to translate. Does not work on user
6542  *           virtual addresses.
6543  *
6544  * @return The physical address if the translation was successful, or zero if
6545  *         no valid mappings were found for the given virtual address.
6546  */
6547 pmap_paddr_t
6548 kvtophys(vm_offset_t va)
6549 {
6550 	/**
6551 	 * Attempt to do the translation first in hardware using the AT (address
6552 	 * translation) instruction. This will attempt to use the MMU to do the
6553 	 * translation for us.
6554 	 */
6555 	pmap_paddr_t pa = mmu_kvtop(va);
6556 
6557 	if (pa) {
6558 		return pa;
6559 	}
6560 
6561 	/* If the MMU can't find the mapping, then manually walk the page tables. */
6562 	return pmap_vtophys(kernel_pmap, va);
6563 }
6564 
6565 /**
6566  * Variant of kvtophys that can't fail. If no mapping is found or the mapping
6567  * points to a non-kernel-managed physical page, then this call will panic().
6568  *
6569  * @note The output of this function is guaranteed to be a kernel-managed
6570  *       physical page, which means it's safe to pass the output directly to
6571  *       pa_index() to create a physical address index for various pmap data
6572  *       structures.
6573  *
6574  * @param va The kernel virtual address to translate. Does not work on user
6575  *           virtual addresses.
6576  *
6577  * @return The translated physical address for the given virtual address.
6578  */
pmap_paddr_t
kvtophys_nofail(vm_offset_t va)
{
	pmap_paddr_t pa = kvtophys(va);

	/* Contract: callers must only pass VAs that map kernel-managed memory. */
	if (!pa_valid(pa)) {
		panic("%s: Invalid or non-kernel-managed physical page returned, "
		    "pa: %#llx, va: %p", __func__, (uint64_t)pa, (void *)va);
	}

	return pa;
}
6591 
/*
 * Software page-table walk: translate 'va' in 'pmap' to a physical
 * address, or return 0 if no valid translation exists.  The caller is
 * responsible for any locking needed to keep the tables stable.
 */
pmap_paddr_t
pmap_vtophys(
	pmap_t pmap,
	addr64_t va)
{
	/* Reject VAs outside the pmap's translatable range. */
	if ((va < pmap->min) || (va >= pmap->max)) {
		return 0;
	}

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	tt_entry_t * ttp = NULL;
	tt_entry_t * ttep = NULL;
	tt_entry_t   tte = ARM_TTE_EMPTY;
	pmap_paddr_t pa = 0;
	unsigned int cur_level;

	/* Start the walk at the root translation table. */
	ttp = pmap->tte;

	for (cur_level = pt_attr_root_level(pt_attr); cur_level <= pt_attr_leaf_level(pt_attr); cur_level++) {
		ttep = &ttp[ttn_index(pt_attr, va, cur_level)];

		tte = *ttep;

		/* Per-level masks describing how to decode a TTE at this level. */
		const uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
		const uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
		const uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
		const uint64_t offmask = pt_attr->pta_level_info[cur_level].offmask;

		if ((tte & valid_mask) != valid_mask) {
			return (pmap_paddr_t) 0;
		}

		/* This detects both leaf entries and intermediate block mappings. */
		if ((tte & type_mask) == type_block) {
			pa = ((tte & ARM_TTE_PA_MASK & ~offmask) | (va & offmask));
			break;
		}

		/* Table entry: descend to the next-level table. */
		ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
	}

	return pa;
}
6636 
6637 /*
6638  *	pmap_init_pte_page - Initialize a page table page.
6639  */
MARK_AS_PMAP_TEXT void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd)
{
	pt_desc_t   *ptdp = NULL;
	/* PVH entry for the physical page backing this page-table page. */
	pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p)));

	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap);
			if (ptdp == NULL) {
				panic("%s: unable to allocate PTD", __func__);
			}
			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
			/* Clear all PVH flags when using a page for a PTD to avoid tripping unexpected page flag usage checks. */
			pvh_set_flags(pvh, 0);
		} else {
			panic("pmap_init_pte_page(): pte_p %p", pte_p);
		}
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		/* Page is already set up as a page-table page; reuse its descriptor. */
		ptdp = pvh_ptd(pvh);
	} else {
		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
	}

	// below barrier ensures previous updates to the page are visible to PTW before
	// it is linked to the PTE of previous level
	__builtin_arm_dmb(DMB_ISHST);
	ptd_info_init(ptdp, pmap, va, ttlevel, pte_p);
}
6679 
6680 /*
6681  *	Routine:	pmap_expand
6682  *
6683  *	Expands a pmap to be able to map the specified virtual address.
6684  *
6685  *	Allocates new memory for the default (COARSE) translation table
6686  *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6687  *	also allocates space for the corresponding pv entries.
6688  *
6689  *	Nothing should be locked.
6690  */
MARK_AS_PMAP_TEXT static kern_return_t
pmap_expand(
	pmap_t pmap,
	vm_map_address_t v,
	unsigned int options,
	unsigned int level)
{
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (__improbable((v < pmap->min) || (v >= pmap->max))) {
		return KERN_INVALID_ADDRESS;
	}
	pmap_paddr_t    pa;
	unsigned int    ttlevel = pt_attr_root_level(pt_attr);
	tt_entry_t              *tte_p;
	tt_entry_t              *tt_p;

	pa = 0x0ULL;
	tt_p =  (tt_entry_t *)NULL;

	/* Walk down from the root, allocating each missing intermediate table. */
	for (; ttlevel < level; ttlevel++) {
		pmap_lock(pmap, PMAP_LOCK_SHARED);

		if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
			/*
			 * Drop the lock while allocating; another thread may install
			 * the table in the meantime, which is re-checked below.
			 */
			pmap_unlock(pmap, PMAP_LOCK_SHARED);
			while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
				if (options & PMAP_OPTIONS_NOWAIT) {
					return KERN_RESOURCE_SHORTAGE;
				}
#if XNU_MONITOR
				panic("%s: failed to allocate tt, "
				    "pmap=%p, v=%p, options=0x%x, level=%u",
				    __FUNCTION__,
				    pmap, (void *)v, options, level);
#else
				VM_PAGE_WAIT();
#endif
			}
			pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
			/* Re-check: only link our table if nobody else won the race. */
			if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
				pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
				pa = kvtophys_nofail((vm_offset_t)tt_p);
				tte_p = pmap_ttne(pmap, ttlevel, v);
				*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
				PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
				    VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
				/* Table consumed: clear locals so it isn't freed below. */
				pa = 0x0ULL;
				tt_p = (tt_entry_t *)NULL;
			}
			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
		} else {
			pmap_unlock(pmap, PMAP_LOCK_SHARED);
		}

		/* Lost the race (or table already present): release our allocation. */
		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return KERN_SUCCESS;
}
6753 
6754 /*
6755  *	Routine:	pmap_gc
6756  *	Function:
6757  *              Pmap garbage collection
6758  *		Called by the pageout daemon when pages are scarce.
6759  *
6760  */
void
pmap_gc(void)
{
	/*
	 * TODO: as far as I can tell this has never been implemented to do anything meaningful.
	 * We can't just destroy any old pmap on the chance that it may be active on a CPU
	 * or may contain wired mappings.  However, with the relatively recent change to
	 * make pmap_page_reclaim() non-fatal in the event that it doesn't find an eligible
	 * page, it may make sense to call that function here.
	 */
}
6772 
6773 /*
6774  *      By default, don't attempt pmap GC more frequently
6775  *      than once / 1 minutes.
6776  */
6777 
void
compute_pmap_gc_throttle(
	void *arg __unused)
{
	/* Intentionally empty: pmap GC throttling is not implemented on ARM. */
}
6783 
6784 /*
6785  * pmap_attribute_cache_sync(vm_offset_t pa)
6786  *
6787  * Invalidates all of the instruction cache on a physical page and
6788  * pushes any dirty data from the data cache for the same physical page
6789  */
6790 
6791 kern_return_t
6792 pmap_attribute_cache_sync(
6793 	ppnum_t pp,
6794 	vm_size_t size,
6795 	__unused vm_machine_attribute_t attribute,
6796 	__unused vm_machine_attribute_val_t * value)
6797 {
6798 	if (size > PAGE_SIZE) {
6799 		panic("pmap_attribute_cache_sync size: 0x%llx", (uint64_t)size);
6800 	} else {
6801 		cache_sync_page(pp);
6802 	}
6803 
6804 	return KERN_SUCCESS;
6805 }
6806 
6807 /*
6808  * pmap_sync_page_data_phys(ppnum_t pp)
6809  *
6810  * Invalidates all of the instruction cache on a physical page and
6811  * pushes any dirty data from the data cache for the same physical page
6812  */
void
pmap_sync_page_data_phys(
	ppnum_t pp)
{
	/* Push dirty data and invalidate the icache for this physical page. */
	cache_sync_page(pp);
}
6819 
6820 /*
6821  * pmap_sync_page_attributes_phys(ppnum_t pp)
6822  *
6823  * Write back and invalidate all cachelines on a physical page.
6824  */
6825 void
6826 pmap_sync_page_attributes_phys(
6827 	ppnum_t pp)
6828 {
6829 	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
6830 }
6831 
6832 #if CONFIG_COREDUMP
6833 /* temporary workaround */
boolean_t
coredumpok(
	vm_map_t map,
	mach_vm_offset_t va)
{
	pt_entry_t     *pte_p;
	pt_entry_t      spte;

	/* No PTE means nothing mapped at 'va' — nothing safe to dump. */
	pte_p = pmap_pte(map->pmap, va);
	if (0 == pte_p) {
		return FALSE;
	}
	/* Device-pager-backed memory must not be touched by the core dumper. */
	if (vm_map_entry_has_device_pager(map, va)) {
		return FALSE;
	}
	spte = *pte_p;
	/* Only default (normal cacheable) memory is safe to read for a dump. */
	return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
}
6852 #endif
6853 
6854 void
6855 fillPage(
6856 	ppnum_t pn,
6857 	unsigned int fill)
6858 {
6859 	unsigned int   *addr;
6860 	int             count;
6861 
6862 	addr = (unsigned int *) phystokv(ptoa(pn));
6863 	count = PAGE_SIZE / sizeof(unsigned int);
6864 	while (count--) {
6865 		*addr++ = fill;
6866 	}
6867 }
6868 
6869 extern void     mapping_set_mod(ppnum_t pn);
6870 
void
mapping_set_mod(
	ppnum_t pn)
{
	/* Thin alias for pmap_set_modify(), kept for legacy callers. */
	pmap_set_modify(pn);
}
6877 
6878 extern void     mapping_set_ref(ppnum_t pn);
6879 
void
mapping_set_ref(
	ppnum_t pn)
{
	/* Thin alias for pmap_set_reference(), kept for legacy callers. */
	pmap_set_reference(pn);
}
6886 
6887 /*
6888  * Clear specified attribute bits.
6889  *
6890  * Try to force an arm_fast_fault() for all mappings of
6891  * the page - to force attributes to be set again at fault time.
6892  * If the forcing succeeds, clear the cached bits at the head.
6893  * Otherwise, something must have been wired, so leave the cached
6894  * attributes alone.
6895  */
MARK_AS_PMAP_TEXT static void
phys_attribute_clear_with_flush_range(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_paddr_t    pa = ptoa(pn);
	vm_prot_t       allow_mode = VM_PROT_ALL;

#if XNU_MONITOR
	/* PPL-owned attribute bits may only be manipulated by the PPL itself. */
	if (__improbable(bits & PP_ATTR_PPL_OWNED_BITS)) {
		panic("%s: illegal request, "
		    "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
		    __FUNCTION__,
		    pn, bits, options, arg, flush_range);
	}
#endif
	/* A caller-supplied arg or flush range implies a flush will happen. */
	if ((arg != NULL) || (flush_range != NULL)) {
		options = options & ~PMAP_OPTIONS_NOFLUSH;
	}

	if (__improbable((bits & PP_ATTR_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH))) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
		    "should not clear 'modified' without flushing TLBs\n",
		    pn, bits, options, arg, flush_range);
	}

	assert(pn != vm_page_fictitious_addr);

	if (options & PMAP_OPTIONS_CLEAR_WRITE) {
		assert(bits == PP_ATTR_MODIFIED);

		pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), options, flush_range);
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear the modified bit.
		 * pmap_page_protect has taken care of resetting
		 * the state so that we'll see the next write as a fault to
		 * the VM (i.e. we don't want a fast fault).
		 */
		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
		return;
	}
	/* Decide which access modes must fault so the bits can be re-derived. */
	if (bits & PP_ATTR_REFERENCED) {
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	}
	if (bits & PP_ATTR_MODIFIED) {
		allow_mode &= ~VM_PROT_WRITE;
	}

	if (bits == PP_ATTR_NOENCRYPT) {
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear and
		 * return.  On ARM, this bit is just a debugging aid.
		 */
		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
		return;
	}

	/* Only clear the cached bits if every mapping could be made faulting. */
	if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
	}
}
6963 
MARK_AS_PMAP_TEXT void
phys_attribute_clear_internal(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	/* Single-page variant: no coalesced flush range. */
	phys_attribute_clear_with_flush_range(pn, bits, options, arg, NULL);
}
6973 
6974 #if __ARM_RANGE_TLBI__
/*
 * Clear attribute bits for every managed page mapped in [start, end),
 * where the range lies within a single twig (next-to-leaf) table.
 * Returns the VA at which processing stopped: 'end' on completion, or
 * an earlier VA if preemption became pending.
 */
MARK_AS_PMAP_TEXT static vm_map_address_t
phys_attribute_clear_twig_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int bits,
	unsigned int options,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_assert_locked(pmap, PMAP_LOCK_SHARED);
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	assert(end >= start);
	assert((end - start) <= pt_attr_twig_size(pt_attr));
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	vm_map_address_t va = start;
	pt_entry_t     *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
	tt_entry_t     *tte_p;
	tte_p = pmap_tte(pmap, start);
	unsigned int npages = 0;

	/* No twig table: nothing mapped in this range. */
	if (tte_p == (tt_entry_t *) NULL) {
		return end;
	}

	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);

		start_pte_p = &pte_p[pte_index(pt_attr, start)];
		end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
		assert(end_pte_p >= start_pte_p);
		for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++, va += pmap_page_size) {
			/* Yield to pending preemption, but always make progress on page 0. */
			if (__improbable(npages++ && pmap_pending_preemption())) {
				return va;
			}
			pmap_paddr_t pa = pte_to_pa(*((volatile pt_entry_t*)curr_pte_p));
			if (pa_valid(pa)) {
				/* Only kernel-managed pages carry attribute bits. */
				ppnum_t pn = (ppnum_t) atop(pa);
				phys_attribute_clear_with_flush_range(pn, bits, options, NULL, flush_range);
			}
		}
	}
	return end;
}
7018 
/*
 * Clear attribute bits across [start, end) in 'pmap', twig by twig,
 * coalescing TLB invalidations into a single ranged flush at the end.
 * Returns the VA at which processing stopped (callers loop until done).
 */
MARK_AS_PMAP_TEXT vm_map_address_t
phys_attribute_clear_range_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int bits,
	unsigned int options)
{
	if (__improbable(end < start)) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}
	validate_pmap_mutable(pmap);

	vm_map_address_t va = start;
	/* Accumulates whether any PTE change requires a TLB flush. */
	pmap_tlb_flush_range_t flush_range = {
		.ptfr_pmap = pmap,
		.ptfr_start = start,
		.ptfr_end = end,
		.ptfr_flush_needed = false
	};

	pmap_lock(pmap, PMAP_LOCK_SHARED);
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	while (va < end) {
		vm_map_address_t curr_end;

		/* Process at most up to the next twig boundary. */
		curr_end = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
		if (curr_end > end) {
			curr_end = end;
		}

		va = phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
		/* Stop early if the twig was cut short or preemption is pending. */
		if ((va < curr_end) || pmap_pending_preemption()) {
			break;
		}
	}
	pmap_unlock(pmap, PMAP_LOCK_SHARED);
	/* Issue one ranged TLB flush covering everything actually processed. */
	if (flush_range.ptfr_flush_needed) {
		flush_range.ptfr_end = va;
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(
			flush_range.ptfr_start,
			flush_range.ptfr_end - flush_range.ptfr_start,
			flush_range.ptfr_pmap,
			true);
		sync_tlb_flush();
	}
	return va;
}
7068 
/*
 * Driver for ranged attribute clearing: repeatedly calls the (possibly
 * preempted) internal/PPL implementation until the whole range is done.
 */
static void
phys_attribute_clear_range(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int bits,
	unsigned int options)
{
	/*
	 * We allow single-page requests to execute non-preemptibly,
	 * as it doesn't make sense to sample AST_URGENT for a single-page
	 * operation, and there are a couple of special use cases that
	 * require a non-preemptible single-page operation.
	 */
	if ((end - start) > (pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO)) {
		pmap_verify_preemptible();
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);

	/* The callee returns how far it got; loop until the range is covered. */
	while (start < end) {
#if XNU_MONITOR
		start = phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
#else
		start = phys_attribute_clear_range_internal(pmap, start, end, bits, options);
#endif
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
}
7099 #endif /* __ARM_RANGE_TLBI__ */
7100 
/*
 * Single-page attribute clear: dispatch to the PPL or the internal
 * implementation depending on configuration.
 */
static void
phys_attribute_clear(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	/*
	 * Do we really want this tracepoint?  It will be extremely chatty.
	 * Also, should we have a corresponding trace point for the set path?
	 */
	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

#if XNU_MONITOR
	phys_attribute_clear_ppl(pn, bits, options, arg);
#else
	phys_attribute_clear_internal(pn, bits, options, arg);
#endif

	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
7122 
7123 /*
7124  *	Set specified attribute bits.
7125  *
7126  *	Set cached value in the pv head because we have
7127  *	no per-mapping hardware support for referenced and
7128  *	modify bits.
7129  */
7130 MARK_AS_PMAP_TEXT void
7131 phys_attribute_set_internal(
7132 	ppnum_t pn,
7133 	unsigned int bits)
7134 {
7135 	pmap_paddr_t    pa = ptoa(pn);
7136 	assert(pn != vm_page_fictitious_addr);
7137 
7138 #if XNU_MONITOR
7139 	if (bits & PP_ATTR_PPL_OWNED_BITS) {
7140 		panic("%s: illegal request, "
7141 		    "pn=%u, bits=%#x",
7142 		    __FUNCTION__,
7143 		    pn, bits);
7144 	}
7145 #endif
7146 
7147 	ppattr_pa_set_bits(pa, (uint16_t)bits);
7148 
7149 	return;
7150 }
7151 
/*
 * Set cached attribute bits for a physical page, dispatching to the PPL
 * when the pmap is monitor-protected.
 */
static void
phys_attribute_set(
	ppnum_t pn,
	unsigned int bits)
{
#if XNU_MONITOR
	phys_attribute_set_ppl(pn, bits);
#else
	phys_attribute_set_internal(pn, bits);
#endif
}
7163 
7164 
7165 /*
7166  *	Check specified attribute bits.
7167  *
7168  *	use the software cached bits (since no hw support).
7169  */
7170 static boolean_t
7171 phys_attribute_test(
7172 	ppnum_t pn,
7173 	unsigned int bits)
7174 {
7175 	pmap_paddr_t    pa = ptoa(pn);
7176 	assert(pn != vm_page_fictitious_addr);
7177 	return ppattr_pa_test_bits(pa, (pp_attr_t)bits);
7178 }
7179 
7180 
7181 /*
7182  *	Set the modify/reference bits on the specified physical page.
7183  */
void
pmap_set_modify(ppnum_t pn)
{
	/* Record that the page has been written. */
	phys_attribute_set(pn, PP_ATTR_MODIFIED);
}
7189 
7190 
7191 /*
7192  *	Clear the modify bits on the specified physical page.
7193  */
void
pmap_clear_modify(
	ppnum_t pn)
{
	/* Clear the software-tracked dirty bit; future writes will re-fault. */
	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}
7200 
7201 
7202 /*
7203  *	pmap_is_modified:
7204  *
7205  *	Return whether or not the specified physical page is modified
7206  *	by any physical maps.
7207  */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	/* Software-tracked dirty state; no PTE scan needed. */
	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}
7214 
7215 
7216 /*
7217  *	Set the reference bit on the specified physical page.
7218  */
static void
pmap_set_reference(
	ppnum_t pn)
{
	/* Record that the page has been accessed. */
	phys_attribute_set(pn, PP_ATTR_REFERENCED);
}
7225 
7226 /*
7227  *	Clear the reference bits on the specified physical page.
7228  */
void
pmap_clear_reference(
	ppnum_t pn)
{
	/* Clear the software-tracked referenced bit; future accesses re-fault. */
	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}
7235 
7236 
7237 /*
7238  *	pmap_is_referenced:
7239  *
7240  *	Return whether or not the specified physical page is referenced
7241  *	by any physical maps.
7242  */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	/* Software-tracked referenced state; no PTE scan needed. */
	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}
7249 
7250 /*
7251  * pmap_get_refmod(phys)
7252  *  returns the referenced and modified bits of the specified
7253  *  physical page.
7254  */
7255 unsigned int
7256 pmap_get_refmod(
7257 	ppnum_t pn)
7258 {
7259 	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7260 	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
7261 }
7262 
7263 static inline unsigned int
7264 pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
7265 {
7266 	return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7267 	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7268 }
7269 
7270 /*
7271  * pmap_clear_refmod(phys, mask)
7272  *  clears the referenced and modified bits as specified by the mask
7273  *  of the specified physical page.
7274  */
7275 void
7276 pmap_clear_refmod_options(
7277 	ppnum_t         pn,
7278 	unsigned int    mask,
7279 	unsigned int    options,
7280 	void            *arg)
7281 {
7282 	unsigned int    bits;
7283 
7284 	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
7285 	phys_attribute_clear(pn, bits, options, arg);
7286 }
7287 
7288 /*
7289  * Perform pmap_clear_refmod_options on a virtual address range.
7290  * The operation will be performed in bulk & tlb flushes will be coalesced
7291  * if possible.
7292  *
7293  * Returns true if the operation is supported on this platform.
7294  * If this function returns false, the operation is not supported and
7295  * nothing has been modified in the pmap.
7296  */
bool
pmap_clear_refmod_range_options(
	pmap_t pmap __unused,
	vm_map_address_t start __unused,
	vm_map_address_t end __unused,
	unsigned int mask __unused,
	unsigned int options __unused)
{
#if __ARM_RANGE_TLBI__
	/* Convert VM_MEM_* flags to PP_ATTR_* bits and clear across the range. */
	unsigned int    bits;
	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
	phys_attribute_clear_range(pmap, start, end, bits, options);
	return true;
#else /* __ARM_RANGE_TLBI__ */
#pragma unused(pmap, start, end, mask, options)
	/*
	 * This operation allows the VM to bulk modify refmod bits on a virtually
	 * contiguous range of addresses. This is large performance improvement on
	 * platforms that support ranged tlbi instructions. But on older platforms,
	 * we can only flush per-page or the entire asid. So we currently
	 * only support this operation on platforms that support ranged tlbi.
	 * instructions. On other platforms, we require that
	 * the VM modify the bits on a per-page basis.
	 */
	return false;
#endif /* __ARM_RANGE_TLBI__ */
}
7324 
7325 void
7326 pmap_clear_refmod(
7327 	ppnum_t pn,
7328 	unsigned int mask)
7329 {
7330 	pmap_clear_refmod_options(pn, mask, 0, NULL);
7331 }
7332 
7333 unsigned int
7334 pmap_disconnect_options(
7335 	ppnum_t pn,
7336 	unsigned int options,
7337 	void *arg)
7338 {
7339 	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7340 		/*
7341 		 * On ARM, the "modified" bit is managed by software, so
7342 		 * we know up-front if the physical page is "modified",
7343 		 * without having to scan all the PTEs pointing to it.
7344 		 * The caller should have made the VM page "busy" so noone
7345 		 * should be able to establish any new mapping and "modify"
7346 		 * the page behind us.
7347 		 */
7348 		if (pmap_is_modified(pn)) {
7349 			/*
7350 			 * The page has been modified and will be sent to
7351 			 * the VM compressor.
7352 			 */
7353 			options |= PMAP_OPTIONS_COMPRESSOR;
7354 		} else {
7355 			/*
7356 			 * The page hasn't been modified and will be freed
7357 			 * instead of compressed.
7358 			 */
7359 		}
7360 	}
7361 
7362 	/* disconnect the page */
7363 	pmap_page_protect_options(pn, 0, options, arg);
7364 
7365 	/* return ref/chg status */
7366 	return pmap_get_refmod(pn);
7367 }
7368 
7369 /*
7370  *	Routine:
7371  *		pmap_disconnect
7372  *
7373  *	Function:
7374  *		Disconnect all mappings for this page and return reference and change status
7375  *		in generic format.
7376  *
7377  */
7378 unsigned int
7379 pmap_disconnect(
7380 	ppnum_t pn)
7381 {
7382 	pmap_page_protect(pn, 0);       /* disconnect the page */
7383 	return pmap_get_refmod(pn);   /* return ref/chg status */
7384 }
7385 
7386 boolean_t
7387 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7388 {
7389 	if (ptoa(first) >= vm_last_phys) {
7390 		return FALSE;
7391 	}
7392 	if (ptoa(last) < vm_first_phys) {
7393 		return FALSE;
7394 	}
7395 
7396 	return TRUE;
7397 }
7398 
7399 /*
7400  * The state maintained by the noencrypt functions is used as a
7401  * debugging aid on ARM.  This incurs some overhead on the part
7402  * of the caller.  A special case check in phys_attribute_clear
7403  * (the most expensive path) currently minimizes this overhead,
7404  * but stubbing these functions out on RELEASE kernels yields
7405  * further wins.
7406  */
7407 boolean_t
7408 pmap_is_noencrypt(
7409 	ppnum_t pn)
7410 {
7411 #if DEVELOPMENT || DEBUG
7412 	boolean_t result = FALSE;
7413 
7414 	if (!pa_valid(ptoa(pn))) {
7415 		return FALSE;
7416 	}
7417 
7418 	result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7419 
7420 	return result;
7421 #else
7422 #pragma unused(pn)
7423 	return FALSE;
7424 #endif
7425 }
7426 
7427 void
7428 pmap_set_noencrypt(
7429 	ppnum_t pn)
7430 {
7431 #if DEVELOPMENT || DEBUG
7432 	if (!pa_valid(ptoa(pn))) {
7433 		return;
7434 	}
7435 
7436 	phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7437 #else
7438 #pragma unused(pn)
7439 #endif
7440 }
7441 
7442 void
7443 pmap_clear_noencrypt(
7444 	ppnum_t pn)
7445 {
7446 #if DEVELOPMENT || DEBUG
7447 	if (!pa_valid(ptoa(pn))) {
7448 		return;
7449 	}
7450 
7451 	phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7452 #else
7453 #pragma unused(pn)
7454 #endif
7455 }
7456 
#if XNU_MONITOR
/*
 * Return whether the given managed physical page is tagged as belonging
 * to the PPL monitor (PP_ATTR_MONITOR).  The page must be managed.
 */
boolean_t
pmap_is_monitor(ppnum_t pn)
{
	assert(pa_valid(ptoa(pn)));
	return phys_attribute_test(pn, PP_ATTR_MONITOR);
}
#endif
7465 
/*
 * Lock the physical page identified by pn: for a managed page (on
 * non-PPL builds) take its PV head lock; otherwise fall back to the
 * global phys_backup_lock.  On XNU_MONITOR builds the backup lock is
 * always used, since the PV locks are PPL-internal.
 */
void
pmap_lock_phys_page(ppnum_t pn)
{
#if !XNU_MONITOR
	unsigned int    pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = pa_index(phys);
		pvh_lock(pai);
	} else
#else
	(void)pn;
#endif
	/* Unmanaged page (or PPL build): use the global backup lock. */
	{ simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
}
7482 
7483 
/*
 * Release the lock taken by pmap_lock_phys_page() for the given page:
 * the per-page PV head lock for a managed page (non-PPL builds), or the
 * global phys_backup_lock otherwise.
 */
void
pmap_unlock_phys_page(ppnum_t pn)
{
#if !XNU_MONITOR
	unsigned int    pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = pa_index(phys);
		pvh_unlock(pai);
	} else
#else
	(void)pn;
#endif
	/* Unmanaged page (or PPL build): drop the global backup lock. */
	{ simple_unlock(&phys_backup_lock);}
}
7500 
/*
 * Program the user translation table base for a switch to the given pmap
 * on the current CPU, refreshing the per-CPU cached nested-pmap state.
 * Switching to kernel_pmap instead clears the user TTB if it is not
 * already clear.
 */
MARK_AS_PMAP_TEXT static void
pmap_switch_user_ttb(pmap_t pmap, pmap_cpu_data_t *cpu_data_ptr)
{
	if (pmap != kernel_pmap) {
		/* Cache the nested (shared region) pmap info on this CPU. */
		cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
		cpu_data_ptr->cpu_nested_pmap_attr = (cpu_data_ptr->cpu_nested_pmap == NULL) ?
		    NULL : pmap_get_pt_attr(cpu_data_ptr->cpu_nested_pmap);
		cpu_data_ptr->cpu_nested_region_addr = pmap->nested_region_addr;
		cpu_data_ptr->cpu_nested_region_size = pmap->nested_region_size;
#if __ARM_MIXED_PAGE_SIZE__
		cpu_data_ptr->commpage_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
#endif
	}


#if __ARM_MIXED_PAGE_SIZE__
	/* Reprogram TCR only when the target pmap's configuration differs. */
	if ((pmap != kernel_pmap) && (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr())) {
		set_tcr(pmap_get_pt_attr(pmap)->pta_tcr_value);
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */


	if (pmap != kernel_pmap) {
		/* Install the new table base together with the pmap's HW ASID. */
		set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
	} else if (!pmap_user_ttb_is_clear()) {
		pmap_clear_user_ttb_internal();
	}
}
7529 
/*
 * Point the user TTB at the invalid (empty) translation table, leaving
 * no user mappings reachable until the next pmap switch.
 */
MARK_AS_PMAP_TEXT void
pmap_clear_user_ttb_internal(void)
{
	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
}
7535 
/*
 * External entry point for clearing the user TTB; dispatches into the
 * PPL on XNU_MONITOR builds and emits begin/end trace events.
 */
void
pmap_clear_user_ttb(void)
{
	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
#if XNU_MONITOR
	pmap_clear_user_ttb_ppl();
#else
	pmap_clear_user_ttb_internal();
#endif
	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
}
7547 
7548 
#if defined(__arm64__)
/*
 * Marker for use in multi-pass fast-fault PV list processing.
 * ARM_PTE_COMPRESSED should never otherwise be set on PTEs processed by
 * these functions, as compressed PTEs should never be present in PV lists.
 * Note that this only holds true for arm64; for arm32 we don't have enough
 * SW bits in the PTE, so the same bit does double-duty as the COMPRESSED
 * and WRITEABLE marker depending on whether the PTE is valid.
 */
#define ARM_PTE_FF_MARKER ARM_PTE_COMPRESSED
/* Guard against a PTE SW-bit reassignment silently breaking the marker scheme. */
_Static_assert(ARM_PTE_COMPRESSED != ARM_PTE_WRITEABLE, "compressed bit aliases writeable");
_Static_assert(ARM_PTE_COMPRESSED != ARM_PTE_WIRED, "compressed bit aliases wired");
#endif
7562 
7563 
/*
 * Walk every mapping of the given physical page and downgrade it so that
 * the next disallowed access (per allow_mode) takes a fault, allowing
 * software ref/mod tracking to observe the access.  Clearing
 * VM_PROT_READ clears the AF bit; clearing VM_PROT_WRITE downgrades a
 * writable mapping to read-only and remembers it was writeable.
 *
 * Runs in two passes over the PV list: pass 1 updates PTEs and tags
 * those needing invalidation with ARM_PTE_FF_MARKER; pass 2 clears the
 * markers and issues the TLB invalidations.  When flush_range covers a
 * mapping, its invalidation is deferred to the caller via
 * ptfr_flush_needed.
 *
 * Returns FALSE if the page is unmanaged or a wired mapping was skipped
 * (unless PMAP_OPTIONS_FF_WIRED was passed); TRUE otherwise.  Unless
 * PMAP_OPTIONS_FF_LOCKED is passed, the PVH lock is taken here.
 */
MARK_AS_PMAP_TEXT static boolean_t
arm_force_fast_fault_with_flush_range(
	ppnum_t         ppnum,
	vm_prot_t       allow_mode,
	int             options,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_paddr_t     phys = ptoa(ppnum);
	pv_entry_t      *pve_p;
	pt_entry_t      *pte_p;
	unsigned int     pai;
	unsigned int     pass1_updated = 0;
	unsigned int     pass2_updated = 0;
	boolean_t        result;
	pv_entry_t     **pv_h;
	bool             is_reusable;
	bool             ref_fault;
	bool             mod_fault;
	bool             clear_write_fault = false;
	bool             ref_aliases_mod = false;
	bool             mustsynch = ((options & PMAP_OPTIONS_FF_LOCKED) == 0);

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;   /* Not a managed page. */
	}

	result = TRUE;
	ref_fault = false;
	mod_fault = false;
	pai = pa_index(phys);
	if (__probable(mustsynch)) {
		pvh_lock(pai);
	}
	pv_h = pai_to_pvh(pai);

#if XNU_MONITOR
	if (__improbable(ppattr_pa_test_monitor(phys))) {
		panic("%s: PA 0x%llx belongs to PPL.", __func__, (uint64_t)phys);
	}
#endif
	/* The PV head is either a single PTE pointer, a PVE list, or empty. */
	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_pve_list(pv_h);
	} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
		panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)phys);
	}

	is_reusable = ppattr_test_reusable(pai);

	/*
	 * issue_tlbi is used to indicate that this function will need to issue at least one TLB
	 * invalidation during pass 2.  tlb_flush_needed only indicates that PTE permissions have
	 * changed and that a TLB flush will be needed *at some point*, so we'll need to call
	 * FLUSH_PTE_STRONG() to synchronize prior PTE updates.  In the case of a flush_range
	 * operation, TLB invalidation may be handled by the caller so it's possible for
	 * tlb_flush_needed to be true while issue_tlbi is false.
	 */
	bool issue_tlbi = false;
	bool tlb_flush_needed = false;

	pv_entry_t *orig_pve_p = pve_p;
	pt_entry_t *orig_pte_p = pte_p;
	int pve_ptep_idx = 0;

	/*
	 * Pass 1: Make any necessary PTE updates, marking PTEs that will require
	 * TLB invalidation in pass 2.
	 */
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		pt_entry_t       spte;
		pt_entry_t       tmplate;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto fff_skip_pve_pass1;
			}
		}

#ifdef PVH_FLAG_IOMMU
		/* IOMMU mappings are not CPU PTEs; skip them. */
		if (pvh_ptep_is_iommu(pte_p)) {
			goto fff_skip_pve_pass1;
		}
#endif
		if (*pte_p == ARM_PTE_EMPTY) {
			panic("pte is empty: pte_p=%p ppnum=0x%x", pte_p, ppnum);
		}
		if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
			panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x", pte_p, ppnum);
		}

		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
		const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

		assert(va >= pmap->min && va < pmap->max);

		/* update pmap stats and ledgers */
		const bool is_internal = ppattr_pve_is_internal(pai, pve_p, pve_ptep_idx);
		const bool is_altacct = ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx);
		if (is_altacct) {
			/*
			 * We do not track "reusable" status for
			 * "alternate accounting" mappings.
			 */
		} else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
		    is_reusable &&
		    is_internal &&
		    pmap != kernel_pmap) {
			/* one less "reusable" */
			pmap_ledger_debit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			/* one more "internal" */
			pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);

			/*
			 * Since the page is being marked non-reusable, we assume that it will be
			 * modified soon.  Avoid the cost of another trap to handle the fast
			 * fault when we next write to this page.
			 */
			clear_write_fault = true;
		} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
		    !is_reusable &&
		    is_internal &&
		    pmap != kernel_pmap) {
			/* one more "reusable" */
			pmap_ledger_credit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
		}

		/* Wired mappings are left untouched unless FF_WIRED forces them. */
		bool wiredskip = pte_is_wired(*pte_p) &&
		    ((options & PMAP_OPTIONS_FF_WIRED) == 0);

		if (wiredskip) {
			result = FALSE;
			goto fff_skip_pve_pass1;
		}

		spte = *pte_p;
		tmplate = spte;

		if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
			/* read protection sets the pte to fault */
			tmplate =  tmplate & ~ARM_PTE_AF;
			ref_fault = true;
		}
		if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
			/* take away write permission if set */
			if (pmap == kernel_pmap) {
				if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
					tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
					pte_set_was_writeable(tmplate, true);
					mod_fault = true;
				}
			} else {
				if ((tmplate & ARM_PTE_APMASK) == pt_attr_leaf_rw(pt_attr)) {
					tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
					pte_set_was_writeable(tmplate, true);
					mod_fault = true;
				}
			}
		}

#if MACH_ASSERT && XNU_MONITOR
		if (is_pte_xprr_protected(pmap, spte)) {
			if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
				panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
				    "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
				    __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
				    ppnum, options, allow_mode);
			}
		}
#endif /* MACH_ASSERT && XNU_MONITOR */

		if (result && (tmplate != spte)) {
			/*
			 * Only HW-relevant bit changes (i.e. anything other than the
			 * SW "was-writeable" bit) require a TLB flush.
			 */
			if ((spte & (~ARM_PTE_WRITEABLE)) != (tmplate & (~ARM_PTE_WRITEABLE)) &&
			    !(options & PMAP_OPTIONS_NOFLUSH)) {
				tlb_flush_needed = true;
				if (!flush_range || (flush_range->ptfr_pmap != pmap) ||
				    va >= flush_range->ptfr_end || va < flush_range->ptfr_start) {
#ifdef ARM_PTE_FF_MARKER
					assert(!(spte & ARM_PTE_FF_MARKER));
					tmplate |= ARM_PTE_FF_MARKER;
					++pass1_updated;
#endif
					issue_tlbi = true;
				}
			}
			write_pte_fast(pte_p, tmplate);
		}

fff_skip_pve_pass1:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
		}
	}

	if (tlb_flush_needed) {
		/* Ensure all pass-1 PTE stores are visible before any TLBI. */
		FLUSH_PTE_STRONG();
	}

	if (!issue_tlbi) {
		goto fff_finish;
	}

	/* Pass 2: Issue any required TLB invalidations */
	pve_p = orig_pve_p;
	pte_p = orig_pte_p;
	pve_ptep_idx = 0;

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto fff_skip_pve_pass2;
			}
		}

#ifdef PVH_FLAG_IOMMU
		if (pvh_ptep_is_iommu(pte_p)) {
			goto fff_skip_pve_pass2;
		}
#endif

#ifdef ARM_PTE_FF_MARKER
		pt_entry_t spte = *pte_p;

		/* Only PTEs tagged in pass 1 need invalidation here. */
		if (!(spte & ARM_PTE_FF_MARKER)) {
			goto fff_skip_pve_pass2;
		} else {
			spte &= (~ARM_PTE_FF_MARKER);
			/* No need to synchronize with the TLB flush; we're changing a SW-managed bit */
			write_pte_fast(pte_p, spte);
			++pass2_updated;
		}
#endif
		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);

		if (!flush_range || (flush_range->ptfr_pmap != pmap) ||
		    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
			    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
		}

fff_skip_pve_pass2:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
		}
	}

fff_finish:
	/* Both passes must have visited the same set of marked PTEs. */
	if (__improbable(pass1_updated != pass2_updated)) {
		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
		    __func__, pass1_updated, pass2_updated);
	}

	/*
	 * If we are using the same approach for ref and mod
	 * faults on this PTE, do not clear the write fault;
	 * this would cause both ref and mod to be set on the
	 * page again, and prevent us from taking ANY read/write
	 * fault on the mapping.
	 */
	if (clear_write_fault && !ref_aliases_mod) {
		arm_clear_fast_fault(ppnum, VM_PROT_WRITE, PT_ENTRY_NULL);
	}
	if (tlb_flush_needed) {
		if (flush_range) {
			/* Delayed flush. Signal to the caller that the flush is needed. */
			flush_range->ptfr_flush_needed = true;
		} else {
			sync_tlb_flush();
		}
	}

	/* update global "reusable" status for this page */
	if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) && is_reusable) {
		ppattr_clear_reusable(pai);
	} else if ((options & PMAP_OPTIONS_SET_REUSABLE) && !is_reusable) {
		ppattr_set_reusable(pai);
	}

	/* Record which fault types are now armed so the fault path can undo them. */
	if (mod_fault) {
		ppattr_set_modfault(pai);
	}
	if (ref_fault) {
		ppattr_set_reffault(pai);
	}
	if (__probable(mustsynch)) {
		pvh_unlock(pai);
	}
	return result;
}
7870 
7871 MARK_AS_PMAP_TEXT boolean_t
7872 arm_force_fast_fault_internal(
7873 	ppnum_t         ppnum,
7874 	vm_prot_t       allow_mode,
7875 	int             options)
7876 {
7877 	if (__improbable((options & (PMAP_OPTIONS_FF_LOCKED | PMAP_OPTIONS_NOFLUSH)) != 0)) {
7878 		panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
7879 	}
7880 	return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
7881 }
7882 
7883 /*
7884  *	Routine:	arm_force_fast_fault
7885  *
7886  *	Function:
7887  *		Force all mappings for this page to fault according
7888  *		to the access modes allowed, so we can gather ref/modify
7889  *		bits again.
7890  */
7891 
7892 boolean_t
7893 arm_force_fast_fault(
7894 	ppnum_t         ppnum,
7895 	vm_prot_t       allow_mode,
7896 	int             options,
7897 	__unused void   *arg)
7898 {
7899 	pmap_paddr_t    phys = ptoa(ppnum);
7900 
7901 	assert(ppnum != vm_page_fictitious_addr);
7902 
7903 	if (!pa_valid(phys)) {
7904 		return FALSE;   /* Not a managed page. */
7905 	}
7906 
7907 #if XNU_MONITOR
7908 	return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
7909 #else
7910 	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
7911 #endif
7912 }
7913 
7914 /*
7915  *	Routine:	arm_clear_fast_fault
7916  *
7917  *	Function:
7918  *		Clear pending force fault for all mappings for this page based on
7919  *		the observed fault type, update ref/modify bits.
7920  */
/*
 * Undo the downgrades applied by arm_force_fast_fault for the observed
 * fault type on every mapping of the page (or a single mapping, when
 * pte_p is supplied): restore write permission for a write fault on a
 * "was writeable" mapping, or set the AF bit for a read fault, updating
 * the page's ref/mod attributes accordingly.  Processes the PV list in
 * chunks of at most PMAP_MAX_PV_LIST_CHUNK_SIZE entries per call.
 * Caller must hold the PVH lock.  Returns TRUE iff any PTE was updated.
 */
MARK_AS_PMAP_TEXT static boolean_t
arm_clear_fast_fault(
	ppnum_t ppnum,
	vm_prot_t fault_type,
	pt_entry_t *pte_p)
{
	pmap_paddr_t    pa = ptoa(ppnum);
	pv_entry_t     *pve_p;
	unsigned int    pai;
	boolean_t       result;
	bool            tlb_flush_needed = false;
	pv_entry_t    **pv_h;
	unsigned int    npve = 0;
	unsigned int    pass1_updated = 0;
	unsigned int    pass2_updated = 0;

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(pa)) {
		return FALSE;   /* Not a managed page. */
	}

	result = FALSE;
	pai = pa_index(pa);
	pvh_assert_locked(pai);
	pv_h = pai_to_pvh(pai);

	/* If no specific PTE was given, walk the page's whole PV list. */
	pve_p = PV_ENTRY_NULL;
	if (pte_p == PT_ENTRY_NULL) {
		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
			pte_p = pvh_ptep(pv_h);
		} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			pve_p = pvh_pve_list(pv_h);
		} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
			panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)pa);
		}
	}

	pv_entry_t *orig_pve_p = pve_p;
	pt_entry_t *orig_pte_p = pte_p;
	int pve_ptep_idx = 0;

	/*
	 * Pass 1: Make any necessary PTE updates, marking PTEs that will require
	 * TLB invalidation in pass 2.
	 */
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		pt_entry_t spte;
		pt_entry_t tmplate;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto cff_skip_pve_pass1;
			}
		}

#ifdef PVH_FLAG_IOMMU
		/* IOMMU mappings are not CPU PTEs; skip them. */
		if (pvh_ptep_is_iommu(pte_p)) {
			goto cff_skip_pve_pass1;
		}
#endif
		if (*pte_p == ARM_PTE_EMPTY) {
			panic("pte is empty: pte_p=%p ppnum=0x%x", pte_p, ppnum);
		}

		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		__assert_only const vm_map_address_t va = ptd_get_va(ptdp, pte_p);

		assert(va >= pmap->min && va < pmap->max);

		spte = *pte_p;
		tmplate = spte;

		if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
			/* Restore write access and mark the page referenced+modified. */
			{
				if (pmap == kernel_pmap) {
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else {
					assert(pmap->type != PMAP_TYPE_NESTED);
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
				}
			}

			tmplate |= ARM_PTE_AF;

			pte_set_was_writeable(tmplate, false);
			ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
		} else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
			/* Re-enable access (set AF) and mark the page referenced. */
			tmplate = spte | ARM_PTE_AF;

			{
				ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
			}
		}

#if MACH_ASSERT && XNU_MONITOR
		if (is_pte_xprr_protected(pmap, spte)) {
			if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
				panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
				    "ppnum=0x%x, fault_type=0x%x",
				    __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
				    ppnum, fault_type);
			}
		}
#endif /* MACH_ASSERT && XNU_MONITOR */

		assert(spte != ARM_PTE_TYPE_FAULT);
		if (spte != tmplate) {
			/* HW-visible change: tag the PTE so pass 2 invalidates it. */
			if ((spte & (~ARM_PTE_WRITEABLE)) != (tmplate & (~ARM_PTE_WRITEABLE))) {
#ifdef ARM_PTE_FF_MARKER
				assert(!(spte & ARM_PTE_FF_MARKER));
				tmplate |= ARM_PTE_FF_MARKER;
				++pass1_updated;
#endif
				tlb_flush_needed = true;
			}
			write_pte_fast(pte_p, tmplate);
			result = TRUE;
		}

cff_skip_pve_pass1:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
			/* Bound the amount of work done under the PVH lock. */
			++npve;
			if (__improbable(npve == PMAP_MAX_PV_LIST_CHUNK_SIZE)) {
				break;
			}
		}
	}

	if (!tlb_flush_needed) {
		goto cff_finish;
	}

	/* Ensure all pass-1 PTE stores are visible before any TLBI. */
	FLUSH_PTE_STRONG();

	/* Pass 2: Issue any required TLB invalidations */
	pve_p = orig_pve_p;
	pte_p = orig_pte_p;
	pve_ptep_idx = 0;
	npve = 0;

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto cff_skip_pve_pass2;
			}
		}

#ifdef PVH_FLAG_IOMMU
		if (pvh_ptep_is_iommu(pte_p)) {
			goto cff_skip_pve_pass2;
		}
#endif

#ifdef ARM_PTE_FF_MARKER
		pt_entry_t spte = *pte_p;

		/* Only PTEs tagged in pass 1 need invalidation here. */
		if (!(spte & ARM_PTE_FF_MARKER)) {
			goto cff_skip_pve_pass2;
		} else {
			spte &= (~ARM_PTE_FF_MARKER);
			/* No need to synchronize with the TLB flush; we're changing a SW-managed bit */
			write_pte_fast(pte_p, spte);
			++pass2_updated;
		}
#endif
		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
		const pmap_t pmap = ptdp->pmap;
		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);

		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);

cff_skip_pve_pass2:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
			/* Stop at the same chunk boundary as pass 1. */
			++npve;
			if (__improbable(npve == PMAP_MAX_PV_LIST_CHUNK_SIZE)) {
				break;
			}
		}
	}

cff_finish:
	/* Both passes must have visited the same set of marked PTEs. */
	if (__improbable(pass1_updated != pass2_updated)) {
		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
		    __func__, pass1_updated, pass2_updated);
	}
	if (tlb_flush_needed) {
		sync_tlb_flush();
	}
	return result;
}
8121 
8122 /*
8123  * Determine if the fault was induced by software tracking of
8124  * modify/reference bits.  If so, re-enable the mapping (and set
8125  * the appropriate bits).
8126  *
8127  * Returns KERN_SUCCESS if the fault was induced and was
8128  * successfully handled.
8129  *
8130  * Returns KERN_FAILURE if the fault was not induced and
8131  * the function was unable to deal with it.
8132  *
8133  * Returns KERN_PROTECTION_FAILURE if the pmap layer explictly
8134  * disallows this type of access.
8135  *
8136  * Returns KERN_ABORTED if the pmap lock is taken and a
8137  * preemption is pending.
8138  *
8139  */
MARK_AS_PMAP_TEXT kern_return_t
arm_fast_fault_internal(
	pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	__unused bool was_af_fault,
	__unused bool from_user)
{
	kern_return_t   result = KERN_FAILURE;
	pt_entry_t     *ptep;
	pt_entry_t      spte = ARM_PTE_TYPE_FAULT;
	unsigned int    pai;
	pmap_paddr_t    pa;
	validate_pmap_mutable(pmap);

	/* Bail out (to be retried by the caller) if preemption is pending. */
	if (!pmap_lock_preempt(pmap, PMAP_LOCK_SHARED)) {
		return KERN_ABORTED;
	}

	/*
	 * If the entry doesn't exist, is completely invalid, or is already
	 * valid, we can't fix it here.
	 */

	const uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO;
	ptep = pmap_pte(pmap, va & ~(pmap_page_size - 1));
	if (ptep != PT_ENTRY_NULL) {
		/* Loop until the PTE is stable under the PVH lock. */
		while (true) {
			spte = *((volatile pt_entry_t*)ptep);

			pa = pte_to_pa(spte);

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, ptep)) {
				pmap_unlock(pmap, PMAP_LOCK_SHARED);
				return result;
			}

			/* Unmanaged pages carry no fast-fault state to repair. */
			if (!pa_valid(pa)) {
				pmap_unlock(pmap, PMAP_LOCK_SHARED);
#if XNU_MONITOR
				if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
					return KERN_PROTECTION_FAILURE;
				} else
#endif
				return result;
			}
			pai = pa_index(pa);
			pvh_lock(pai);
			if (*ptep == spte) {
				/*
				 * Double-check the spte value, as we care about the AF bit.
				 * It's also possible that pmap_page_protect() transitioned the
				 * PTE to compressed/empty before we grabbed the PVH lock.
				 */
				break;
			}
			/* PTE changed under us; drop the lock and re-read. */
			pvh_unlock(pai);
		}
	} else {
		pmap_unlock(pmap, PMAP_LOCK_SHARED);
		return result;
	}


	if ((result != KERN_SUCCESS) &&
	    ((ppattr_test_reffault(pai)) || ((fault_type & VM_PROT_WRITE) && ppattr_test_modfault(pai)))) {
		/*
		 * An attempted access will always clear ref/mod fault state, as
		 * appropriate for the fault type.  arm_clear_fast_fault will
		 * update the associated PTEs for the page as appropriate; if
		 * any PTEs are updated, we redrive the access.  If the mapping
		 * does not actually allow for the attempted access, the
		 * following fault will (hopefully) fail to update any PTEs, and
		 * thus cause arm_fast_fault to decide that it failed to handle
		 * the fault.
		 */
		if (ppattr_test_reffault(pai)) {
			ppattr_clear_reffault(pai);
		}
		if ((fault_type & VM_PROT_WRITE) && ppattr_test_modfault(pai)) {
			ppattr_clear_modfault(pai);
		}

		if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type, PT_ENTRY_NULL)) {
			/*
			 * Should this preserve KERN_PROTECTION_FAILURE?  The
			 * cost of not doing so is a another fault in a case
			 * that should already result in an exception.
			 */
			result = KERN_SUCCESS;
		}
	}

	/*
	 * If the PTE already has sufficient permissions, we can report the fault as handled.
	 * This may happen, for example, if multiple threads trigger roughly simultaneous faults
	 * on mappings of the same page
	 */
	if ((result == KERN_FAILURE) && (spte & ARM_PTE_AF)) {
		uintptr_t ap_ro, ap_rw, ap_x;
		if (pmap == kernel_pmap) {
			ap_ro = ARM_PTE_AP(AP_RONA);
			ap_rw = ARM_PTE_AP(AP_RWNA);
			ap_x = ARM_PTE_NX;
		} else {
			ap_ro = pt_attr_leaf_ro(pmap_get_pt_attr(pmap));
			ap_rw = pt_attr_leaf_rw(pmap_get_pt_attr(pmap));
			ap_x = pt_attr_leaf_x(pmap_get_pt_attr(pmap));
		}
		/*
		 * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
		 * hardware they may be xPRR-protected, in which case they'll be handled
		 * by the is_pte_xprr_protected() case above.  Additionally, the exception
		 * handling path currently does not call arm_fast_fault() without at least
		 * VM_PROT_READ in fault_type.
		 */
		if (((spte & ARM_PTE_APMASK) == ap_rw) ||
		    (!(fault_type & VM_PROT_WRITE) && ((spte & ARM_PTE_APMASK) == ap_ro))) {
			if (!(fault_type & VM_PROT_EXECUTE) || ((spte & ARM_PTE_XMASK) == ap_x)) {
				result = KERN_SUCCESS;
			}
		}
	}

	if ((result == KERN_FAILURE) && arm_clear_fast_fault((ppnum_t)atop(pa), fault_type, ptep)) {
		/*
		 * A prior arm_clear_fast_fault() operation may have returned early due to
		 * another pending PV list operation or an excessively large PV list.
		 * Attempt a targeted fixup of the PTE that caused the fault to avoid repeatedly
		 * taking a fault on the same mapping.
		 */
		result = KERN_SUCCESS;
	}

	pvh_unlock(pai);
	pmap_unlock(pmap, PMAP_LOCK_SHARED);
	return result;
}
8279 
/*
 * External entry point for handling a possibly software-induced fault.
 * Dispatches into the PPL on XNU_MONITOR builds, and retries while the
 * internal handler reports KERN_ABORTED (lock acquisition aborted due to
 * pending preemption).
 */
kern_return_t
arm_fast_fault(
	pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	bool was_af_fault,
	__unused bool from_user)
{
	kern_return_t   result = KERN_FAILURE;

	/* The faulting address must lie within the pmap's VA range. */
	if (va < pmap->min || va >= pmap->max) {
		return result;
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
	    from_user);

	do {
#if XNU_MONITOR
		result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
#else
		result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
#endif
	} while (result == KERN_ABORTED);

	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);

	return result;
}
8310 
8311 void
8312 pmap_copy_page(
8313 	ppnum_t psrc,
8314 	ppnum_t pdst)
8315 {
8316 	bcopy_phys((addr64_t) (ptoa(psrc)),
8317 	    (addr64_t) (ptoa(pdst)),
8318 	    PAGE_SIZE);
8319 }
8320 
8321 
8322 /*
8323  *	pmap_copy_page copies the specified (machine independent) pages.
8324  */
8325 void
8326 pmap_copy_part_page(
8327 	ppnum_t psrc,
8328 	vm_offset_t src_offset,
8329 	ppnum_t pdst,
8330 	vm_offset_t dst_offset,
8331 	vm_size_t len)
8332 {
8333 	bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8334 	    (addr64_t) (ptoa(pdst) + dst_offset),
8335 	    len);
8336 }
8337 
8338 
8339 /*
8340  *	pmap_zero_page zeros the specified (machine independent) page.
8341  */
8342 void
8343 pmap_zero_page(
8344 	ppnum_t pn)
8345 {
8346 	assert(pn != vm_page_fictitious_addr);
8347 	bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8348 }
8349 
8350 /*
8351  *	pmap_zero_part_page
8352  *	zeros the specified (machine independent) part of a page.
8353  */
8354 void
8355 pmap_zero_part_page(
8356 	ppnum_t pn,
8357 	vm_offset_t offset,
8358 	vm_size_t len)
8359 {
8360 	assert(pn != vm_page_fictitious_addr);
8361 	assert(offset + len <= PAGE_SIZE);
8362 	bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8363 }
8364 
/*
 * Establish the kernel's read-only alias mapping (LOWGLOBAL_ALIAS) of the
 * lowGlo structure's physical page.
 */
void
pmap_map_globals(
	void)
{
	pt_entry_t      *ptep, pte;

	/* The PTE slot for the alias must exist and must currently be empty. */
	ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
	assert(ptep != PT_ENTRY_NULL);
	assert(*ptep == ARM_PTE_EMPTY);

	/* Read-only (AP_RONA), never-executable (NX/PNX) mapping of lowGlo. */
	pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
#if __ARM_KERNEL_PROTECT__
	/* Non-global mapping when kernel protection is enabled. */
	pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
	pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
	pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
	*ptep = pte;
	FLUSH_PTE();
	/* Make the new translation visible before any access through the alias. */
	PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false, true);

#if KASAN
	kasan_notify_address(LOWGLOBAL_ALIAS, PAGE_SIZE);
#endif
}
8389 
8390 vm_offset_t
8391 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8392 {
8393 	if (__improbable(index >= CPUWINDOWS_MAX)) {
8394 		panic("%s: invalid index %u", __func__, index);
8395 	}
8396 	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8397 }
8398 
/**
 * Map a single physical page into one of the calling CPU's dedicated copy
 * windows so its contents can be accessed through a per-CPU virtual alias.
 *
 * @param pn        physical page number to map.
 * @param prot      VM_PROT_WRITE selects a writable (AP_RWNA) mapping;
 *                  anything else maps read-only (AP_RONA).
 * @param wimg_bits memory-attribute (cacheability) bits applied to the mapping.
 *
 * @return the index of the copy window used; pass it to
 *         pmap_unmap_cpu_windows_copy() to tear the mapping down.
 */
MARK_AS_PMAP_TEXT unsigned int
pmap_map_cpu_windows_copy_internal(
	ppnum_t pn,
	vm_prot_t prot,
	unsigned int wimg_bits)
{
	pt_entry_t      *ptep = NULL, pte;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
	unsigned int    cpu_num;
	unsigned int    i;
	vm_offset_t     cpu_copywindow_vaddr = 0;
	bool            need_strong_sync = false;

#if XNU_MONITOR
	/* Cache attributes are only consulted for non-managed (I/O) pages. */
	unsigned int    cacheattr = (!pa_valid(ptoa(pn) & ARM_PTE_PAGE_MASK) ? pmap_cache_attributes(pn) : 0);
	need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
#endif

#if XNU_MONITOR
#ifdef  __ARM_COHERENT_IO__
	/* The PPL refuses copy windows onto managed pages or writable windows
	 * onto PPL-protected I/O, unless the PPL is disabled. */
	if (__improbable(pa_valid(ptoa(pn) & ARM_PTE_PAGE_MASK) && !pmap_ppl_disable)) {
		panic("%s: attempted to map a managed page, "
		    "pn=%u, prot=0x%x, wimg_bits=0x%x",
		    __FUNCTION__,
		    pn, prot, wimg_bits);
	}
	if (__improbable((cacheattr & PP_ATTR_MONITOR) && (prot != VM_PROT_READ) && !pmap_ppl_disable)) {
		panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__, (uint64_t)ptoa(pn));
	}

#else /* __ARM_COHERENT_IO__ */
#error CPU copy windows are not properly supported with both the PPL and incoherent IO
#endif /* __ARM_COHERENT_IO__ */
#endif /* XNU_MONITOR */
	cpu_num = pmap_cpu_data->cpu_number;

	/* Find the first currently-unused window on this CPU. */
	for (i = 0; i < CPUWINDOWS_MAX; i++) {
		cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
		ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (i == CPUWINDOWS_MAX) {
		panic("pmap_map_cpu_windows_copy: out of window");
	}

	/* Build a never-executable mapping of the requested page. */
	pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
#if __ARM_KERNEL_PROTECT__
	pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	pte |= wimg_to_pte(wimg_bits, ptoa(pn));

	if (prot & VM_PROT_WRITE) {
		pte |= ARM_PTE_AP(AP_RWNA);
	} else {
		pte |= ARM_PTE_AP(AP_RONA);
	}

	write_pte_fast(ptep, pte);
	/*
	 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
	 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
	 */
	FLUSH_PTE_STRONG();
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i], true);
	pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;

	return i;
}
8471 
8472 unsigned int
8473 pmap_map_cpu_windows_copy(
8474 	ppnum_t pn,
8475 	vm_prot_t prot,
8476 	unsigned int wimg_bits)
8477 {
8478 #if XNU_MONITOR
8479 	return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
8480 #else
8481 	return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8482 #endif
8483 }
8484 
/**
 * Tear down the calling CPU's copy window at the given index.
 *
 * @param index window index previously returned by
 *        pmap_map_cpu_windows_copy_internal().
 */
MARK_AS_PMAP_TEXT void
pmap_unmap_cpu_windows_copy_internal(
	unsigned int index)
{
	pt_entry_t      *ptep;
	unsigned int    cpu_num;
	vm_offset_t     cpu_copywindow_vaddr = 0;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();

	cpu_num = pmap_cpu_data->cpu_number;

	cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
	/* Issue full-system DSB to ensure prior operations on the per-CPU window
	 * (which are likely to have been on I/O memory) are complete before
	 * tearing down the mapping. */
	__builtin_arm_dsb(DSB_SY);
	ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
	/* Clear the PTE, then invalidate the stale translation for the window. */
	write_pte_strong(ptep, ARM_PTE_TYPE_FAULT);
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index], true);
}
8505 
/*
 * Tear down the per-CPU copy window at the given index, dispatching to the
 * PPL on monitor-enabled systems.
 *
 * Fix: the previous code used `return expr;` where expr has type void, which
 * is a constraint violation in standard C (C11 6.8.6.4); call the helper and
 * fall off the end instead.
 *
 * @param index window index previously returned by pmap_map_cpu_windows_copy().
 */
void
pmap_unmap_cpu_windows_copy(
	unsigned int index)
{
#if XNU_MONITOR
	pmap_unmap_cpu_windows_copy_ppl(index);
#else
	pmap_unmap_cpu_windows_copy_internal(index);
#endif
}
8516 
8517 #if XNU_MONITOR
8518 
8519 MARK_AS_PMAP_TEXT void
8520 pmap_invoke_with_page(
8521 	ppnum_t page_number,
8522 	void *ctx,
8523 	void (*callback)(void *ctx, ppnum_t page_number, const void *page))
8524 {
8525 	#pragma unused(page_number, ctx, callback)
8526 }
8527 
8528 /*
8529  * Loop over every pmap_io_range (I/O ranges marked as owned by
8530  * the PPL in the device tree) and conditionally call callback() on each range
8531  * that needs to be included in the hibernation image.
8532  *
8533  * @param ctx      Will be passed as-is into the callback method. Use NULL if no
8534  *                 context is needed in the callback.
8535  * @param callback Callback function invoked on each range (gated by flag).
8536  */
8537 MARK_AS_PMAP_TEXT void
8538 pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
8539 {
8540 	extern const pmap_io_range_t* io_attr_table;
8541 	extern const unsigned int num_io_rgns;
8542 	for (unsigned int i = 0; i < num_io_rgns; ++i) {
8543 		if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
8544 			callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
8545 		}
8546 	}
8547 }
8548 
8549 /**
8550  * Set the HASHED pv_head_table flag for the passed in physical page if it's a
8551  * PPL-owned page. Otherwise, do nothing.
8552  *
8553  * @param addr Physical address of the page to set the HASHED flag on.
8554  */
8555 MARK_AS_PMAP_TEXT void
8556 pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
8557 {
8558 	/* Ignore non-managed kernel memory. */
8559 	if (!pa_valid(addr)) {
8560 		return;
8561 	}
8562 
8563 	const unsigned int pai = pa_index(addr);
8564 	if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
8565 		pv_entry_t **pv_h = pai_to_pvh(pai);
8566 
8567 		/* Mark that the PPL-owned page has been hashed into the hibernation image. */
8568 		pvh_lock(pai);
8569 		pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
8570 		pvh_unlock(pai);
8571 	}
8572 }
8573 
8574 /**
8575  * Loop through every physical page in the system and clear out the HASHED flag
8576  * on every PPL-owned page. That flag is used to keep track of which pages have
8577  * been hashed into the hibernation image during the hibernation entry process.
8578  *
8579  * The HASHED flag needs to be cleared out between hibernation cycles because the
8580  * pv_head_table and pp_attr_table's might have been copied into the hibernation
8581  * image with the HASHED flag set on certain pages. It's important to clear the
8582  * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
8583  * into the hibernation image can't be compromised across hibernation cycles.
8584  */
8585 MARK_AS_PMAP_TEXT void
8586 pmap_clear_ppl_hashed_flag_all(void)
8587 {
8588 	const unsigned int last_index = pa_index(vm_last_phys);
8589 	pv_entry_t **pv_h = NULL;
8590 
8591 	for (int pai = 0; pai < last_index; ++pai) {
8592 		pv_h = pai_to_pvh(pai);
8593 
8594 		/* Test for PPL-owned pages that have the HASHED flag set in its pv_head_table entry. */
8595 		if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
8596 		    (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
8597 			pvh_lock(pai);
8598 			pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
8599 			pvh_unlock(pai);
8600 		}
8601 	}
8602 }
8603 
8604 /**
8605  * Enforce that all PPL-owned pages were hashed into the hibernation image. The
8606  * ppl_hib driver will call this after all wired pages have been copied into the
8607  * hibernation image.
8608  */
8609 MARK_AS_PMAP_TEXT void
8610 pmap_check_ppl_hashed_flag_all(void)
8611 {
8612 	const unsigned int last_index = pa_index(vm_last_phys);
8613 	pv_entry_t **pv_h = NULL;
8614 
8615 	for (int pai = 0; pai < last_index; ++pai) {
8616 		pv_h = pai_to_pvh(pai);
8617 
8618 		/**
8619 		 * The PMAP stacks are explicitly not saved into the image so skip checking
8620 		 * the pages that contain the PMAP stacks.
8621 		 */
8622 		const bool is_pmap_stack = (pai >= pa_index(pmap_stacks_start_pa)) &&
8623 		    (pai < pa_index(pmap_stacks_end_pa));
8624 
8625 		if (!is_pmap_stack &&
8626 		    (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
8627 		    !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
8628 			panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
8629 		}
8630 	}
8631 }
8632 
8633 #endif /* XNU_MONITOR */
8634 
8635 /*
8636  * Indicate that a pmap is intended to be used as a nested pmap
8637  * within one or more larger address spaces.  This must be set
8638  * before pmap_nest() is called with this pmap as the 'subordinate'.
8639  */
8640 MARK_AS_PMAP_TEXT void
8641 pmap_set_nested_internal(
8642 	pmap_t pmap)
8643 {
8644 	validate_pmap_mutable(pmap);
8645 	if (__improbable(pmap->type != PMAP_TYPE_USER)) {
8646 		panic("%s: attempt to nest unsupported pmap %p of type 0x%hhx",
8647 		    __func__, pmap, pmap->type);
8648 	}
8649 	pmap->type = PMAP_TYPE_NESTED;
8650 	pmap_get_pt_ops(pmap)->free_id(pmap);
8651 }
8652 
8653 void
8654 pmap_set_nested(
8655 	pmap_t pmap)
8656 {
8657 #if XNU_MONITOR
8658 	pmap_set_nested_ppl(pmap);
8659 #else
8660 	pmap_set_nested_internal(pmap);
8661 #endif
8662 }
8663 
8664 /*
8665  * pmap_trim_range(pmap, start, end)
8666  *
8667  * pmap  = pmap to operate on
8668  * start = start of the range
8669  * end   = end of the range
8670  *
8671  * Attempts to deallocate TTEs for the given range in the nested range.
8672  */
8673 MARK_AS_PMAP_TEXT static void
8674 pmap_trim_range(
8675 	pmap_t pmap,
8676 	addr64_t start,
8677 	addr64_t end)
8678 {
8679 	addr64_t cur;
8680 	addr64_t nested_region_start;
8681 	addr64_t nested_region_end;
8682 	addr64_t adjusted_start;
8683 	addr64_t adjusted_end;
8684 	addr64_t adjust_offmask;
8685 	tt_entry_t * tte_p;
8686 	pt_entry_t * pte_p;
8687 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8688 
8689 	if (__improbable(end < start)) {
8690 		panic("%s: invalid address range, "
8691 		    "pmap=%p, start=%p, end=%p",
8692 		    __func__,
8693 		    pmap, (void*)start, (void*)end);
8694 	}
8695 
8696 	nested_region_start = pmap->nested_region_addr;
8697 	nested_region_end = nested_region_start + pmap->nested_region_size;
8698 
8699 	if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
8700 		panic("%s: range outside nested region %p-%p, "
8701 		    "pmap=%p, start=%p, end=%p",
8702 		    __func__, (void *)nested_region_start, (void *)nested_region_end,
8703 		    pmap, (void*)start, (void*)end);
8704 	}
8705 
8706 	/* Contract the range to TT page boundaries. */
8707 	adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8708 	adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
8709 	adjusted_end = end & ~adjust_offmask;
8710 
8711 	/* Iterate over the range, trying to remove TTEs. */
8712 	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
8713 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
8714 
8715 		tte_p = pmap_tte(pmap, cur);
8716 
8717 		if ((tte_p != NULL) && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
8718 			pte_p = (pt_entry_t *) ttetokv(*tte_p);
8719 
8720 			/* pmap_tte_deallocate()/pmap_tte_remove() will drop the pmap lock */
8721 			if ((pmap->type == PMAP_TYPE_NESTED) && (ptep_get_info(pte_p)->refcnt == 0)) {
8722 				/* Deallocate for the nested map. */
8723 				pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
8724 			} else if (pmap->type == PMAP_TYPE_USER) {
8725 				/**
8726 				 * Just remove for the parent map. If the leaf table pointed
8727 				 * to by the TTE being removed (owned by the nested pmap)
8728 				 * has any mappings, then this call will panic. This
8729 				 * enforces the policy that tables being trimmed must be
8730 				 * empty to prevent possible use-after-free attacks.
8731 				 */
8732 				pmap_tte_remove(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
8733 			} else {
8734 				panic("%s: Unsupported pmap type for nesting %p %d", __func__, pmap, pmap->type);
8735 			}
8736 		} else {
8737 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
8738 		}
8739 	}
8740 
8741 	/* Remove empty L2 TTs. */
8742 	adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
8743 	adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);
8744 
8745 	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
8746 		/* For each L1 entry in our range... */
8747 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
8748 
8749 		bool remove_tt1e = true;
8750 		tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
8751 		tt_entry_t * tt2e_start;
8752 		tt_entry_t * tt2e_end;
8753 		tt_entry_t * tt2e_p;
8754 		tt_entry_t tt1e;
8755 
8756 		if (tt1e_p == NULL) {
8757 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
8758 			continue;
8759 		}
8760 
8761 		tt1e = *tt1e_p;
8762 
8763 		if (tt1e == ARM_TTE_TYPE_FAULT) {
8764 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
8765 			continue;
8766 		}
8767 
8768 		tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
8769 		tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];
8770 
8771 		for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
8772 			if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
8773 				/*
8774 				 * If any TTEs are populated, don't remove the
8775 				 * L1 TT.
8776 				 */
8777 				remove_tt1e = false;
8778 			}
8779 		}
8780 
8781 		if (remove_tt1e) {
8782 			pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tt1e_p, PMAP_TT_L1_LEVEL);
8783 		} else {
8784 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
8785 		}
8786 	}
8787 }
8788 
8789 /**
8790  * State machine for multi-step pmap trimming. Trimming is the action of
8791  * deallocating the TTEs of the shared region of pmaps down to a given range.
8792  * On PPL-enabled systems, this needs to be done in multiple steps to avoid
8793  * disabling preemption for too long. These steps include computing the bounds
8794  * of the shared region, trimming the head of the "grand", trimming the tail of
8795  * the "grand", and trimming the "subord". Some of the steps can be skipped under
8796  * different conditions.
8797  *
8798  * @param grand the pmap in which the pages are nested
8799  * @param subord the pmap from which the pages are shared, or nested
8800  * @param vstart start of the used range in "grand"
8801  * @param size size of the used range
8802  * @param state the current state of the state machine
8803  *
8804  * @return the next state of the state machine, to be used in the next call
8805  *         into this function.
8806  */
8807 MARK_AS_PMAP_TEXT pmap_trim_state_t
8808 pmap_trim_internal(
8809 	pmap_t grand,
8810 	pmap_t subord,
8811 	addr64_t vstart,
8812 	uint64_t size,
8813 	pmap_trim_state_t state)
8814 {
8815 	/* Validation needs to be done regardless of state. */
8816 	addr64_t vend;
8817 
8818 	if (__improbable(os_add_overflow(vstart, size, &vend))) {
8819 		panic("%s: grand addr wraps around, "
8820 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8821 		    __func__, grand, subord, (void*)vstart, size, state);
8822 	}
8823 
8824 	validate_pmap_mutable(grand);
8825 	validate_pmap(subord);
8826 
8827 	if (__improbable(subord->type != PMAP_TYPE_NESTED)) {
8828 		panic("%s: subord is of non-nestable type 0x%hhx, "
8829 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8830 		    __func__, subord->type, grand, subord, (void*)vstart, size, state);
8831 	}
8832 
8833 	if (__improbable(grand->type != PMAP_TYPE_USER)) {
8834 		panic("%s: grand is of unsupprted type 0x%hhx for nesting, "
8835 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8836 		    __func__, grand->type, grand, subord, (void*)vstart, size, state);
8837 	}
8838 
8839 	if (__improbable(grand->nested_pmap != subord)) {
8840 		panic("%s: grand->nested != subord, "
8841 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8842 		    __func__, grand, subord, (void*)vstart, size, state);
8843 	}
8844 
8845 	if (__improbable((size != 0) &&
8846 	    ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
8847 		panic("%s: grand range not in nested region, "
8848 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8849 		    __func__, grand, subord, (void*)vstart, size, state);
8850 	}
8851 
8852 	/* Trimming starts with figuring out the bounds for the grand. */
8853 	if (state == PMAP_TRIM_STATE_START) {
8854 		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
8855 
8856 		/**
8857 		 * The "nested_has_no_bounds_ref" flag is set by `pmap_nest()` if the subord is nested into
8858 		 * the grand when the bounds are not known yet. Therefore, if it is not set, either any nesting
8859 		 * has not happened, or trimming has been done, or nesting has been done with bounds known so
8860 		 * the "extra" region was not nested in the first place. Anyway, trimming is not needed so
8861 		 * we exit early with PMAP_TRIM_STATE_DONE.
8862 		 */
8863 		if (!grand->nested_has_no_bounds_ref) {
8864 			assert(subord->nested_bounds_set);
8865 
8866 			/* Nothing to do if the grand already has bounds set, otherwise inherit from the subord. */
8867 			if (!grand->nested_bounds_set) {
8868 				/* Inherit the bounds from subord. */
8869 				grand->nested_region_true_start = subord->nested_region_true_start;
8870 				grand->nested_region_true_end = subord->nested_region_true_end;
8871 				grand->nested_bounds_set = true;
8872 			}
8873 
8874 			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
8875 
8876 			/* Now that the grand has bounds, we are done. */
8877 			return PMAP_TRIM_STATE_DONE;
8878 		}
8879 
8880 		/* If the subord doesn't have bounds set yet, compute them from vstart and a non-zero size. */
8881 		if ((!subord->nested_bounds_set) && size) {
8882 			const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
8883 			const addr64_t adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8884 
8885 			subord->nested_region_true_start = vstart;
8886 			subord->nested_region_true_end = vend;
8887 			subord->nested_region_true_start &= ~adjust_offmask;
8888 
8889 			if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
8890 				panic("%s: padded true end wraps around, "
8891 				    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8892 				    __func__, grand, subord, (void*)vstart, size, state);
8893 			}
8894 
8895 			subord->nested_region_true_end &= ~adjust_offmask;
8896 			subord->nested_bounds_set = true;
8897 		}
8898 
8899 		/* If the subord has bounds set now, let the grand inherit and continue to trim. Otherwise, we are done. */
8900 		if (subord->nested_bounds_set) {
8901 			/* Inherit the bounds from subord. */
8902 			grand->nested_region_true_start = subord->nested_region_true_start;
8903 			grand->nested_region_true_end = subord->nested_region_true_end;
8904 			grand->nested_bounds_set = true;
8905 
8906 			/* If we know the bounds, we can trim the pmap. */
8907 			grand->nested_has_no_bounds_ref = false;
8908 			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
8909 
8910 			state = PMAP_TRIM_STATE_GRAND_BEFORE;
8911 		} else {
8912 			/* Don't trim if we don't know the bounds. */
8913 			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
8914 
8915 			return PMAP_TRIM_STATE_DONE;
8916 		}
8917 	}
8918 
8919 	/* Sanity check here: we are ready to trim, do we know the bounds yet? */
8920 	if (!grand->nested_bounds_set) {
8921 		panic("%s: !grand->nested_bounds_set, "
8922 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8923 		    __func__, grand, subord, (void*)vstart, size, state);
8924 	}
8925 
8926 	if (state == PMAP_TRIM_STATE_GRAND_BEFORE) {
8927 		pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
8928 
8929 #if XNU_MONITOR
8930 		if (pmap_pending_preemption()) {
8931 			return PMAP_TRIM_STATE_GRAND_AFTER;
8932 		}
8933 #endif
8934 
8935 		state = PMAP_TRIM_STATE_GRAND_AFTER;
8936 	}
8937 
8938 	if (state == PMAP_TRIM_STATE_GRAND_AFTER) {
8939 		pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));
8940 
8941 #if XNU_MONITOR
8942 		if (pmap_pending_preemption()) {
8943 			return PMAP_TRIM_STATE_SUBORD;
8944 		}
8945 #endif
8946 
8947 		state = PMAP_TRIM_STATE_SUBORD;
8948 	}
8949 
8950 	/* START state is guaranteed to compute the bounds for the subord. */
8951 	if (!subord->nested_bounds_set) {
8952 		panic("%s: !subord->nested_bounds_set, "
8953 		    "grand=%p, subord=%p, vstart=%p, size=%#llx, state=%u",
8954 		    __func__, grand, subord, (void*)vstart, size, state);
8955 	}
8956 
8957 	if (state == PMAP_TRIM_STATE_SUBORD) {
8958 		pmap_trim_subord(subord);
8959 	}
8960 
8961 	return PMAP_TRIM_STATE_DONE;
8962 }
8963 
/*
 * Drop this pmap's "no bounds" reference on its nested pmap, trimming this
 * pmap -- and possibly the nested pmap itself -- if the true bounds are known.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_self(pmap_t pmap)
{
	if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
		/* If we have a no bounds ref, we need to drop it. */
		pmap_lock(pmap->nested_pmap, PMAP_LOCK_SHARED);
		pmap->nested_has_no_bounds_ref = false;
		/* Snapshot the nested pmap's bounds under its lock, then release it
		 * before trimming. */
		boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
		vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
		vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
		pmap_unlock(pmap->nested_pmap, PMAP_LOCK_SHARED);

		if (nested_bounds_set) {
			/* Trim the parts of the nested region outside the true bounds. */
			pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
			pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
		}
		/*
		 * Try trimming the nested pmap, in case we had the
		 * last reference.
		 */
		pmap_trim_subord(pmap->nested_pmap);
	}
}
8987 
8988 /*
8989  * pmap_trim_subord(grand, subord)
8990  *
8991  * grand  = pmap that we have nested subord in
8992  * subord = nested pmap we are attempting to trim
8993  *
8994  * Trims subord if possible
8995  */
8996 MARK_AS_PMAP_TEXT static void
8997 pmap_trim_subord(pmap_t subord)
8998 {
8999 	bool contract_subord = false;
9000 
9001 	pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9002 
9003 	subord->nested_no_bounds_refcnt--;
9004 
9005 	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
9006 		/* If this was the last no bounds reference, trim subord. */
9007 		contract_subord = true;
9008 	}
9009 
9010 	pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9011 
9012 	if (contract_subord) {
9013 		pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
9014 		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
9015 	}
9016 }
9017 
9018 /**
9019  * Deallocates the TTEs of the shared region of pmaps down to a given range.
9020  * On PPL-enabled systems, this needs to be done in multiple steps to avoid
9021  * disabling preemption for too long.
9022  *
9023  * @note When we load the shared region we always create pages tables for the
9024  *       entire region. In practice, the shared cache may use just a portion
9025  *       of that. Before we know the bounds of the shared region, it can
9026  *       already be mapped into processes. Therefore, once the bounds are
9027  *       known, "trimming" comes in handy to remove the unnecessary page
9028  *       tables in the processes the shared region is mapped in, and eventually
9029  *       those in the shared region itself. Note that the shared region must
9030  *       be trimmed after the user processes because it has the L3 entries
9031  *       everyone else is pointing to.
9032  *
9033  * @param grand the pmap in which the pages are nested
9034  * @param subord the pmap from which the pages are shared, or nested
9035  * @param vstart start of the used range in "grand"
9036  * @param size size of the used range
9037  */
9038 void
9039 pmap_trim(
9040 	pmap_t grand,
9041 	pmap_t subord,
9042 	addr64_t vstart,
9043 	uint64_t size)
9044 {
9045 	pmap_trim_state_t state = PMAP_TRIM_STATE_START;
9046 
9047 #if XNU_MONITOR
9048 	/* On PPL systems, drives the state machine until its done. */
9049 	while (state != PMAP_TRIM_STATE_DONE) {
9050 		__assert_only pmap_trim_state_t old_state = state;
9051 		state = pmap_trim_ppl(grand, subord, vstart, size, state);
9052 
9053 		/* Are we making progress? */
9054 		assert(old_state != state);
9055 	}
9056 
9057 	pmap_ledger_check_balance(grand);
9058 	pmap_ledger_check_balance(subord);
9059 #else
9060 	state = pmap_trim_internal(grand, subord, vstart, size, state);
9061 
9062 	/* On non-PPL systems, we expect the implementation to finish in one call. */
9063 	assert(state == PMAP_TRIM_STATE_DONE);
9064 #endif
9065 }
9066 
9067 #if HAS_APPLE_PAC
9068 void *
9069 pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9070 {
9071 	void *res = NULL;
9072 	uint64_t current_intr_state = pmap_interrupts_disable();
9073 
9074 	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
9075 	switch (key) {
9076 	case ptrauth_key_asia:
9077 		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
9078 		break;
9079 	case ptrauth_key_asda:
9080 		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
9081 		break;
9082 	default:
9083 		panic("attempt to sign user pointer without process independent key");
9084 	}
9085 	ml_disable_user_jop_key(jop_key, saved_jop_state);
9086 
9087 	pmap_interrupts_restore(current_intr_state);
9088 
9089 	return res;
9090 }
9091 
/*
 * Sign a user pointer with a process-independent ptrauth key.
 * Thin wrapper around pmap_sign_user_ptr_internal().
 */
void *
pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
}
9097 
9098 void *
9099 pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9100 {
9101 	if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
9102 		panic("attempt to auth user pointer without process independent key");
9103 	}
9104 
9105 	void *res = NULL;
9106 	uint64_t current_intr_state = pmap_interrupts_disable();
9107 
9108 	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
9109 	res = ml_auth_ptr_unchecked(value, key, discriminator);
9110 	ml_disable_user_jop_key(jop_key, saved_jop_state);
9111 
9112 	pmap_interrupts_restore(current_intr_state);
9113 
9114 	return res;
9115 }
9116 
/*
 * Authenticate a user pointer signed with a process-independent ptrauth key.
 * Thin wrapper around pmap_auth_user_ptr_internal().
 */
void *
pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
}
9122 #endif /* HAS_APPLE_PAC */
9123 
9124 /*
9125  * Marker to indicate that a pmap_[un]nest() operation has finished operating on
9126  * the 'subordinate' pmap and has begun operating on the 'grand' pmap.  This
9127  * flag is supplied in the low-order bit of the 'vrestart' param as well as the
9128  * return value, to indicate where a preempted [un]nest operation should resume.
9129  * When the return value contains the ending address of the nested region with
9130  * PMAP_NEST_GRAND in the low-order bit, the operation has completed.
9131  */
9132 #define PMAP_NEST_GRAND ((vm_map_offset_t) 0x1)
9133 
9134 /*
9135  *	kern_return_t pmap_nest(grand, subord, vstart, size)
9136  *
9137  *	grand  = the pmap that we will nest subord into
9138  *	subord = the pmap that goes into the grand
9139  *	vstart  = start of range in pmap to be inserted
9140  *	size   = Size of nest area (up to 16TB)
9141  *
9142  *	Inserts a pmap into another.  This is used to implement shared segments.
9143  *
9144  */
9145 
9146 /**
9147  * Embeds a range of mappings from one pmap ('subord') into another ('grand')
9148  * by inserting the twig-level TTEs from 'subord' directly into 'grand'.
9149  * This function operates in 3 main phases:
9150  * 1. Bookkeeping to ensure tracking structures for the nested region are set up.
9151  * 2. Expansion of subord to ensure the required leaf-level page table pages for
9152  *    the mapping range are present in subord.
9153  * 3. Copying of twig-level TTEs from subord to grand, such that grand ultimately
9154  *    contains pointers to subord's leaf-level pagetable pages for the specified
9155  *    VA range.
9156  *
9157  * This function may return early due to pending AST_URGENT preemption; if so
9158  * it will indicate the need to be re-entered.
9159  *
9160  * @param grand pmap to insert the TTEs into.  Must be a user pmap.
9161  * @param subord pmap from which to extract the TTEs.  Must be a nested pmap.
9162  * @param vstart twig-aligned virtual address for the beginning of the nesting range
9163  * @param size twig-aligned size of the nesting range
9164  * @param vrestart the twig-aligned starting address of the current call.  May contain
9165  *        PMAP_NEST_GRAND in bit 0 to indicate the operation should skip to step 3) above.
9166  * @param krp Should be initialized to KERN_SUCCESS by caller, will be set to
9167  *        KERN_RESOURCE_SHORTAGE on allocation failure.
9168  *
9169  * @return the virtual address at which to restart the operation, possibly including
9170  *         PMAP_NEST_GRAND to indicate the phase at which to restart.  If
9171  *         (vstart + size) | PMAP_NEST_GRAND is returned, the operation completed.
9172  */
MARK_AS_PMAP_TEXT vm_map_offset_t
pmap_nest_internal(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size,
	vm_map_offset_t vrestart,
	kern_return_t *krp)
{
	kern_return_t kr = KERN_FAILURE;
	vm_map_offset_t vaddr;
	tt_entry_t     *stte_p;
	tt_entry_t     *gtte_p;
	unsigned int    nested_region_asid_bitmap_size;
	unsigned int*   nested_region_asid_bitmap;
	int             expand_options = 0;
	/* Drop the ref taken on subord below unless grand keeps it for the nesting. */
	bool            deref_subord = true;

	/* Sanity-check the requested range and the restart cursor before touching state. */
	addr64_t vend;
	if (__improbable(os_add_overflow(vstart, size, &vend))) {
		panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
	}
	if (__improbable(((vrestart & ~PMAP_NEST_GRAND) > vend) ||
	    ((vrestart & ~PMAP_NEST_GRAND) < vstart))) {
		panic("%s: vrestart 0x%llx is outside range [0x%llx, 0x%llx)", __func__,
		    (unsigned long long)vrestart, (unsigned long long)vstart, (unsigned long long)vend);
	}

	assert(krp != NULL);
	validate_pmap_mutable(grand);
	validate_pmap(subord);
#if XNU_MONITOR
	/*
	 * Ordering is important here.  validate_pmap() has already ensured subord is a
	 * PPL-controlled pmap pointer, but it could have already been destroyed or could
	 * be in the process of being destroyed.  If destruction is already committed,
	 * then the check of ref_count below will cover us.  If destruction is initiated
	 * during or after this call, then pmap_destroy() will catch the non-zero
	 * nested_count.
	 */
	os_atomic_inc(&subord->nested_count, relaxed);
	os_atomic_thread_fence(seq_cst);
#endif
	/* Take a reference on subord for the duration of (and possibly beyond) this call. */
	if (__improbable(os_atomic_inc_orig(&subord->ref_count, relaxed) <= 0)) {
		panic("%s: invalid subordinate pmap %p", __func__, subord);
	}

	/* Both pmaps must use identical page-table geometry for TTE sharing to work. */
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
	if (__improbable(pmap_get_pt_attr(subord) != pt_attr)) {
		panic("%s: attempt to nest pmap %p into pmap %p with mismatched attributes", __func__, subord, grand);
	}

#if XNU_MONITOR
	/* The PPL cannot block for memory; the caller retries on KERN_RESOURCE_SHORTAGE. */
	expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
#endif

	/* Nesting operates on whole twig-level (leaf-table-sized) regions only. */
	if (__improbable(((size | vstart | (vrestart & ~PMAP_NEST_GRAND)) &
	    (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
		panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx",
		    grand, vstart, size, (unsigned long long)vrestart);
	}

	if (__improbable(subord->type != PMAP_TYPE_NESTED)) {
		panic("%s: subordinate pmap %p is of non-nestable type 0x%hhx", __func__, subord, subord->type);
	}

	if (__improbable(grand->type != PMAP_TYPE_USER)) {
		panic("%s: grand pmap %p is of unsupported type 0x%hhx for nesting", __func__, grand, grand->type);
	}

	/*
	 * Phase 1 bookkeeping: lazily allocate subord's unnesting bitmap (one bit per
	 * twig entry).  Allocate outside the lock, then install under the lock only if
	 * no other thread beat us to it; otherwise free our copy.
	 */
	if (subord->nested_region_asid_bitmap == NULL) {
		nested_region_asid_bitmap_size  = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);

#if XNU_MONITOR
		pmap_paddr_t pa = 0;

		if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
			panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
			    "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
			    __FUNCTION__, nested_region_asid_bitmap_size,
			    grand, subord, vstart, size);
		}

		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

		if (kr != KERN_SUCCESS) {
			goto nest_cleanup;
		}

		assert(pa);

		nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
#else
		nested_region_asid_bitmap = kalloc_data(
			nested_region_asid_bitmap_size * sizeof(unsigned int),
			Z_WAITOK | Z_ZERO);
#endif

		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
		if (subord->nested_region_asid_bitmap == NULL) {
			subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
			subord->nested_region_addr = vstart;
			subord->nested_region_size = (mach_vm_offset_t) size;

			/**
			 * Ensure that the rest of the subord->nested_region_* fields are
			 * initialized and visible before setting the nested_region_asid_bitmap
			 * field (which is used as the flag to say that the rest are initialized).
			 */
			__builtin_arm_dmb(DMB_ISHST);
			subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
			nested_region_asid_bitmap = NULL;
		}
		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
		/* Lost the race: another thread installed a bitmap, so release ours. */
		if (nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys_nofail((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
#else
			kfree_data(nested_region_asid_bitmap,
			    nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
	}

	/**
	 * Ensure subsequent reads of the subord->nested_region_* fields don't get
	 * speculated before their initialization.
	 */
	__builtin_arm_dmb(DMB_ISHLD);

	/*
	 * If the requested range extends past subord's current nested region, grow the
	 * region and reallocate the bitmap.  Same allocate-outside/install-under-lock
	 * pattern as above.
	 */
	if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
		uint64_t        new_size;
		unsigned int    new_nested_region_asid_bitmap_size;
		unsigned int*   new_nested_region_asid_bitmap;

		nested_region_asid_bitmap = NULL;
		nested_region_asid_bitmap_size = 0;
		new_size =  vend - subord->nested_region_addr;

		/* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
		new_nested_region_asid_bitmap_size  = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;

#if XNU_MONITOR
		pmap_paddr_t pa = 0;

		if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
			panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
			    "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
			    __FUNCTION__, new_nested_region_asid_bitmap_size,
			    grand, subord, vstart, new_size);
		}

		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

		if (kr != KERN_SUCCESS) {
			goto nest_cleanup;
		}

		assert(pa);

		new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
#else
		new_nested_region_asid_bitmap = kalloc_data(
			new_nested_region_asid_bitmap_size * sizeof(unsigned int),
			Z_WAITOK | Z_ZERO);
#endif
		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
		if (subord->nested_region_size < new_size) {
			/* Copy the old bitmap contents, then swap in the larger bitmap. */
			bcopy(subord->nested_region_asid_bitmap,
			    new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
			nested_region_asid_bitmap_size  = subord->nested_region_asid_bitmap_size;
			nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
			subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
			subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
			subord->nested_region_size = new_size;
			new_nested_region_asid_bitmap = NULL;
		}
		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
		/* Free whichever bitmap is no longer in use (the old one, or ours if we lost the race). */
		if (nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys_nofail((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
#else
			kfree_data(nested_region_asid_bitmap,
			    nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
		if (new_nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys_nofail((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
#else
			kfree_data(new_nested_region_asid_bitmap,
			    new_nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
	}

	pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);

	/* Attach subord to grand if this is grand's first nesting operation. */
	if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord, relaxed)) {
		/*
		 * If this is grand's first nesting operation, keep the reference on subord.
		 * It will be released by pmap_destroy_internal() when grand is destroyed.
		 */
		deref_subord = false;

		if (!subord->nested_bounds_set) {
			/*
			 * We are nesting without the shared regions bounds
			 * being known.  We'll have to trim the pmap later.
			 */
			grand->nested_has_no_bounds_ref = true;
			subord->nested_no_bounds_refcnt++;
		}

		grand->nested_region_addr = vstart;
		grand->nested_region_size = (mach_vm_offset_t) size;
	} else {
		if (__improbable(grand->nested_pmap != subord)) {
			panic("pmap_nest() pmap %p has a nested pmap", grand);
		} else if (__improbable(grand->nested_region_addr > vstart)) {
			panic("pmap_nest() pmap %p : attempt to nest outside the nested region", grand);
		} else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
			/* Growing the nested window within grand is allowed. */
			grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
		}
	}

	/* Clamp the working range to the portion of subord that is actually in use. */
	vaddr = vrestart & ~PMAP_NEST_GRAND;
	if (vaddr < subord->nested_region_true_start) {
		vaddr = subord->nested_region_true_start;
	}

	addr64_t true_end = vend;
	if (true_end > subord->nested_region_true_end) {
		true_end = subord->nested_region_true_end;
	}
	__unused unsigned int ttecount = 0;

	/* PMAP_NEST_GRAND in the cursor means phase 2 (expansion) already finished. */
	if (vrestart & PMAP_NEST_GRAND) {
		goto nest_grand;
	}

	/*
	 * Phase 2: make sure subord has a leaf table for every twig entry in the range.
	 * pmap_expand() may block/allocate, so the subord lock is dropped around it.
	 */
	while (vaddr < true_end) {
		stte_p = pmap_tte(subord, vaddr);
		if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
			kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));

			if (kr != KERN_SUCCESS) {
				/* Take the grand lock so the common exit path can unlock it. */
				pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
				goto done;
			}

			pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
		}
		vaddr += pt_attr_twig_size(pt_attr);
		vrestart = vaddr;
		++ttecount;
		/* Periodically yield to urgent ASTs; caller re-enters at vrestart. */
		if (__improbable(!(ttecount % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
		    pmap_pending_preemption())) {
			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
			kr = KERN_SUCCESS;
			pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
			goto done;
		}
	}
	/*
	 * copy TTEs from subord pmap into grand pmap
	 */

	vaddr = (vm_map_offset_t) vstart;
	if (vaddr < subord->nested_region_true_start) {
		vaddr = subord->nested_region_true_start;
	}
	/* Mark the cursor so a re-entry skips straight to the copy phase. */
	vrestart = vaddr | PMAP_NEST_GRAND;

nest_grand:
	pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
	pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
	/*
	 * Phase 3: point grand's twig TTEs at subord's leaf tables.  grand may need
	 * its own twig-level tables expanded first.
	 */
	while (vaddr < true_end) {
		stte_p = pmap_tte(subord, vaddr);
		gtte_p = pmap_tte(grand, vaddr);
		if (gtte_p == PT_ENTRY_NULL) {
			pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
			kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
			pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);

			if (kr != KERN_SUCCESS) {
				goto done;
			}

			gtte_p = pmap_tt2e(grand, vaddr);
		}
		/* Don't leak a page table page.  Don't violate break-before-make. */
		if (__improbable(*gtte_p != ARM_TTE_EMPTY)) {
			panic("%s: attempting to overwrite non-empty TTE %p in pmap %p",
			    __func__, gtte_p, grand);
		}
		*gtte_p = *stte_p;

		vaddr += pt_attr_twig_size(pt_attr);
		vrestart = vaddr | PMAP_NEST_GRAND;
		++ttecount;
		if (__improbable(!(ttecount % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
		    pmap_pending_preemption())) {
			break;
		}
	}
	/* Completed the whole (clamped) range: report full completion to the caller. */
	if (vaddr >= true_end) {
		vrestart = vend | PMAP_NEST_GRAND;
	}

	kr = KERN_SUCCESS;
done:

	/* Make the new TTEs visible to the table walker before releasing the lock. */
	FLUSH_PTE();
	__builtin_arm_isb(ISB_SY);

	pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
#if XNU_MONITOR
nest_cleanup:
	/* krp lives in kernel (non-PPL) memory; pin it while the PPL writes it. */
	if (kr != KERN_SUCCESS) {
		pmap_pin_kernel_pages((vm_offset_t)krp, sizeof(*krp));
		*krp = kr;
		pmap_unpin_kernel_pages((vm_offset_t)krp, sizeof(*krp));
	}
#else
	if (kr != KERN_SUCCESS) {
		*krp = kr;
	}
#endif
	if (deref_subord) {
#if XNU_MONITOR
		os_atomic_dec(&subord->nested_count, relaxed);
#endif
		pmap_destroy_internal(subord);
	}
	return vrestart;
}
9511 
/*
 * Kernel-side driver for nesting: repeatedly calls the (PPL or internal)
 * worker, which may return early for preemption, until the returned cursor
 * equals (vend | PMAP_NEST_GRAND) signalling full completion.
 */
kern_return_t
pmap_nest(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_offset_t vaddr = (vm_map_offset_t)vstart;
	vm_map_offset_t vend = vaddr + size;
	__unused vm_map_offset_t vlast = vaddr;

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(vstart));

	/* The retry loops below may block (allocation), so preemption must be legal. */
	pmap_verify_preemptible();
#if XNU_MONITOR
	while (vaddr != (vend | PMAP_NEST_GRAND)) {
		vaddr = pmap_nest_ppl(grand, subord, vstart, size, vaddr, &kr);
		if (kr == KERN_RESOURCE_SHORTAGE) {
			/* The PPL ran out of free pages; donate one and retry. */
			pmap_alloc_page_for_ppl(0);
			kr = KERN_SUCCESS;
		} else if (kr != KERN_SUCCESS) {
			break;
		} else if (vaddr == vlast) {
			/* A successful call that didn't advance the cursor would loop forever. */
			panic("%s: failed to make forward progress from 0x%llx to 0x%llx at 0x%llx",
			    __func__, (unsigned long long)vstart, (unsigned long long)vend, (unsigned long long)vaddr);
		}
		vlast = vaddr;
	}

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	while ((vaddr != (vend | PMAP_NEST_GRAND)) && (kr == KERN_SUCCESS)) {
		vaddr = pmap_nest_internal(grand, subord, vstart, size, vaddr, &kr);
	}
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);

	return kr;
}
9556 
9557 /*
9558  *	kern_return_t pmap_unnest(grand, vaddr)
9559  *
9560  *	grand  = the pmap that will have the virtual range unnested
9561  *	vaddr  = start of range in pmap to be unnested
9562  *	size   = size of range in pmap to be unnested
9563  *
9564  */
9565 
9566 kern_return_t
9567 pmap_unnest(
9568 	pmap_t grand,
9569 	addr64_t vaddr,
9570 	uint64_t size)
9571 {
9572 	return pmap_unnest_options(grand, vaddr, size, 0);
9573 }
9574 
9575 /**
9576  * Undoes a prior pmap_nest() operation by removing a range of nesting mappings
9577  * from a top-level pmap ('grand').  The corresponding mappings in the nested
9578  * pmap will be marked non-global to avoid TLB conflicts with pmaps that may
9579  * still have the region nested.  The mappings in 'grand' will be left empty
9580  * with the assumption that they will be demand-filled by subsequent access faults.
9581  *
9582  * This function operates in 2 main phases:
9583  * 1. Iteration over the nested pmap's mappings for the specified range to mark
9584  *    them non-global.
9585  * 2. Clearing of the twig-level TTEs for the address range in grand.
9586  *
9587  * This function may return early due to pending AST_URGENT preemption; if so
9588  * it will indicate the need to be re-entered.
9589  *
9590  * @param grand pmap from which to unnest mappings
9591  * @param vaddr twig-aligned virtual address for the beginning of the nested range
9592  * @param size twig-aligned size of the nested range
9593  * @param vrestart the page-aligned starting address of the current call.  May contain
9594  *        PMAP_NEST_GRAND in bit 0 to indicate the operation should skip to step 2) above.
9595  * @param option Extra control flags; may contain PMAP_UNNEST_CLEAN to indicate that
9596  *        grand is being torn down and step 1) above is not needed.
9597  *
9598  * @return the virtual address at which to restart the operation, possibly including
9599  *         PMAP_NEST_GRAND to indicate the phase at which to restart.  If
9600  *         (vaddr + size) | PMAP_NEST_GRAND is returned, the operation completed.
9601  */
9602 MARK_AS_PMAP_TEXT vm_map_offset_t
9603 pmap_unnest_options_internal(
9604 	pmap_t grand,
9605 	addr64_t vaddr,
9606 	uint64_t size,
9607 	vm_map_offset_t vrestart,
9608 	unsigned int option)
9609 {
9610 	vm_map_offset_t start;
9611 	vm_map_offset_t addr;
9612 	tt_entry_t     *tte_p;
9613 	unsigned int    current_index;
9614 	unsigned int    start_index;
9615 	unsigned int    max_index;
9616 	unsigned int    entry_count = 0;
9617 
9618 	addr64_t vend;
9619 	addr64_t true_end;
9620 	if (__improbable(os_add_overflow(vaddr, size, &vend))) {
9621 		panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
9622 	}
9623 	if (__improbable(((vrestart & ~PMAP_NEST_GRAND) > vend) ||
9624 	    ((vrestart & ~PMAP_NEST_GRAND) < vaddr))) {
9625 		panic("%s: vrestart 0x%llx is outside range [0x%llx, 0x%llx)", __func__,
9626 		    (unsigned long long)vrestart, (unsigned long long)vaddr, (unsigned long long)vend);
9627 	}
9628 
9629 	validate_pmap_mutable(grand);
9630 
9631 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9632 
9633 	if (__improbable(((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL)) {
9634 		panic("%s: unaligned base address 0x%llx or size 0x%llx", __func__,
9635 		    (unsigned long long)vaddr, (unsigned long long)size);
9636 	}
9637 
9638 	if (__improbable(grand->nested_pmap == NULL)) {
9639 		panic("%s: %p has no nested pmap", __func__, grand);
9640 	}
9641 
9642 	true_end = vend;
9643 	if (true_end > grand->nested_pmap->nested_region_true_end) {
9644 		true_end = grand->nested_pmap->nested_region_true_end;
9645 	}
9646 
9647 	if (((option & PMAP_UNNEST_CLEAN) == 0) && !(vrestart & PMAP_NEST_GRAND)) {
9648 		if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
9649 			panic("%s: %p: unnest request to not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
9650 		}
9651 
9652 		pmap_lock(grand->nested_pmap, PMAP_LOCK_EXCLUSIVE);
9653 
9654 		start = vrestart;
9655 		if (start < grand->nested_pmap->nested_region_true_start) {
9656 			start = grand->nested_pmap->nested_region_true_start;
9657 		}
9658 		start_index = (unsigned int)((start - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
9659 		max_index = (unsigned int)((true_end - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
9660 		bool flush_tlb = false;
9661 
9662 		for (current_index = start_index, addr = start; current_index < max_index; current_index++) {
9663 			pt_entry_t  *bpte, *cpte;
9664 
9665 			vm_map_offset_t vlim = (addr + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
9666 
9667 			bpte = pmap_pte(grand->nested_pmap, addr);
9668 
9669 			/*
9670 			 * If we've re-entered this function partway through unnesting a leaf region, the
9671 			 * 'unnest' bit will be set in the ASID bitmap, but we won't have finished updating
9672 			 * the run of PTEs.  We therefore also need to check for a non-twig-aligned starting
9673 			 * address.
9674 			 */
9675 			if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap) ||
9676 			    (addr & pt_attr_twig_offmask(pt_attr))) {
9677 				/*
9678 				 * Mark the 'twig' region as being unnested.  Every mapping entered within
9679 				 * the nested pmap in this region will now be marked non-global.  Do this
9680 				 * before marking any of the PTEs within the region as non-global to avoid
9681 				 * the possibility of pmap_enter() subsequently inserting a global mapping
9682 				 * in the region, which could lead to a TLB conflict if a non-global entry
9683 				 * is later inserted for the same VA in a pmap which has fully unnested this
9684 				 * region.
9685 				 */
9686 				setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
9687 				for (cpte = bpte; (bpte != NULL) && (addr < vlim); cpte += PAGE_RATIO) {
9688 					pmap_paddr_t    pa;
9689 					unsigned int    pai = 0;
9690 					boolean_t               managed = FALSE;
9691 					pt_entry_t  spte;
9692 
9693 					if ((*cpte != ARM_PTE_TYPE_FAULT)
9694 					    && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
9695 						spte = *((volatile pt_entry_t*)cpte);
9696 						while (!managed) {
9697 							pa = pte_to_pa(spte);
9698 							if (!pa_valid(pa)) {
9699 								break;
9700 							}
9701 							pai = pa_index(pa);
9702 							pvh_lock(pai);
9703 							spte = *((volatile pt_entry_t*)cpte);
9704 							pa = pte_to_pa(spte);
9705 							if (pai == pa_index(pa)) {
9706 								managed = TRUE;
9707 								break; // Leave the PVH locked as we'll unlock it after we update the PTE
9708 							}
9709 							pvh_unlock(pai);
9710 						}
9711 
9712 						if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
9713 							write_pte_fast(cpte, (spte | ARM_PTE_NG));
9714 							flush_tlb = true;
9715 						}
9716 
9717 						if (managed) {
9718 							pvh_assert_locked(pai);
9719 							pvh_unlock(pai);
9720 						}
9721 					}
9722 
9723 					addr += (pt_attr_page_size(pt_attr) * PAGE_RATIO);
9724 					vrestart = addr;
9725 					++entry_count;
9726 					if (__improbable(!(entry_count % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
9727 					    pmap_pending_preemption())) {
9728 						goto unnest_subord_done;
9729 					}
9730 				}
9731 			}
9732 			addr = vlim;
9733 			vrestart = addr;
9734 			++entry_count;
9735 			if (__improbable(!(entry_count % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
9736 			    pmap_pending_preemption())) {
9737 				break;
9738 			}
9739 		}
9740 
9741 unnest_subord_done:
9742 		if (flush_tlb) {
9743 			FLUSH_PTE_STRONG();
9744 			PMAP_UPDATE_TLBS(grand->nested_pmap, start, vrestart, false, true);
9745 		}
9746 
9747 		pmap_unlock(grand->nested_pmap, PMAP_LOCK_EXCLUSIVE);
9748 		if (current_index < max_index) {
9749 			return vrestart;
9750 		}
9751 	}
9752 
9753 	pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9754 
9755 	/*
9756 	 * invalidate all pdes for segment at vaddr in pmap grand
9757 	 */
9758 	if (vrestart & PMAP_NEST_GRAND) {
9759 		addr = vrestart & ~PMAP_NEST_GRAND;
9760 		if (__improbable(addr & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
9761 			panic("%s: unaligned vrestart 0x%llx", __func__, (unsigned long long)addr);
9762 		}
9763 	} else {
9764 		addr = vaddr;
9765 		vrestart = vaddr | PMAP_NEST_GRAND;
9766 	}
9767 
9768 	if (addr < grand->nested_pmap->nested_region_true_start) {
9769 		addr = grand->nested_pmap->nested_region_true_start;
9770 	}
9771 
9772 	while (addr < true_end) {
9773 		tte_p = pmap_tte(grand, addr);
9774 		/*
9775 		 * The nested pmap may have been trimmed before pmap_nest() completed for grand,
9776 		 * so it's possible that a region we're trying to unnest may not have been
9777 		 * nested in the first place.
9778 		 */
9779 		if (tte_p != NULL) {
9780 			*tte_p = ARM_TTE_TYPE_FAULT;
9781 		}
9782 		addr += pt_attr_twig_size(pt_attr);
9783 		vrestart = addr | PMAP_NEST_GRAND;
9784 		++entry_count;
9785 		if (__improbable(!(entry_count % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
9786 		    pmap_pending_preemption())) {
9787 			break;
9788 		}
9789 	}
9790 	if (addr >= true_end) {
9791 		vrestart = vend | PMAP_NEST_GRAND;
9792 	}
9793 
9794 	FLUSH_PTE_STRONG();
9795 	PMAP_UPDATE_TLBS(grand, start, addr, false, false);
9796 
9797 	pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9798 
9799 	return vrestart;
9800 }
9801 
/*
 * Kernel-side driver for unnesting: repeatedly calls the (PPL or internal)
 * worker, which may return early for preemption, until the returned cursor
 * equals (vend | PMAP_NEST_GRAND) signalling full completion.
 */
kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	vm_map_offset_t vrestart = (vm_map_offset_t)vaddr;
	vm_map_offset_t vend = vaddr + size;
	__unused vm_map_offset_t vlast = vrestart;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	/* The worker may be re-entered many times; preemption must be legal here. */
	pmap_verify_preemptible();
	while (vrestart != (vend | PMAP_NEST_GRAND)) {
#if XNU_MONITOR
		vrestart = pmap_unnest_options_ppl(grand, vaddr, size, vrestart, option);
		/* A call that doesn't advance the cursor would spin forever. */
		if (vrestart == vlast) {
			panic("%s: failed to make forward progress from 0x%llx to 0x%llx at 0x%llx",
			    __func__, (unsigned long long)vaddr, (unsigned long long)vend, (unsigned long long)vrestart);
		}
		vlast = vrestart;
#else
		vrestart = pmap_unnest_options_internal(grand, vaddr, size, vrestart, option);
#endif
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}
9834 
9835 boolean_t
9836 pmap_adjust_unnest_parameters(
9837 	__unused pmap_t p,
9838 	__unused vm_map_offset_t *s,
9839 	__unused vm_map_offset_t *e)
9840 {
9841 	return TRUE; /* to get to log_unnest_badness()... */
9842 }
9843 
9844 #if PMAP_FORK_NEST
9845 /**
9846  * Perform any necessary pre-nesting of the parent's shared region at fork()
9847  * time.
9848  *
9849  * @note This should only be called from vm_map_fork().
9850  *
9851  * @param old_pmap The pmap of the parent task.
9852  * @param new_pmap The pmap of the child task.
9853  * @param nesting_start An output parameter that is updated with the start
9854  *                      address of the range that was pre-nested
9855  * @param nesting_end An output parameter that is updated with the end
9856  *                      address of the range that was pre-nested
9857  *
 * @return KERN_SUCCESS if the pre-nesting was successfully completed.
9859  *         KERN_INVALID_ARGUMENT if the arguments were not valid.
9860  */
9861 kern_return_t
9862 pmap_fork_nest(
9863 	pmap_t old_pmap,
9864 	pmap_t new_pmap,
9865 	vm_map_offset_t *nesting_start,
9866 	vm_map_offset_t *nesting_end)
9867 {
9868 	if (old_pmap == NULL || new_pmap == NULL) {
9869 		return KERN_INVALID_ARGUMENT;
9870 	}
9871 	if (old_pmap->nested_pmap == NULL) {
9872 		return KERN_SUCCESS;
9873 	}
9874 	pmap_nest(new_pmap,
9875 	    old_pmap->nested_pmap,
9876 	    old_pmap->nested_region_addr,
9877 	    old_pmap->nested_region_size);
9878 	assertf(new_pmap->nested_pmap == old_pmap->nested_pmap &&
9879 	    new_pmap->nested_region_addr == old_pmap->nested_region_addr &&
9880 	    new_pmap->nested_region_size == old_pmap->nested_region_size,
9881 	    "nested new (%p,0x%llx,0x%llx) old (%p,0x%llx,0x%llx)",
9882 	    new_pmap->nested_pmap,
9883 	    new_pmap->nested_region_addr,
9884 	    new_pmap->nested_region_size,
9885 	    old_pmap->nested_pmap,
9886 	    old_pmap->nested_region_addr,
9887 	    old_pmap->nested_region_size);
9888 	*nesting_start = old_pmap->nested_region_addr;
9889 	*nesting_end = *nesting_start + old_pmap->nested_region_size;
9890 	return KERN_SUCCESS;
9891 }
9892 #endif /* PMAP_FORK_NEST */
9893 
9894 /*
9895  * disable no-execute capability on
9896  * the specified pmap
9897  */
9898 #if DEVELOPMENT || DEBUG
9899 void
9900 pmap_disable_NX(
9901 	pmap_t pmap)
9902 {
9903 	pmap->nx_enabled = FALSE;
9904 }
9905 #else
9906 void
9907 pmap_disable_NX(
9908 	__unused pmap_t pmap)
9909 {
9910 }
9911 #endif
9912 
9913 /*
9914  * flush a range of hardware TLB entries.
9915  * NOTE: assumes the smallest TLB entry in use will be for
9916  * an ARM small page (4K).
9917  */
9918 
9919 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9920 
9921 #if __ARM_RANGE_TLBI__
9922 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
9923 #define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_TLB_RANGE_PAGES
9924 #else
9925 #define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
9926 #endif // __ARM_RANGE_TLBI__
9927 
9928 static void
9929 flush_mmu_tlb_region_asid_async(
9930 	vm_offset_t va,
9931 	size_t length,
9932 	pmap_t pmap,
9933 	bool last_level_only __unused)
9934 {
9935 	unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
9936 	const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
9937 	ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
9938 	uint32_t    asid;
9939 
9940 	asid = pmap->hw_asid;
9941 
9942 	if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
9943 		boolean_t       flush_all = FALSE;
9944 
9945 		if ((asid == 0) || (pmap->type == PMAP_TYPE_NESTED)) {
9946 			flush_all = TRUE;
9947 		}
9948 		if (flush_all) {
9949 			flush_mmu_tlb_async();
9950 		} else {
9951 			flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
9952 		}
9953 		return;
9954 	}
9955 #if __ARM_RANGE_TLBI__
9956 	if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
9957 		va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
9958 		if (pmap->type == PMAP_TYPE_NESTED) {
9959 			flush_mmu_tlb_allrange_async(va, last_level_only);
9960 		} else {
9961 			flush_mmu_tlb_range_async(va, last_level_only);
9962 		}
9963 		return;
9964 	}
9965 #endif
9966 	vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
9967 	va = tlbi_asid(asid) | tlbi_addr(va);
9968 
9969 	if (pmap->type == PMAP_TYPE_NESTED) {
9970 		flush_mmu_tlb_allentries_async(va, end, pmap_page_size, last_level_only);
9971 	} else {
9972 		flush_mmu_tlb_entries_async(va, end, pmap_page_size, last_level_only);
9973 	}
9974 }
9975 
9976 MARK_AS_PMAP_TEXT static void
9977 flush_mmu_tlb_full_asid_async(pmap_t pmap)
9978 {
9979 	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
9980 }
9981 
9982 void
9983 flush_mmu_tlb_region(
9984 	vm_offset_t va,
9985 	unsigned length)
9986 {
9987 	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap, true);
9988 	sync_tlb_flush();
9989 }
9990 
9991 unsigned int
9992 pmap_cache_attributes(
9993 	ppnum_t pn)
9994 {
9995 	pmap_paddr_t    paddr;
9996 	unsigned int    pai;
9997 	unsigned int    result;
9998 	pp_attr_t       pp_attr_current;
9999 
10000 	paddr = ptoa(pn);
10001 
10002 	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
10003 
10004 	if (!pa_valid(paddr)) {
10005 		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
10006 		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
10007 	}
10008 
10009 	result = VM_WIMG_DEFAULT;
10010 
10011 	pai = pa_index(paddr);
10012 
10013 	pp_attr_current = pp_attr_table[pai];
10014 	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
10015 		result = pp_attr_current & PP_ATTR_WIMG_MASK;
10016 	}
10017 	return result;
10018 }
10019 
/*
 * Perform any cache maintenance required when a physical page's WIMG cache
 * attributes change from wimg_bits_prev to wimg_bits_new: flush/sync the page
 * when leaving a cacheable mode, and force a dcache clean when entering
 * real-time (VM_WIMG_RT) mode.
 */
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	/*
	 * Sync the page when moving away from a cacheable attribute.
	 * NOTE(review): the final clause
	 * ((new != COPYBACK) || (new != INNERWBACK)) is tautologically true
	 * (no value equals both), so any change away from WTHRU always syncs —
	 * presumably '&&' was intended; over-syncing is harmless, so this is
	 * left as-is pending confirmation of the intended semantics.
	 */
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	/* Entering real-time mode requires the page's dcache lines to be cleaned. */
	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
10036 
/*
 * Switch the cache attributes of a managed page used by the VM compressor
 * from prev_cacheattr to new_cacheattr, then perform the required cache
 * maintenance.  Panics if the page is not pmap-managed (or, under the PPL,
 * if it is PPL-owned).
 */
MARK_AS_PMAP_TEXT __unused void
pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
{
	pmap_paddr_t paddr = ptoa(pn);
	const unsigned int pai = pa_index(paddr);

	if (__improbable(!pa_valid(paddr))) {
		panic("%s called on non-managed page 0x%08x", __func__, pn);
	}

	/* The PVH lock serializes attribute updates against concurrent mapping changes. */
	pvh_lock(pai);

#if XNU_MONITOR
	/* PPL-owned pages must never have their attributes changed from this path. */
	if (__improbable(ppattr_pa_test_monitor(paddr))) {
		panic("%s invoked on PPL page 0x%08x", __func__, pn);
	}
#endif

	pmap_update_cache_attributes_locked(pn, new_cacheattr, true);

	pvh_unlock(pai);

	/* Cache maintenance (may block/flush) is done after dropping the PVH lock. */
	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
}
10061 
10062 void *
10063 pmap_map_compressor_page(ppnum_t pn)
10064 {
10065 #if __ARM_PTE_PHYSMAP__
10066 	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
10067 	if (cacheattr != VM_WIMG_DEFAULT) {
10068 #if XNU_MONITOR
10069 		pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
10070 #else
10071 		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
10072 #endif
10073 	}
10074 #endif
10075 	return (void*)phystokv(ptoa(pn));
10076 }
10077 
10078 void
10079 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
10080 {
10081 #if __ARM_PTE_PHYSMAP__
10082 	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
10083 	if (cacheattr != VM_WIMG_DEFAULT) {
10084 #if XNU_MONITOR
10085 		pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
10086 #else
10087 		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
10088 #endif
10089 	}
10090 #endif
10091 }
10092 
10093 /**
10094  * Batch updates the cache attributes of a list of pages. This is a wrapper for
10095  * the ppl call on PPL-enabled platforms or the _internal helper on other platforms.
10096  *
10097  * @param user_page_list List of pages to be updated.
10098  * @param page_cnt Number of pages in total in user_page_list.
10099  * @param cacheattr The new cache attribute.
10100  *
 * @return True on success; this function always returns true once the state machine completes.
10102  */
10103 bool
10104 pmap_batch_set_cache_attributes(
10105 	upl_page_info_array_t user_page_list,
10106 	unsigned int page_cnt,
10107 	unsigned int cacheattr)
10108 {
10109 	PMAP_TRACE(2, PMAP_CODE(PMAP__BATCH_UPDATE_CACHING) | DBG_FUNC_START, page_cnt, cacheattr, 0xCECC0DE0);
10110 
10111 	batch_set_cache_attr_state_t states;
10112 	states.page_index = 0;
10113 	states.state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_UPDATE_PASS;
10114 	states.tlb_flush_pass_needed = false;
10115 	states.rt_cache_flush_pass_needed = false;
10116 
10117 	/* Verify we are being called from a preemptible context. */
10118 	pmap_verify_preemptible();
10119 
10120 	while (states.state != PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE) {
10121 #if XNU_MONITOR
10122 		states = pmap_batch_set_cache_attributes_ppl((volatile upl_page_info_t *) user_page_list, states, page_cnt, cacheattr);
10123 #else /* !XNU_MONITOR */
10124 		states = pmap_batch_set_cache_attributes_internal(user_page_list, states, page_cnt, cacheattr);
10125 #endif /* XNU_MONITOR */
10126 	}
10127 
10128 	PMAP_TRACE(2, PMAP_CODE(PMAP__BATCH_UPDATE_CACHING) | DBG_FUNC_END, page_cnt, cacheattr, 0xCECC0DEF);
10129 	return true;
10130 }
10131 
10132 /**
10133  * Flushes TLB entries associated with the page numbered by pn, but do not
10134  * issue barriers yet.
10135  *
10136  * @param paddr The physical address to be flushed from TLB. Must be a managed address.
10137  */
MARK_AS_PMAP_TEXT static void
pmap_flush_tlb_for_paddr_locked_async(pmap_paddr_t paddr)
{
#if __ARM_PTE_PHYSMAP__
	/* Flush the physical aperture mappings. */
	const vm_offset_t kva = phystokv(paddr);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap, true);
#endif /* __ARM_PTE_PHYSMAP__ */

	/* Flush the mappings tracked in the ptes. */
	const unsigned int pai = pa_index(paddr);
	pv_entry_t **pv_h = pai_to_pvh(pai);

	pt_entry_t *pte_p = PT_ENTRY_NULL;
	pv_entry_t *pve_p = PV_ENTRY_NULL;

	/* The caller must hold the PVH lock for this page across the walk. */
	pvh_assert_locked(pai);

	/* The PV head is either a single PTE pointer or a list of PV entries. */
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_pve_list(pv_h);
		pte_p = PT_ENTRY_NULL;
	}

	/* Walk every mapping of the page, queueing an async TLB flush for each. */
	int pve_ptep_idx = 0;
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		if (pve_p != PV_ENTRY_NULL) {
			/* Each PV entry holds up to PTE_PER_PVE PTE slots; slots may be empty. */
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto flush_tlb_skip_pte;
			}
		}

#ifdef PVH_FLAG_IOMMU
		/* IOMMU mappings are not covered by the CPU TLBs; skip them. */
		if (pvh_ptep_is_iommu(pte_p)) {
			goto flush_tlb_skip_pte;
		}
#endif /* PVH_FLAG_IOMMU */
		pmap_t pmap = ptep_get_pmap(pte_p);
		vm_map_address_t va = ptep_get_va(pte_p);

		/* Async flush only: the caller issues the final synchronizing barrier. */
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);

flush_tlb_skip_pte:
		pte_p = PT_ENTRY_NULL;
		/* Advance to the next PTE slot, moving to the next PV entry when exhausted. */
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
		}
	}
}
10190 
10191 /**
10192  * Updates the pp_attr_table entry indexed by pai with cacheattr atomically.
10193  *
10194  * @param pai The Physical Address Index of the entry.
10195  * @param cacheattr The new cache attribute.
10196  */
10197 MARK_AS_PMAP_TEXT static void
10198 pmap_update_pp_attr_wimg_bits_locked(unsigned int pai, unsigned int cacheattr)
10199 {
10200 	pvh_assert_locked(pai);
10201 
10202 	pp_attr_t pp_attr_current, pp_attr_template;
10203 	do {
10204 		pp_attr_current = pp_attr_table[pai];
10205 		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr);
10206 
10207 		/**
10208 		 * WIMG bits should only be updated under the PVH lock, but we should do
10209 		 * this in a CAS loop to avoid losing simultaneous updates to other bits like refmod.
10210 		 */
10211 	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
10212 }
10213 
10214 /**
10215  * Batch updates the cache attributes of a list of pages in three passes.
10216  *
10217  * In pass one, the pp_attr_table and the pte are updated for the pages in the list.
10218  * In pass two, TLB entries are flushed for each page in the list if necessary.
10219  * In pass three, caches are cleaned for each page in the list if necessary.
10220  *
10221  * When running in PPL, this function may decide to return to the caller in response
10222  * to AST_URGENT.
10223  *
10224  * @param user_page_list List of pages to be updated.
10225  * @param states The state of the state machine. See definition of batch_set_cache_attr_state_t.
10226  * @param page_cnt Number of pages in total in user_page_list.
10227  * @param cacheattr The new cache attributes.
10228  *
10229  * @return The new state of the state machine.
10230  */
MARK_AS_PMAP_TEXT batch_set_cache_attr_state_t
pmap_batch_set_cache_attributes_internal(
#if XNU_MONITOR
	volatile upl_page_info_t *user_page_list,
#else /* !XNU_MONITOR */
	upl_page_info_array_t user_page_list,
#endif /* XNU_MONITOR */
	batch_set_cache_attr_state_t states,
	unsigned int page_cnt,
	unsigned int cacheattr)
{
	/* Unpack the caller-provided, resumable state-machine state. */
	uint64_t page_index = states.page_index;
	uint64_t state = states.state;
	bool tlb_flush_pass_needed = !!(states.tlb_flush_pass_needed);
	bool rt_cache_flush_pass_needed = !!(states.rt_cache_flush_pass_needed);

	/* For verifying progress. */
	__assert_only const uint64_t page_index_old = page_index;
	__assert_only const uint64_t state_old = state;

	/* Assert page_index and state are within their range. */
	if (!(page_index < page_cnt && state < PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE)) {
		panic("%s: invalid input; page_index: %llu, page_cnt: %u, state: %llu", __func__, page_index, page_cnt, state);
	}

	/* Pass 1: update the pp_attr_table entry and the PTEs for each page. */
	if (state == PMAP_BATCH_SET_CACHE_ATTRIBUTES_UPDATE_PASS) {
		PMAP_TRACE(2, PMAP_CODE(PMAP__BATCH_UPDATE_CACHING), page_cnt, cacheattr, 0xCECC0DE1, page_index);
		/* Update cache attributes of the pages until there's an urgent AST or it's done. */
		while (page_index < page_cnt) {
			const ppnum_t pn = user_page_list[page_index].phys_addr;
			const pmap_paddr_t paddr = ptoa(pn);

			if (!pa_valid(paddr)) {
				panic("%s: page is not managed; addr: 0x%016llx", __func__, paddr);
			}

			const unsigned int pai = pa_index(paddr);

			/* Lock the page. */
			pvh_lock(pai);

#if XNU_MONITOR
			if (ppattr_pa_test_monitor(paddr)) {
				panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
			}
#endif /* XNU_MONITOR */
			const pp_attr_t pp_attr_current = pp_attr_table[pai];

			/* An entry with no WIMG bits set is implicitly VM_WIMG_DEFAULT. */
			unsigned int wimg_bits_prev = VM_WIMG_DEFAULT;
			if (pp_attr_current & PP_ATTR_WIMG_MASK) {
				wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
			}

			const pp_attr_t pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr);

			unsigned int wimg_bits_new = VM_WIMG_DEFAULT;
			if (pp_attr_template & PP_ATTR_WIMG_MASK) {
				wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
			}

			/* Update the cache attributes in PTE. */
			if (wimg_bits_new != wimg_bits_prev) {
				tlb_flush_pass_needed |= pmap_update_cache_attributes_locked(pn, cacheattr, false);
			}

			/* Pages newly made realtime require a forced cache clean in pass 3. */
			if (wimg_bits_new == VM_WIMG_RT && wimg_bits_prev != VM_WIMG_RT) {
				rt_cache_flush_pass_needed = true;
			}

			pmap_update_pp_attr_wimg_bits_locked(pai, cacheattr);
			pvh_unlock(pai);

			page_index++;

#if XNU_MONITOR
			/**
			 * Check for AST_URGENT every page, as the pve list search in cache
			 * update can take non-constant time.
			 */
			if (__improbable(pmap_pending_preemption() && (page_index < page_cnt))) {
				goto pbscai_exit;
			}
#endif /* XNU_MONITOR */
		}

		/* page_index == page_cnt && !pmap_pending_preemption() */
		if (tlb_flush_pass_needed) {
			state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_TLBFLUSH_PASS;
		} else if (rt_cache_flush_pass_needed) {
			state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_CACHEFLUSH_PASS;
		} else {
			state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE;
		}
		page_index = 0;

		/* Sync the PTE writes before potential TLB/Cache flushes. */
		FLUSH_PTE_STRONG();

#if XNU_MONITOR
		if (__improbable(pmap_pending_preemption())) {
			goto pbscai_exit;
		}
#endif /* XNU_MONITOR */
	}

	if (state == PMAP_BATCH_SET_CACHE_ATTRIBUTES_TLBFLUSH_PASS) {
		/**
		 * Pass 2: for each physical page and for each mapping, we need to flush
		 * the TLB for it.
		 */
		PMAP_TRACE(2, PMAP_CODE(PMAP__BATCH_UPDATE_CACHING), page_cnt, cacheattr, 0xCECC0DE2, page_index);
		while (page_index < page_cnt) {
			const ppnum_t pn = user_page_list[page_index].phys_addr;

			const pmap_paddr_t paddr = ptoa(pn);
			if (!pa_valid(paddr)) {
				panic("%s: page is not managed; addr: 0x%016llx", __func__, paddr);
			}

			const unsigned int pai = pa_index(paddr);

			/* Queue async flushes under the PVH lock; synced by arm64_sync_tlb() below. */
			pvh_lock(pai);
			pmap_flush_tlb_for_paddr_locked_async(paddr);
			pvh_unlock(pai);

			page_index++;

#if XNU_MONITOR
			/**
			 * Check for AST_URGENT every page, as the pve list search in cache
			 * update can take non-constant time.
			 */
			if (__improbable(pmap_pending_preemption() && (page_index < page_cnt))) {
				goto pbscai_exit;
			}
#endif /* XNU_MONITOR */
		}

		/* Synchronize all of the TLB flushes queued above. */
		arm64_sync_tlb((cacheattr & VM_WIMG_MASK) == VM_WIMG_RT);

		if (rt_cache_flush_pass_needed) {
			state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_CACHEFLUSH_PASS;
		} else {
			state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE;
		}
		page_index = 0;

#if XNU_MONITOR
		if (__improbable(pmap_pending_preemption())) {
			goto pbscai_exit;
		}
#endif /* XNU_MONITOR */
	}

	if (state == PMAP_BATCH_SET_CACHE_ATTRIBUTES_CACHEFLUSH_PASS) {
		/* Pass 3: Flush the cache if the page is recently set to RT */
		PMAP_TRACE(2, PMAP_CODE(PMAP__BATCH_UPDATE_CACHING), page_cnt, cacheattr, 0xCECC0DE3, page_index);
#if !XNU_MONITOR
		/**
		 * On non-PPL platforms, we disable preemption to ensure we are not preempted
		 * in the state where DC by VA instructions remain enabled.
		 */
		disable_preemption();
#endif /* !XNU_MONITOR */

		assert(get_preemption_level() > 0);

#if defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM
		/**
		 * On APPLEVIRTUALPLATFORM, HID register accesses cause a synchronous exception
		 * and the host will handle cache maintenance for it. So we don't need to
		 * worry about enabling the ops here for AVP.
		 */
		enable_dc_mva_ops();
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM */

		while (page_index < page_cnt) {
			const pmap_paddr_t paddr = ptoa(user_page_list[page_index].phys_addr);

			if (!pa_valid(paddr)) {
				panic("%s: page is not managed; addr: 0x%016llx", __func__, paddr);
			}

			CleanPoC_DcacheRegion_Force_nopreempt_nohid(phystokv(paddr), PAGE_SIZE);

			page_index++;

#if XNU_MONITOR
			if (__improbable(pmap_pending_preemption() && (page_index < page_cnt))) {
#if defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM
				/* Never yield to an AST with DC-by-VA ops still enabled. */
				disable_dc_mva_ops();
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM */
				goto pbscai_exit;
			}
#endif /* XNU_MONITOR */
		}

#if defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM
		disable_dc_mva_ops();
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) && !APPLEVIRTUALPLATFORM */

#if !XNU_MONITOR
		enable_preemption();
#endif /* !XNU_MONITOR */

		state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE;
		page_index = 0;
	}

#if XNU_MONITOR
pbscai_exit:
#endif /* XNU_MONITOR */
	/* Assert page_index and state are within their range. */
	assert(page_index < page_cnt || state == PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE);

	/* Make sure we are making progress in this call. */
	assert(page_index > page_index_old || state > state_old);

	/* Repack the state machine state for the caller's next invocation. */
	batch_set_cache_attr_state_t states_new;
	states_new.page_index = page_index;
	states_new.state = state;
	states_new.tlb_flush_pass_needed = tlb_flush_pass_needed ? 1 : 0;
	states_new.rt_cache_flush_pass_needed = rt_cache_flush_pass_needed ? 1 : 0;
	return states_new;
}
10456 
10457 MARK_AS_PMAP_TEXT static void
10458 pmap_set_cache_attributes_priv(
10459 	ppnum_t pn,
10460 	unsigned int cacheattr,
10461 	boolean_t external __unused)
10462 {
10463 	pmap_paddr_t    paddr;
10464 	unsigned int    pai;
10465 	pp_attr_t       pp_attr_current;
10466 	pp_attr_t       pp_attr_template;
10467 	unsigned int    wimg_bits_prev, wimg_bits_new;
10468 
10469 	paddr = ptoa(pn);
10470 
10471 	if (!pa_valid(paddr)) {
10472 		return;                         /* Not a managed page. */
10473 	}
10474 
10475 	if (cacheattr & VM_WIMG_USE_DEFAULT) {
10476 		cacheattr = VM_WIMG_DEFAULT;
10477 	}
10478 
10479 	pai = pa_index(paddr);
10480 
10481 	pvh_lock(pai);
10482 
10483 #if XNU_MONITOR
10484 	if (external && ppattr_pa_test_monitor(paddr)) {
10485 		panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
10486 	} else if (!external && !ppattr_pa_test_monitor(paddr)) {
10487 		panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
10488 	}
10489 #endif
10490 
10491 	do {
10492 		pp_attr_current = pp_attr_table[pai];
10493 		wimg_bits_prev = VM_WIMG_DEFAULT;
10494 		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
10495 			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
10496 		}
10497 
10498 		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
10499 
10500 		/**
10501 		 * WIMG bits should only be updated under the PVH lock, but we should do
10502 		 * this in a CAS loop to avoid losing simultaneous updates to other bits like refmod.
10503 		 */
10504 	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
10505 
10506 	wimg_bits_new = VM_WIMG_DEFAULT;
10507 	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
10508 		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
10509 	}
10510 
10511 	if (wimg_bits_new != wimg_bits_prev) {
10512 		pmap_update_cache_attributes_locked(pn, cacheattr, true);
10513 	}
10514 
10515 	pvh_unlock(pai);
10516 
10517 	pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
10518 }
10519 
MARK_AS_PMAP_TEXT void
pmap_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr)
{
	/*
	 * Entry point for requests originating outside the PPL (external == TRUE);
	 * on XNU_MONITOR systems the helper will panic if pn is a PPL-owned page.
	 */
	pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
}
10527 
/*
 * Public entry point to set a single page's cache attributes.  On PPL-enabled
 * systems the update must be performed inside the PPL; otherwise call the
 * internal implementation directly.
 */
void
pmap_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr)
{
#if XNU_MONITOR
	pmap_set_cache_attributes_ppl(pn, cacheattr);
#else
	pmap_set_cache_attributes_internal(pn, cacheattr);
#endif
}
10539 
10540 /**
10541  * Updates the page numbered ppnum to have attribute specified by attributes.
10542  * If a TLB flush is necessary, it will be performed if perform_tlbi is true.
10543  * The necessity of the TLB flush is returned in case this function is called
10544  * in a batched manner and the TLB flush is intended to be done at a different
10545  * timing.
10546  *
10547  * @param ppnum Page Number of the page to be updated.
10548  * @param attributes The new cache attributes.
10549  * @param perform_tlbi When a TLB flush is needed, whether to perform the tlbi
10550  *        immediately.
10551  *
10552  * @return Returns true if a TLB flush is needed for this update regardless of
10553  *         whether a flush has occurred already.
10554  */
MARK_AS_PMAP_TEXT bool
pmap_update_cache_attributes_locked(
	ppnum_t ppnum,
	unsigned attributes,
	bool perform_tlbi)
{
	pmap_paddr_t    phys = ptoa(ppnum);
	pv_entry_t      *pve_p;
	pt_entry_t      *pte_p;
	pv_entry_t      **pv_h;
	pt_entry_t      tmplate;
	unsigned int    pai;
	boolean_t       tlb_flush_needed = false;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);

	/* Optionally reject device-type memory attributes on managed (DRAM) pages. */
	if (pmap_panic_dev_wimg_on_managed) {
		switch (attributes & VM_WIMG_MASK) {
		case VM_WIMG_IO:                        // nGnRnE
		case VM_WIMG_POSTED:                    // nGnRE
		/* supported on DRAM, but slow, so we disallow */

		case VM_WIMG_POSTED_REORDERED:          // nGRE
		case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
			/* unsupported on DRAM */

			panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
			    __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
			break;

		default:
			/* not device type memory, all good */

			break;
		}
	}

#if __ARM_PTE_PHYSMAP__
	/* First rewrite the page's mapping in the kernel's physical aperture. */
	vm_offset_t kva = phystokv(phys);
	pte_p = pmap_pte(kernel_pmap, kva);

	tmplate = *pte_p;
	tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
#if XNU_MONITOR
	/* Preserve the existing PPL permission (XPRR) bits of the aperture PTE. */
	tmplate |= (wimg_to_pte(attributes, phys) & ~ARM_PTE_XPRR_MASK);
#else
	tmplate |= wimg_to_pte(attributes, phys);
#endif
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __FUNCTION__, pte_p, (void *)kva, tmplate);
	}

	if (perform_tlbi) {
		write_pte_strong(pte_p, tmplate);
		flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap, true);
	} else {
		/* Deferred mode: the caller is responsible for PTE sync and TLBI. */
		write_pte_fast(pte_p, tmplate);
	}
	tlb_flush_needed = true;
#endif

	pai = pa_index(phys);

	pv_h = pai_to_pvh(pai);

	/* The PV head holds either a single PTE pointer or a list of PV entries. */
	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_pve_list(pv_h);
		pte_p = PT_ENTRY_NULL;
	}

	/* Rewrite every mapping of the page with the new cache attributes. */
	int pve_ptep_idx = 0;
	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t          pmap;

		if (pve_p != PV_ENTRY_NULL) {
			/* PVE PTE slots may be empty; skip holes. */
			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
			if (pte_p == PT_ENTRY_NULL) {
				goto cache_skip_pve;
			}
		}

#ifdef PVH_FLAG_IOMMU
		/* IOMMU mappings are not CPU PTEs; leave them untouched. */
		if (pvh_ptep_is_iommu(pte_p)) {
			goto cache_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		tmplate = *pte_p;
		tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes, phys);

		if (perform_tlbi) {
			write_pte_strong(pte_p, tmplate);
			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
		} else {
			write_pte_fast(pte_p, tmplate);
		}
		tlb_flush_needed = true;

cache_skip_pve:
		pte_p = PT_ENTRY_NULL;
		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
			pve_ptep_idx = 0;
			pve_p = pve_next(pve_p);
		}
	}
	/* Synchronize the async flushes queued above (only when flushing here). */
	if (perform_tlbi && tlb_flush_needed) {
		arm64_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);

	return tlb_flush_needed;
}
10677 
10678 /**
10679  * Mark a pmap as being dedicated to use for a commpage mapping.
10680  * The pmap itself will never be activated on a CPU; its mappings will
10681  * only be embedded in userspace pmaps at a fixed virtual address.
10682  *
10683  * @param pmap the pmap to mark as belonging to a commpage.
10684  */
static void
pmap_set_commpage(pmap_t pmap)
{
#if XNU_MONITOR
	/* Commpage pmaps must be configured before the PPL locks down. */
	assert(!pmap_ppl_locked_down);
#endif
	/* Only a plain user pmap may be converted into a commpage pmap. */
	assert(pmap->type == PMAP_TYPE_USER);
	pmap->type = PMAP_TYPE_COMMPAGE;
	/*
	 * Free the pmap's ASID.  This pmap should not ever be directly
	 * activated in a CPU's TTBR.  Freeing the ASID will not only reduce
	 * ASID space contention but will also cause pmap_switch() to panic
	 * if an attacker tries to activate this pmap.  Disable preemption to
	 * accommodate the *_nopreempt spinlock in free_asid().
	 */
	mp_disable_preemption();
	pmap_get_pt_ops(pmap)->free_id(pmap);
	mp_enable_preemption();
}
10704 
10705 static void
10706 pmap_update_tt3e(
10707 	pmap_t pmap,
10708 	vm_address_t address,
10709 	tt_entry_t template)
10710 {
10711 	tt_entry_t *ptep, pte;
10712 
10713 	ptep = pmap_tt3e(pmap, address);
10714 	if (ptep == NULL) {
10715 		panic("%s: no ptep?", __FUNCTION__);
10716 	}
10717 
10718 	pte = *ptep;
10719 	pte = tte_to_pa(pte) | template;
10720 	write_pte_strong(ptep, pte);
10721 }
10722 
10723 /* Note absence of non-global bit */
10724 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
10725 	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
10726 	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
10727 	        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
10728 
10729 /* Note absence of non-global bit and no-execute bit.  */
10730 #define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
10731 	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
10732 	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
10733 	        | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
10734 
/**
 * Allocate the commpage ("shared page") data, read-only data, and (with
 * CONFIG_ARM_PFZ) text pages, and build the dedicated commpage pmap(s) whose
 * translation tables are later nested into every user pmap.
 *
 * @param kernel_data_addr Out: kernel VA of the writable commpage data page.
 * @param kernel_text_addr Out: kernel VA of the commpage text page, or 0 when
 *        no text page was allocated.
 * @param kernel_ro_data_addr Out: kernel VA of the kernel read-only data page.
 * @param user_text_addr Out: user VA at which the commpage text was mapped
 *        (0 unless CONFIG_ARM_PFZ).
 */
void
pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
    vm_map_address_t *kernel_ro_data_addr, vm_map_address_t *user_text_addr)
{
	kern_return_t kr;
	pmap_paddr_t data_pa = 0; // data address
	pmap_paddr_t ro_data_pa = 0; // kernel read-only data address
	pmap_paddr_t text_pa = 0; // text address

	*kernel_data_addr = 0;
	*kernel_text_addr = 0;
	*user_text_addr = 0;

#if XNU_MONITOR
	/* On PPL systems the commpage backing pages come from the PPL page free list. */
	data_pa = pmap_alloc_page_for_kern(0);
	assert(data_pa);
	memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
	ro_data_pa = pmap_alloc_page_for_kern(0);
	assert(ro_data_pa);
	memset((char *) phystokv(ro_data_pa), 0, PAGE_SIZE);
#if CONFIG_ARM_PFZ
	text_pa = pmap_alloc_page_for_kern(0);
	assert(text_pa);
	memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
#endif

#else /* XNU_MONITOR */
	(void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
	/*
	 * For non-PPL devices, we have neither page lockdown nor a physical aperture
	 * mapped at page granularity, so a separate page for kernel RO data would not
	 * be useful.
	 */
	ro_data_pa = data_pa;
#if CONFIG_ARM_PFZ
	(void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
#endif

#endif /* XNU_MONITOR */

	/*
	 * In order to avoid burning extra pages on mapping the shared page, we
	 * create a dedicated pmap for the shared page.  We forcibly nest the
	 * translation tables from this pmap into other pmaps.  The level we
	 * will nest at depends on the MMU configuration (page size, TTBR range,
	 * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
	 *
	 * Note that this is NOT "the nested pmap" (which is used to nest the
	 * shared cache).
	 *
	 * Note that we update parameters of the entry for our unique needs (NG
	 * entry, etc.).
	 */
	sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
	assert(sharedpage_pmap_default != NULL);
	pmap_set_commpage(sharedpage_pmap_default);

	/* The user 64-bit mappings... */
	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_RO_ADDRESS, ro_data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_RO_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
#if CONFIG_ARM_PFZ
	/* User mapping of comm page text section for 64 bit mapping only
	 *
	 * We don't insert it into the 32 bit mapping because we don't want 32 bit
	 * user processes to get this page mapped in, they should never call into
	 * this page.
	 *
	 * The data comm page is in a pre-reserved L3 VA range and the text commpage
	 * is slid in the same L3 as the data commpage.  It is either outside the
	 * max of user VA or is pre-reserved in the vm_map_exec(). This means that
	 * it is reserved and unavailable to mach VM for future mappings.
	 */
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
	int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;

	vm_map_address_t commpage_text_va = 0;

	/* Randomize the text commpage's leaf index within the data commpage's L3 table. */
	do {
		int text_leaf_index = random() % num_ptes;

		// Generate a VA for the commpage text with the same root and twig index as data
		// comm page, but with new leaf index we've just generated.
		commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
		commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
	} while ((commpage_text_va == _COMM_PAGE64_BASE_ADDRESS) || (commpage_text_va == _COMM_PAGE64_RO_ADDRESS)); // Try again if we collide (should be unlikely)

	// Assert that this is empty
	__assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
	assert(ptep != PT_ENTRY_NULL);
	assert(*ptep == ARM_TTE_EMPTY);

	// At this point, we've found the address we want to insert our comm page at
	kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	// Mark it as global page R/X so that it doesn't get thrown out on tlb flush
	pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);

	*user_text_addr = commpage_text_va;
#endif

	/* ...and the user 32-bit mappings. */
	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_RO_ADDRESS, ro_data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_RO_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
#if __ARM_MIXED_PAGE_SIZE__
	/**
	 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
	 * new set of page tables that point to the exact same 16K shared page as
	 * before. Only the first 4K of the 16K shared page is mapped since that's
	 * the only part that contains relevant data.
	 */
	sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
	assert(sharedpage_pmap_4k != NULL);
	pmap_set_commpage(sharedpage_pmap_4k);

	/* The user 64-bit mappings... */
	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_RO_ADDRESS, ro_data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_RO_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* ...and the user 32-bit mapping. */
	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_RO_ADDRESS, ro_data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_RO_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
#endif

	/* For manipulation in kernel, go straight to physical page */
	*kernel_data_addr = phystokv(data_pa);
	assert(sharedpage_ro_data_kva == 0);
	*kernel_ro_data_addr = sharedpage_ro_data_kva = phystokv(ro_data_pa);
	assert(sharedpage_text_kva == 0);
	*kernel_text_addr = sharedpage_text_kva = (text_pa ? phystokv(text_pa) : 0);
}
10885 
10886 
10887 /*
10888  * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
10889  * with user controlled TTEs for regions that aren't explicitly reserved by the
10890  * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
10891  */
10892 #if (ARM_PGSHIFT == 14)
10893 /**
10894  * Ensure that 64-bit devices with 32-bit userspace VAs (arm64_32) can nest the
10895  * commpage completely above the maximum 32-bit userspace VA.
10896  */
10897 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
10898 
10899 /**
10900  * Normally there'd be an assert to check that 64-bit devices with 64-bit
10901  * userspace VAs can nest the commpage completely above the maximum 64-bit
 * userspace VA, but that technically isn't true on macOS. On those systems, the
10903  * commpage lives within the userspace VA range, but is protected by the VM as
10904  * a reserved region (see vm_reserved_regions[] definition for more info).
10905  */
10906 
10907 #elif (ARM_PGSHIFT == 12)
10908 /**
10909  * Ensure that 64-bit devices using 4K pages can nest the commpage completely
10910  * above the maximum userspace VA.
10911  */
10912 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
10913 #else
10914 #error Nested shared page mapping is unsupported on this config
10915 #endif
10916 
/**
 * Insert (nest) the pre-built commpage page tables into a user pmap so the
 * shared commpage becomes visible at its architecturally fixed address.
 *
 * "Nesting" here means installing a pointer to a pre-allocated shared table
 * into the target pmap's twig-level entry, so every task shares one set of
 * leaf tables for the commpage instead of allocating its own.
 *
 * @param pmap The user pmap to receive the commpage mapping.
 *
 * @return KERN_SUCCESS normally; on XNU_MONITOR builds, KERN_RESOURCE_SHORTAGE
 *         if pmap_expand() needs memory (caller donates pages and retries).
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_insert_sharedpage_internal(
	pmap_t pmap)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_offset_t sharedpage_vaddr;
	pt_entry_t *ttep, *src_ttep;
	int options = 0;
	pmap_t sharedpage_pmap = sharedpage_pmap_default;

	/* Validate the pmap input before accessing its data. */
	validate_pmap_mutable(pmap);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const unsigned int sharedpage_level = pt_attr_commpage_level(pt_attr);

#if __ARM_MIXED_PAGE_SIZE__
#if !__ARM_16K_PG__
	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
	#error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
#endif /* !__ARM_16K_PG__ */

	/* Choose the correct shared page pmap to use. */
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	if (pmap_page_size == 16384) {
		sharedpage_pmap = sharedpage_pmap_default;
	} else if (pmap_page_size == 4096) {
		sharedpage_pmap = sharedpage_pmap_4k;
	} else {
		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

#if XNU_MONITOR
	/*
	 * Inside the PPL we must not block for memory; request non-blocking
	 * expansion and report the shortage to the caller instead.
	 */
	options |= PMAP_OPTIONS_NOWAIT;
#endif /* XNU_MONITOR */

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	/* The commpage lives at a different fixed VA for 64-bit vs 32-bit tasks. */
	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}


	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);

	/*
	 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
	 * two (2MB) depending on the address space layout. For 16KB pages, each level
	 * one entry is 64GB, so we must go to the second level entry (32MB) in order
	 * to "nest".
	 *
	 * Note: This is not "nesting" in the shared cache sense. This definition of
	 * nesting just means inserting pointers to pre-allocated tables inside of
	 * the passed in pmap to allow us to share page tables (which map the shared
	 * page) for every task. This saves at least one page of memory per process
	 * compared to creating new page tables in every process for mapping the
	 * shared page.
	 */

	/**
	 * Allocate the twig page tables if needed, and slam a pointer to the shared
	 * page's tables into place.
	 */
	while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
		/* Drop the lock across pmap_expand(), which may allocate/block. */
		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);

		kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);

		if (kr != KERN_SUCCESS) {
#if XNU_MONITOR
			if (kr == KERN_RESOURCE_SHORTAGE) {
				/* Caller will donate a page to the PPL and retry. */
				return kr;
			} else
#endif
			{
				panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
			}
		}

		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
	}

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	/* Copy the shared pmap's twig entry into this pmap, sharing its subtree. */
	src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);

	*ttep = *src_ttep;
	FLUSH_PTE_STRONG();

	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);

	return kr;
}
11017 
/**
 * Remove the nested commpage mapping from a pmap by clearing the twig entry
 * that points at the shared commpage page tables, then flushing the TLB.
 *
 * @param pmap The pmap whose commpage nesting should be removed.
 */
static void
pmap_unmap_sharedpage(
	pmap_t pmap)
{
	pt_entry_t *ttep;
	vm_offset_t sharedpage_vaddr;
	pmap_t sharedpage_pmap = sharedpage_pmap_default;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const unsigned int sharedpage_level = pt_attr_commpage_level(pt_attr);

#if __ARM_MIXED_PAGE_SIZE__
#if !__ARM_16K_PG__
	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
	#error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
#endif /* !__ARM_16K_PG__ */

	/* Choose the correct shared page pmap to use. */
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	if (pmap_page_size == 16384) {
		sharedpage_pmap = sharedpage_pmap_default;
	} else if (pmap_page_size == 4096) {
		sharedpage_pmap = sharedpage_pmap_4k;
	} else {
		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	/* The commpage lives at a different fixed VA for 64-bit vs 32-bit tasks. */
	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}


	ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);

	/* Nothing nested here (twig table never allocated); nothing to undo. */
	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page. */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}

	*ttep = ARM_TTE_EMPTY;
	FLUSH_PTE_STRONG();

	/* Invalidate any cached translations for the commpage VA in this ASID. */
	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap, false);
	sync_tlb_flush();
}
11074 
11075 void
11076 pmap_insert_sharedpage(
11077 	pmap_t pmap)
11078 {
11079 #if XNU_MONITOR
11080 	kern_return_t kr = KERN_FAILURE;
11081 
11082 	while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
11083 		pmap_alloc_page_for_ppl(0);
11084 	}
11085 
11086 	pmap_ledger_check_balance(pmap);
11087 
11088 	if (kr != KERN_SUCCESS) {
11089 		panic("%s: failed to insert the shared page, kr=%d, "
11090 		    "pmap=%p",
11091 		    __FUNCTION__, kr,
11092 		    pmap);
11093 	}
11094 #else
11095 	pmap_insert_sharedpage_internal(pmap);
11096 #endif
11097 }
11098 
11099 static boolean_t
11100 pmap_is_64bit(
11101 	pmap_t pmap)
11102 {
11103 	return pmap->is_64bit;
11104 }
11105 
/* No ARM configuration in this file supports "exotic" pmaps; always false. */
bool
pmap_is_exotic(
	pmap_t pmap __unused)
{
	return false;
}
11112 
11113 
11114 /* ARMTODO -- an implementation that accounts for
11115  * holes in the physical map, if any.
11116  */
11117 boolean_t
11118 pmap_valid_page(
11119 	ppnum_t pn)
11120 {
11121 	return pa_valid(ptoa(pn));
11122 }
11123 
11124 boolean_t
11125 pmap_bootloader_page(
11126 	ppnum_t pn)
11127 {
11128 	pmap_paddr_t paddr = ptoa(pn);
11129 
11130 	if (pa_valid(paddr)) {
11131 		return FALSE;
11132 	}
11133 	pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
11134 	return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
11135 }
11136 
11137 MARK_AS_PMAP_TEXT boolean_t
11138 pmap_is_empty_internal(
11139 	pmap_t pmap,
11140 	vm_map_offset_t va_start,
11141 	vm_map_offset_t va_end)
11142 {
11143 	vm_map_offset_t block_start, block_end;
11144 	tt_entry_t *tte_p;
11145 
11146 	if (pmap == NULL) {
11147 		return TRUE;
11148 	}
11149 
11150 	validate_pmap(pmap);
11151 
11152 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11153 	unsigned int initial_not_in_kdp = not_in_kdp;
11154 
11155 	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11156 		pmap_lock(pmap, PMAP_LOCK_SHARED);
11157 	}
11158 
11159 
11160 	/* TODO: This will be faster if we increment ttep at each level. */
11161 	block_start = va_start;
11162 
11163 	while (block_start < va_end) {
11164 		pt_entry_t     *bpte_p, *epte_p;
11165 		pt_entry_t     *pte_p;
11166 
11167 		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
11168 		if (block_end > va_end) {
11169 			block_end = va_end;
11170 		}
11171 
11172 		tte_p = pmap_tte(pmap, block_start);
11173 		if ((tte_p != PT_ENTRY_NULL)
11174 		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
11175 			pte_p = (pt_entry_t *) ttetokv(*tte_p);
11176 			bpte_p = &pte_p[pte_index(pt_attr, block_start)];
11177 			epte_p = &pte_p[pte_index(pt_attr, block_end)];
11178 
11179 			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
11180 				if (*pte_p != ARM_PTE_EMPTY) {
11181 					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11182 						pmap_unlock(pmap, PMAP_LOCK_SHARED);
11183 					}
11184 					return FALSE;
11185 				}
11186 			}
11187 		}
11188 		block_start = block_end;
11189 	}
11190 
11191 	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11192 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
11193 	}
11194 
11195 	return TRUE;
11196 }
11197 
/**
 * Check whether a VA range in the given pmap contains no valid mappings.
 * Dispatches to the PPL on XNU_MONITOR builds, otherwise calls the
 * internal implementation directly.
 */
boolean_t
pmap_is_empty(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
#if XNU_MONITOR
	return pmap_is_empty_ppl(pmap, va_start, va_end);
#else
	return pmap_is_empty_internal(pmap, va_start, va_end);
#endif
}
11210 
11211 vm_map_offset_t
11212 pmap_max_offset(
11213 	boolean_t               is64,
11214 	unsigned int    option)
11215 {
11216 	return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
11217 }
11218 
11219 vm_map_offset_t
11220 pmap_max_64bit_offset(
11221 	__unused unsigned int option)
11222 {
11223 	vm_map_offset_t max_offset_ret = 0;
11224 
11225 #if defined(__arm64__)
11226 	#define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
11227 	_Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
11228 	    "Minimum address space size outside allowable range");
11229 	const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
11230 	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
11231 		max_offset_ret = arm64_pmap_max_offset_default;
11232 	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
11233 		max_offset_ret = min_max_offset;
11234 	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
11235 		max_offset_ret = MACH_VM_MAX_ADDRESS;
11236 	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
11237 		if (arm64_pmap_max_offset_default) {
11238 			max_offset_ret = arm64_pmap_max_offset_default;
11239 		} else if (max_mem > 0xC0000000) {
11240 			max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
11241 		} else if (max_mem > 0x40000000) {
11242 			max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
11243 		} else {
11244 			max_offset_ret = min_max_offset;
11245 		}
11246 	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
11247 		if (arm64_pmap_max_offset_default) {
11248 			// Allow the boot-arg to override jumbo size
11249 			max_offset_ret = arm64_pmap_max_offset_default;
11250 		} else {
11251 			max_offset_ret = MACH_VM_MAX_ADDRESS;     // Max offset is 64GB for pmaps with special "jumbo" blessing
11252 		}
11253 	} else {
11254 		panic("pmap_max_64bit_offset illegal option 0x%x", option);
11255 	}
11256 
11257 	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
11258 	assert(max_offset_ret >= min_max_offset);
11259 #else
11260 	panic("Can't run pmap_max_64bit_offset on non-64bit architectures");
11261 #endif
11262 
11263 	return max_offset_ret;
11264 }
11265 
11266 vm_map_offset_t
11267 pmap_max_32bit_offset(
11268 	unsigned int option)
11269 {
11270 	vm_map_offset_t max_offset_ret = 0;
11271 
11272 	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
11273 		max_offset_ret = arm_pmap_max_offset_default;
11274 	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
11275 		max_offset_ret = VM_MAX_ADDRESS;
11276 	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
11277 		max_offset_ret = VM_MAX_ADDRESS;
11278 	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
11279 		if (arm_pmap_max_offset_default) {
11280 			max_offset_ret = arm_pmap_max_offset_default;
11281 		} else if (max_mem > 0x20000000) {
11282 			max_offset_ret = VM_MAX_ADDRESS;
11283 		} else {
11284 			max_offset_ret = VM_MAX_ADDRESS;
11285 		}
11286 	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
11287 		max_offset_ret = VM_MAX_ADDRESS;
11288 	} else {
11289 		panic("pmap_max_32bit_offset illegal option 0x%x", option);
11290 	}
11291 
11292 	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
11293 	return max_offset_ret;
11294 }
11295 
11296 #if CONFIG_DTRACE
11297 /*
11298  * Constrain DTrace copyin/copyout actions
11299  */
11300 extern kern_return_t dtrace_copyio_preflight(addr64_t);
11301 extern kern_return_t dtrace_copyio_postflight(addr64_t);
11302 
11303 kern_return_t
11304 dtrace_copyio_preflight(
11305 	__unused addr64_t va)
11306 {
11307 	if (current_map() == kernel_map) {
11308 		return KERN_FAILURE;
11309 	} else {
11310 		return KERN_SUCCESS;
11311 	}
11312 }
11313 
/* No post-copyio cleanup is required on ARM; always succeeds. */
kern_return_t
dtrace_copyio_postflight(
	__unused addr64_t va)
{
	return KERN_SUCCESS;
}
11320 #endif /* CONFIG_DTRACE */
11321 
11322 
/* Flush contexts carry no state on ARM; initialization is a no-op. */
void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}
11327 
11328 
11329 void
11330 pmap_flush(
11331 	__unused pmap_flush_context *cpus_to_flush)
11332 {
11333 	/* not implemented yet */
11334 	return;
11335 }
11336 
11337 #if XNU_MONITOR
11338 
11339 /*
11340  * Enforce that the address range described by kva and nbytes is not currently
11341  * PPL-owned, and won't become PPL-owned while pinned.  This is to prevent
11342  * unintentionally writing to PPL-owned memory.
11343  */
void
pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
	vm_offset_t end;
	if (os_add_overflow(kva, nbytes, &end)) {
		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
	}
	/* Walk the range one page at a time, starting from the page base. */
	for (vm_offset_t ckva = trunc_page(kva); ckva < end; ckva = round_page(ckva + 1)) {
		pmap_paddr_t pa = kvtophys_nofail(ckva);
		pp_attr_t attr;
		unsigned int pai = pa_index(pa);
		/* Refuse to pin through the static physical-aperture mapping itself. */
		if (ckva == phystokv(pa)) {
			panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
		}
		/*
		 * CAS loop: atomically set PP_ATTR_NO_MONITOR unless the page is
		 * already PPL-owned (PP_ATTR_MONITOR set), which would make a
		 * pinned kernel write a PPL-memory violation.
		 */
		do {
			attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
			if (attr & PP_ATTR_MONITOR) {
				panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
			}
		} while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
	}
}
11366 
/* Undo pmap_pin_kernel_pages(): clear PP_ATTR_NO_MONITOR on each page. */
void
pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
	vm_offset_t end;
	if (os_add_overflow(kva, nbytes, &end)) {
		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
	}
	for (vm_offset_t ckva = trunc_page(kva); ckva < end; ckva = round_page(ckva + 1)) {
		pmap_paddr_t pa = kvtophys_nofail(ckva);

		/* Unpinning a page that was never pinned indicates a caller bug. */
		if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
			panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
		}
		/* Pinned pages must never have become PPL-owned in the meantime. */
		assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
		ppattr_pa_clear_no_monitor(pa);
	}
}
11384 
11385 /**
11386  * Lock down a page, making all mappings read-only, and preventing further
11387  * mappings or removal of this particular kva's mapping. Effectively, it makes
11388  * the physical page at kva immutable (see the ppl_writable parameter for an
11389  * exception to this).
11390  *
11391  * @param kva Valid address to any mapping of the physical page to lockdown.
11392  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11393  * @param ppl_writable True if the PPL should still be able to write to the page
11394  *                     using the physical aperture mapping. False will make the
11395  *                     page read-only for both the kernel and PPL in the
11396  *                     physical aperture.
11397  */
11398 
MARK_AS_PMAP_TEXT static void
pmap_ppl_lockdown_page(vm_address_t kva, uint64_t lockdown_flag, bool ppl_writable)
{
	/* Default lockdown: demote all existing alias mappings to read-only. */
	pmap_ppl_lockdown_page_with_prot(kva, lockdown_flag, ppl_writable, VM_PROT_READ);
}
11404 
11405 /**
11406  * Lock down a page, giving all mappings the specified maximum permissions, and
11407  * preventing further mappings or removal of this particular kva's mapping.
11408  * Effectively, it makes the physical page at kva immutable (see the ppl_writable
11409  * parameter for an exception to this).
11410  *
11411  * @param kva Valid address to any mapping of the physical page to lockdown.
11412  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11413  * @param ppl_writable True if the PPL should still be able to write to the page
11414  *                     using the physical aperture mapping. False will make the
11415  *                     page read-only for both the kernel and PPL in the
11416  *                     physical aperture.
11417  * @param prot Maximum permissions to allow in existing alias mappings
11418  */
MARK_AS_PMAP_TEXT static void
pmap_ppl_lockdown_page_with_prot(vm_address_t kva, uint64_t lockdown_flag, bool ppl_writable, vm_prot_t prot)
{
	const pmap_paddr_t pa = kvtophys_nofail(kva);
	const unsigned int pai = pa_index(pa);

	/* Caller must pass exactly a recognized lockdown-reason bit. */
	assert(lockdown_flag & PVH_FLAG_LOCKDOWN_MASK);
	pvh_lock(pai);
	pv_entry_t **pvh = pai_to_pvh(pai);
	const vm_offset_t pvh_flags = pvh_get_flags(pvh);

	/* PPL-owned pages cannot be locked down on behalf of the kernel. */
	if (__improbable(ppattr_pa_test_monitor(pa))) {
		panic("%s: %#lx (page %llx) belongs to PPL", __func__, kva, pa);
	}

	/* Double lockdown, or lockdown of executable pages, is not allowed. */
	if (__improbable(pvh_flags & (PVH_FLAG_LOCKDOWN_MASK | PVH_FLAG_EXEC))) {
		panic("%s: %#lx already locked down/executable (%#llx)",
		    __func__, kva, (uint64_t)pvh_flags);
	}


	pvh_set_flags(pvh, pvh_flags | lockdown_flag);

	/* Update the physical aperture mapping to prevent kernel write access. */
	const unsigned int new_xprr_perm =
	    (ppl_writable) ? XPRR_PPL_RW_PERM : XPRR_KERN_RO_PERM;
	pmap_set_xprr_perm(pai, XPRR_KERN_RW_PERM, new_xprr_perm);

	pvh_unlock(pai);

	/* Demote every existing alias mapping of this page to at most `prot`. */
	pmap_page_protect_options_internal((ppnum_t)atop(pa), prot, 0, NULL);

	/**
	 * Double-check that the mapping didn't change physical addresses before the
	 * LOCKDOWN flag was set (there is a brief window between the above
	 * kvtophys() and pvh_lock() calls where the mapping could have changed).
	 *
	 * This doesn't solve the ABA problem, but this doesn't have to since once
	 * the pvh_lock() is grabbed no new mappings can be created on this physical
	 * page without the LOCKDOWN flag already set (so any future mappings can
	 * only be RO, and no existing mappings can be removed).
	 */
	if (kvtophys_nofail(kva) != pa) {
		panic("%s: Physical address of mapping changed while setting LOCKDOWN "
		    "flag %#lx %#llx", __func__, kva, (uint64_t)pa);
	}
}
11466 
11467 /**
11468  * Helper for releasing a page from being locked down to the PPL, making it writable to the
11469  * kernel once again.
11470  *
11471  * @note This must be paired with a pmap_ppl_lockdown_page() call. Any attempts
11472  *       to unlockdown a page that was never locked down, will panic.
11473  *
11474  * @param pai physical page index to release from lockdown.  PVH lock for this page must be held.
11475  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11476  * @param ppl_writable This must match whatever `ppl_writable` parameter was
11477  *                     passed to the paired pmap_ppl_lockdown_page() call. Any
11478  *                     deviation will result in a panic.
11479  */
MARK_AS_PMAP_TEXT static void
pmap_ppl_unlockdown_page_locked(unsigned int pai, uint64_t lockdown_flag, bool ppl_writable)
{
	pvh_assert_locked(pai);
	pv_entry_t **pvh = pai_to_pvh(pai);
	const vm_offset_t pvh_flags = pvh_get_flags(pvh);

	/* Unlockdown must be paired with a prior lockdown for the same reason bit. */
	if (__improbable(!(pvh_flags & lockdown_flag))) {
		panic("%s: unlockdown attempt on not locked down pai %d, type=0x%llx, PVH flags=0x%llx",
		    __func__, pai, (unsigned long long)lockdown_flag, (unsigned long long)pvh_flags);
	}


	pvh_set_flags(pvh, pvh_flags & ~lockdown_flag);

	/* Restore the pre-lockdown physical aperture mapping permissions. */
	const unsigned int old_xprr_perm =
	    (ppl_writable) ? XPRR_PPL_RW_PERM : XPRR_KERN_RO_PERM;
	pmap_set_xprr_perm(pai, old_xprr_perm, XPRR_KERN_RW_PERM);
}
11500 
11501 /**
11502  * Release a page from being locked down to the PPL, making it writable to the
11503  * kernel once again.
11504  *
11505  * @note This must be paired with a pmap_ppl_lockdown_page() call. Any attempts
11506  *       to unlockdown a page that was never locked down, will panic.
11507  *
11508  * @param kva Valid address to any mapping of the physical page to unlockdown.
11509  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11510  * @param ppl_writable This must match whatever `ppl_writable` parameter was
11511  *                     passed to the paired pmap_ppl_lockdown_page() call. Any
11512  *                     deviation will result in a panic.
11513  */
MARK_AS_PMAP_TEXT static void
pmap_ppl_unlockdown_page(vm_address_t kva, uint64_t lockdown_flag, bool ppl_writable)
{
	const pmap_paddr_t pa = kvtophys_nofail(kva);
	const unsigned int pai = pa_index(pa);

	/* Caller must pass exactly a recognized lockdown-reason bit. */
	assert(lockdown_flag & PVH_FLAG_LOCKDOWN_MASK);
	/* Take the PV head lock and delegate to the locked variant. */
	pvh_lock(pai);
	pmap_ppl_unlockdown_page_locked(pai, lockdown_flag, ppl_writable);
	pvh_unlock(pai);
}
11525 
11526 #else /* XNU_MONITOR */
11527 
/* Without a PPL there is no PPL-owned memory to guard against; no-op. */
void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
11532 
/* Counterpart to the no-op pin on non-PPL configurations; also a no-op. */
void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
11537 
11538 #endif /* !XNU_MONITOR */
11539 
11540 
/*
 * Lock down a range of pages for code-signing purposes. On PPL builds the
 * CS-specific lockdown reason bit is used; otherwise no reason flag applies.
 */
MARK_AS_PMAP_TEXT static inline void
pmap_cs_lockdown_pages(vm_address_t kva, vm_size_t size, bool ppl_writable)
{
#if XNU_MONITOR
	pmap_ppl_lockdown_pages(kva, size, PVH_FLAG_LOCKDOWN_CS, ppl_writable);
#else
	pmap_ppl_lockdown_pages(kva, size, 0, ppl_writable);
#endif
}
11550 
/*
 * Release a code-signing lockdown taken by pmap_cs_lockdown_pages(); the
 * flag argument must mirror the one used at lockdown time.
 */
MARK_AS_PMAP_TEXT static inline void
pmap_cs_unlockdown_pages(vm_address_t kva, vm_size_t size, bool ppl_writable)
{
#if XNU_MONITOR
	pmap_ppl_unlockdown_pages(kva, size, PVH_FLAG_LOCKDOWN_CS, ppl_writable);
#else
	pmap_ppl_unlockdown_pages(kva, size, 0, ppl_writable);
#endif
}
11560 
11561 /**
11562  * Perform basic validation checks on the destination only and
11563  * corresponding offset/sizes prior to writing to a read only allocation.
11564  *
11565  * @note Should be called before writing to an allocation from the read
11566  * only allocator.
11567  *
11568  * @param zid The ID of the zone the allocation belongs to.
11569  * @param va VA of element being modified (destination).
11570  * @param offset Offset being written to, in the element.
11571  * @param new_data_size Size of modification.
11572  *
11573  */
11574 
11575 MARK_AS_PMAP_TEXT static void
11576 pmap_ro_zone_validate_element_dst(
11577 	zone_id_t           zid,
11578 	vm_offset_t         va,
11579 	vm_offset_t         offset,
11580 	vm_size_t           new_data_size)
11581 {
11582 	if (__improbable((zid < ZONE_ID__FIRST_RO) || (zid > ZONE_ID__LAST_RO))) {
11583 		panic("%s: ZoneID %u outside RO range %u - %u", __func__, zid,
11584 		    ZONE_ID__FIRST_RO, ZONE_ID__LAST_RO);
11585 	}
11586 
11587 	vm_size_t elem_size = zone_ro_size_params[zid].z_elem_size;
11588 
11589 	/* Check element is from correct zone and properly aligned */
11590 	zone_require_ro(zid, elem_size, (void*)va);
11591 
11592 	if (__improbable(new_data_size > (elem_size - offset))) {
11593 		panic("%s: New data size %lu too large for elem size %lu at addr %p",
11594 		    __func__, (uintptr_t)new_data_size, (uintptr_t)elem_size, (void*)va);
11595 	}
11596 	if (__improbable(offset >= elem_size)) {
11597 		panic("%s: Offset %lu too large for elem size %lu at addr %p",
11598 		    __func__, (uintptr_t)offset, (uintptr_t)elem_size, (void*)va);
11599 	}
11600 }
11601 
11602 
11603 /**
11604  * Perform basic validation checks on the source, destination and
11605  * corresponding offset/sizes prior to writing to a read only allocation.
11606  *
11607  * @note Should be called before writing to an allocation from the read
11608  * only allocator.
11609  *
11610  * @param zid The ID of the zone the allocation belongs to.
11611  * @param va VA of element being modified (destination).
11612  * @param offset Offset being written to, in the element.
11613  * @param new_data Pointer to new data (source).
11614  * @param new_data_size Size of modification.
11615  *
11616  */
11617 
11618 MARK_AS_PMAP_TEXT static void
11619 pmap_ro_zone_validate_element(
11620 	zone_id_t           zid,
11621 	vm_offset_t         va,
11622 	vm_offset_t         offset,
11623 	const vm_offset_t   new_data,
11624 	vm_size_t           new_data_size)
11625 {
11626 	vm_offset_t sum = 0;
11627 
11628 	if (__improbable(os_add_overflow(new_data, new_data_size, &sum))) {
11629 		panic("%s: Integer addition overflow %p + %lu = %lu",
11630 		    __func__, (void*)new_data, (uintptr_t)new_data_size, (uintptr_t)sum);
11631 	}
11632 
11633 	pmap_ro_zone_validate_element_dst(zid, va, offset, new_data_size);
11634 }
11635 
11636 /**
11637  * Ensure that physical page is locked down and pinned, before writing to it.
11638  *
11639  * @note Should be called before writing to an allocation from the read
11640  * only allocator. This function pairs with pmap_ro_zone_unlock_phy_page,
11641  * ensure that it is called after the modification.
11642  *
11643  *
11644  * @param pa Physical address of the element being modified.
11645  * @param va Virtual address of element being modified.
11646  * @param size Size of the modification.
11647  *
11648  */
11649 
MARK_AS_PMAP_TEXT static void
pmap_ro_zone_lock_phy_page(
	const pmap_paddr_t  pa,
	vm_offset_t         va,
	vm_size_t           size)
{
	const unsigned int pai = pa_index(pa);
	/* PV head lock is held across the write; pmap_ro_zone_unlock_phy_page() drops it. */
	pvh_lock(pai);

	/* Ensure that the physical page is locked down */
#if XNU_MONITOR
	pv_entry_t **pvh = pai_to_pvh(pai);
	if (!(pvh_get_flags(pvh) & PVH_FLAG_LOCKDOWN_RO)) {
		panic("%s: Physical page not locked down %llx", __func__, pa);
	}
#endif /* XNU_MONITOR */

	/* Ensure page can't become PPL-owned memory before the memcpy occurs */
	pmap_pin_kernel_pages(va, size);
}
11670 
11671 /**
11672  * Unlock and unpin physical page after writing to it.
11673  *
11674  * @note Should be called after writing to an allocation from the read
11675  * only allocator. This function pairs with pmap_ro_zone_lock_phy_page,
11676  * ensure that it has been called prior to the modification.
11677  *
11678  * @param pa Physical address of the element that was modified.
11679  * @param va Virtual address of element that was modified.
11680  * @param size Size of the modification.
11681  *
11682  */
11683 
MARK_AS_PMAP_TEXT static void
pmap_ro_zone_unlock_phy_page(
	const pmap_paddr_t  pa,
	vm_offset_t         va,
	vm_size_t           size)
{
	const unsigned int pai = pa_index(pa);
	/* Reverse order of pmap_ro_zone_lock_phy_page(): unpin, then drop the PV lock. */
	pmap_unpin_kernel_pages(va, size);
	pvh_unlock(pai);
}
11694 
11695 /**
11696  * Function to copy kauth_cred from new_data to kv.
11697  * Function defined in "kern_prot.c"
11698  *
11699  * @note Will be removed upon completion of
11700  * <rdar://problem/72635194> Compiler PAC support for memcpy.
11701  *
11702  * @param kv Address to copy new data to.
11703  * @param new_data Pointer to new data.
11704  *
11705  */
11706 
11707 extern void
11708 kauth_cred_copy(const uintptr_t kv, const uintptr_t new_data);
11709 
11710 /**
11711  * Zalloc-specific memcpy that writes through the physical aperture
11712  * and ensures the element being modified is from a read-only zone.
11713  *
11714  * @note Designed to work only with the zone allocator's read-only submap.
11715  *
11716  * @param zid The ID of the zone to allocate from.
11717  * @param va VA of element to be modified.
11718  * @param offset Offset from element.
11719  * @param new_data Pointer to new data.
11720  * @param new_data_size	Size of modification.
11721  *
11722  */
11723 
/* Dispatch the read-only-zone memcpy to the PPL on XNU_MONITOR builds. */
void
pmap_ro_zone_memcpy(
	zone_id_t           zid,
	vm_offset_t         va,
	vm_offset_t         offset,
	const vm_offset_t   new_data,
	vm_size_t           new_data_size)
{
#if XNU_MONITOR
	pmap_ro_zone_memcpy_ppl(zid, va, offset, new_data, new_data_size);
#else /* XNU_MONITOR */
	pmap_ro_zone_memcpy_internal(zid, va, offset, new_data, new_data_size);
#endif /* XNU_MONITOR */
}
11738 
11739 MARK_AS_PMAP_TEXT void
11740 pmap_ro_zone_memcpy_internal(
11741 	zone_id_t             zid,
11742 	vm_offset_t           va,
11743 	vm_offset_t           offset,
11744 	const vm_offset_t     new_data,
11745 	vm_size_t             new_data_size)
11746 {
11747 	const pmap_paddr_t pa = kvtophys_nofail(va + offset);
11748 
11749 	if (!new_data || new_data_size == 0) {
11750 		return;
11751 	}
11752 
11753 	pmap_ro_zone_validate_element(zid, va, offset, new_data, new_data_size);
11754 	pmap_ro_zone_lock_phy_page(pa, va, new_data_size);
11755 	memcpy((void*)phystokv(pa), (void*)new_data, new_data_size);
11756 	pmap_ro_zone_unlock_phy_page(pa, va, new_data_size);
11757 }
11758 
11759 /**
11760  * Zalloc-specific function to atomically mutate fields of an element that
 * belongs to a read-only zone, via the physical aperture.
11762  *
11763  * @note Designed to work only with the zone allocator's read-only submap.
11764  *
11765  * @param zid The ID of the zone the element belongs to.
11766  * @param va VA of element to be modified.
11767  * @param offset Offset in element.
11768  * @param op Atomic operation to perform.
11769  * @param value	Mutation value.
11770  *
11771  */
11772 
/* Dispatch the read-only-zone atomic op to the PPL on XNU_MONITOR builds. */
uint64_t
pmap_ro_zone_atomic_op(
	zone_id_t             zid,
	vm_offset_t           va,
	vm_offset_t           offset,
	zro_atomic_op_t       op,
	uint64_t              value)
{
#if XNU_MONITOR
	return pmap_ro_zone_atomic_op_ppl(zid, va, offset, op, value);
#else /* XNU_MONITOR */
	return pmap_ro_zone_atomic_op_internal(zid, va, offset, op, value);
#endif /* XNU_MONITOR */
}
11787 
/**
 * Implementation of pmap_ro_zone_atomic_op(): atomically mutate a field of a
 * read-only zone element through the physical aperture.
 *
 * @param zid The ID of the zone the element belongs to.
 * @param va VA of the element to be modified.
 * @param offset Offset of the target field within the element.
 * @param op Atomic operation to perform; its low 4 bits encode the operand
 *           width in bytes (see the "op & 0xf" extraction below).
 * @param value Mutation value.
 *
 * @return The value produced by __zalloc_ro_mut_atomic() for this op.
 */
MARK_AS_PMAP_TEXT uint64_t
pmap_ro_zone_atomic_op_internal(
	zone_id_t             zid,
	vm_offset_t           va,
	vm_offset_t           offset,
	zro_atomic_op_t       op,
	uint64_t              value)
{
	const pmap_paddr_t pa = kvtophys_nofail(va + offset);
	/* The operand width in bytes is encoded in the low nibble of the op. */
	vm_size_t value_size = op & 0xf;

	/* Validate that the destination lies within an element of zone zid. */
	pmap_ro_zone_validate_element_dst(zid, va, offset, value_size);
	/* Open the physical page for writing just long enough to mutate it. */
	pmap_ro_zone_lock_phy_page(pa, va, value_size);
	value = __zalloc_ro_mut_atomic(phystokv(pa), op, value);
	pmap_ro_zone_unlock_phy_page(pa, va, value_size);

	return value;
}
11806 
11807 /**
11808  * bzero for allocations from read only zones, that writes through the
11809  * physical aperture.
11810  *
11811  * @note This is called by the zfree path of all allocations from read
11812  * only zones.
11813  *
11814  * @param zid The ID of the zone the allocation belongs to.
11815  * @param va VA of element to be zeroed.
11816  * @param offset Offset in the element.
11817  * @param size	Size of allocation.
11818  *
11819  */
11820 
void
pmap_ro_zone_bzero(
	zone_id_t       zid,
	vm_offset_t     va,
	vm_offset_t     offset,
	vm_size_t       size)
{
	/* On PPL-enabled builds the zeroing must happen inside the monitor. */
#if XNU_MONITOR
	pmap_ro_zone_bzero_ppl(zid, va, offset, size);
#else /* XNU_MONITOR */
	pmap_ro_zone_bzero_internal(zid, va, offset, size);
#endif /* XNU_MONITOR */
}
11834 
/**
 * Implementation of pmap_ro_zone_bzero(): zero a read-only zone allocation
 * by writing through the physical aperture.
 *
 * @param zid The ID of the zone the allocation belongs to.
 * @param va VA of the element to be zeroed.
 * @param offset Offset into the element.
 * @param size Number of bytes to zero.
 */
MARK_AS_PMAP_TEXT void
pmap_ro_zone_bzero_internal(
	zone_id_t       zid,
	vm_offset_t     va,
	vm_offset_t     offset,
	vm_size_t       size)
{
	const pmap_paddr_t pa = kvtophys_nofail(va + offset);
	/* Validate the range (0 source: there is no data being copied in). */
	pmap_ro_zone_validate_element(zid, va, offset, 0, size);
	/* Open the physical page for writing, zero it, then re-protect it. */
	pmap_ro_zone_lock_phy_page(pa, va, size);
	bzero((void*)phystokv(pa), size);
	pmap_ro_zone_unlock_phy_page(pa, va, size);
}
11848 
11849 /**
11850  * Removes write access from the Physical Aperture.
11851  *
11852  * @note For non-PPL devices, it simply makes all virtual mappings RO.
11853  * @note Designed to work only with the zone allocator's read-only submap.
11854  *
 * @param va VA of the page to remove write access from.
11856  *
11857  */
MARK_AS_PMAP_TEXT static void
pmap_phys_write_disable(vm_address_t va)
{
#if XNU_MONITOR
	/* PPL: lock the page down so only read-only mappings of it remain. */
	pmap_ppl_lockdown_page(va, PVH_FLAG_LOCKDOWN_RO, true);
#else /* XNU_MONITOR */
	/* Non-PPL: downgrade every mapping of the underlying page to read-only. */
	pmap_page_protect(atop_kernel(kvtophys(va)), VM_PROT_READ);
#endif /* XNU_MONITOR */
}
11867 
11868 #define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)
11869 
/**
 * Count resident and compressed bytes within a VA range that is covered by a
 * single twig-level table entry of the given pmap.
 *
 * @note start and end must be page-aligned and must not span more than one
 *       twig entry; violations panic.
 *
 * @param pmap The pmap to query; NULL yields PMAP_RESIDENT_INVALID.
 * @param start Start of the VA range.
 * @param end End of the VA range.
 * @param compressed_bytes_p If non-NULL, the number of compressed bytes found
 *        is ADDED ("+=") to the value already stored there.
 *
 * @return The number of resident bytes, or PMAP_RESIDENT_INVALID when the
 *         pmap is NULL or has no table entry for the range.
 */
MARK_AS_PMAP_TEXT mach_vm_size_t
pmap_query_resident_internal(
	pmap_t                  pmap,
	vm_map_address_t        start,
	vm_map_address_t        end,
	mach_vm_size_t          *compressed_bytes_p)
{
	mach_vm_size_t  resident_bytes = 0;
	mach_vm_size_t  compressed_bytes = 0;

	pt_entry_t     *bpte, *epte;
	pt_entry_t     *pte_p;
	tt_entry_t     *tte_p;

	if (pmap == NULL) {
		return PMAP_RESIDENT_INVALID;
	}

	validate_pmap(pmap);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/* Ensure that this request is valid, and addresses exactly one TTE. */
	if (__improbable((start % pt_attr_page_size(pt_attr)) ||
	    (end % pt_attr_page_size(pt_attr)))) {
		panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
	}

	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	pmap_lock(pmap, PMAP_LOCK_SHARED);
	tte_p = pmap_tte(pmap, start);
	if (tte_p == (tt_entry_t *) NULL) {
		pmap_unlock(pmap, PMAP_LOCK_SHARED);
		return PMAP_RESIDENT_INVALID;
	}
	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pt_attr, start)];
		epte = &pte_p[pte_index(pt_attr, end)];

		/* Classify each PTE in the range as compressed or resident. */
		for (; bpte < epte; bpte++) {
			if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
				compressed_bytes += pt_attr_page_size(pt_attr);
			} else if (pa_valid(pte_to_pa(*bpte))) {
				resident_bytes += pt_attr_page_size(pt_attr);
			}
		}
	}
	pmap_unlock(pmap, PMAP_LOCK_SHARED);

	if (compressed_bytes_p) {
		/* The out-parameter lives in kernel memory; pin it while writing. */
		pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
		*compressed_bytes_p += compressed_bytes;
		pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
	}

	return resident_bytes;
}
11931 
11932 mach_vm_size_t
11933 pmap_query_resident(
11934 	pmap_t                  pmap,
11935 	vm_map_address_t        start,
11936 	vm_map_address_t        end,
11937 	mach_vm_size_t          *compressed_bytes_p)
11938 {
11939 	mach_vm_size_t          total_resident_bytes;
11940 	mach_vm_size_t          compressed_bytes;
11941 	vm_map_address_t        va;
11942 
11943 
11944 	if (pmap == PMAP_NULL) {
11945 		if (compressed_bytes_p) {
11946 			*compressed_bytes_p = 0;
11947 		}
11948 		return 0;
11949 	}
11950 
11951 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11952 
11953 	total_resident_bytes = 0;
11954 	compressed_bytes = 0;
11955 
11956 	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
11957 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
11958 	    VM_KERNEL_ADDRHIDE(end));
11959 
11960 	va = start;
11961 	while (va < end) {
11962 		vm_map_address_t l;
11963 		mach_vm_size_t resident_bytes;
11964 
11965 		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
11966 
11967 		if (l > end) {
11968 			l = end;
11969 		}
11970 #if XNU_MONITOR
11971 		resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
11972 #else
11973 		resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
11974 #endif
11975 		if (resident_bytes == PMAP_RESIDENT_INVALID) {
11976 			break;
11977 		}
11978 
11979 		total_resident_bytes += resident_bytes;
11980 
11981 		va = l;
11982 	}
11983 
11984 	if (compressed_bytes_p) {
11985 		*compressed_bytes_p = compressed_bytes;
11986 	}
11987 
11988 	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
11989 	    total_resident_bytes);
11990 
11991 	return total_resident_bytes;
11992 }
11993 
11994 #if MACH_ASSERT
11995 static void
11996 pmap_check_ledgers(
11997 	pmap_t pmap)
11998 {
11999 	int     pid;
12000 	char    *procname;
12001 
12002 	if (pmap->pmap_pid == 0 || pmap->pmap_pid == -1) {
12003 		/*
12004 		 * This pmap was not or is no longer fully associated
12005 		 * with a task (e.g. the old pmap after a fork()/exec() or
12006 		 * spawn()).  Its "ledger" still points at a task that is
12007 		 * now using a different (and active) address space, so
12008 		 * we can't check that all the pmap ledgers are balanced here.
12009 		 *
12010 		 * If the "pid" is set, that means that we went through
12011 		 * pmap_set_process() in task_terminate_internal(), so
12012 		 * this task's ledger should not have been re-used and
12013 		 * all the pmap ledgers should be back to 0.
12014 		 */
12015 		return;
12016 	}
12017 
12018 	pid = pmap->pmap_pid;
12019 	procname = pmap->pmap_procname;
12020 
12021 	vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
12022 }
12023 #endif /* MACH_ASSERT */
12024 
/* Deliberate no-op on this architecture; both arguments are ignored. */
void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
12029 
12030 /**
12031  * The minimum shared region nesting size is used by the VM to determine when to
12032  * break up large mappings to nested regions. The smallest size that these
12033  * mappings can be broken into is determined by what page table level those
12034  * regions are being nested in at and the size of the page tables.
12035  *
12036  * For instance, if a nested region is nesting at L2 for a process utilizing
12037  * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
12038  * block entry).
12039  *
12040  * @param pmap The target pmap to determine the block size based on whether it's
12041  *             using 16KB or 4KB page tables.
12042  */
uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/**
	 * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
	 * 4KB pages). This means that a target pmap will contain L2 entries that
	 * point to shared L3 page tables in the shared region pmap.
	 */
	/* The twig size is the span covered by one L2 entry for this geometry. */
	return pt_attr_twig_size(pt_attr);
}
12055 
12056 boolean_t
12057 pmap_enforces_execute_only(
12058 	pmap_t pmap)
12059 {
12060 	return pmap != kernel_pmap;
12061 }
12062 
/**
 * Set the per-pmap flag tracking whether the owning VM map enforces code
 * signing.
 */
MARK_AS_PMAP_TEXT void
pmap_set_vm_map_cs_enforced_internal(
	pmap_t pmap,
	bool new_value)
{
	validate_pmap_mutable(pmap);
	pmap->pmap_vm_map_cs_enforced = new_value;
}
12071 
/**
 * Kernel entry point for recording the VM map's code-signing enforcement
 * state on the pmap; dispatches into the PPL when one is present.
 */
void
pmap_set_vm_map_cs_enforced(
	pmap_t pmap,
	bool new_value)
{
#if XNU_MONITOR
	pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
#else
	pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
#endif
}
12083 
12084 extern int cs_process_enforcement_enable;
12085 bool
12086 pmap_get_vm_map_cs_enforced(
12087 	pmap_t pmap)
12088 {
12089 	if (cs_process_enforcement_enable) {
12090 		return true;
12091 	}
12092 	return pmap->pmap_vm_map_cs_enforced;
12093 }
12094 
/* Deliberate no-op in this configuration; the pmap records no JIT state. */
MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
	__unused pmap_t pmap)
{
	return;
}
12101 
/**
 * Kernel entry point for marking a pmap as JIT-entitled; dispatches into the
 * PPL when one is present (a no-op in this configuration either way).
 */
void
pmap_set_jit_entitled(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_set_jit_entitled_ppl(pmap);
#else
	pmap_set_jit_entitled_internal(pmap);
#endif
}
12112 
/* JIT entitlement is never reported for this configuration. */
bool
pmap_get_jit_entitled(
	__unused pmap_t pmap)
{
	return false;
}
12119 
/* Deliberate no-op in this configuration; the pmap records no TPRO state. */
MARK_AS_PMAP_TEXT void
pmap_set_tpro_internal(
	__unused pmap_t pmap)
{
	return;
}
12126 
/**
 * Kernel entry point for marking a pmap as using TPRO; dispatches into the
 * PPL when one is present (a no-op in this configuration either way).
 */
void
pmap_set_tpro(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_set_tpro_ppl(pmap);
#else /* XNU_MONITOR */
	pmap_set_tpro_internal(pmap);
#endif /* XNU_MONITOR */
}
12137 
/* TPRO is never reported as enabled for this configuration. */
bool
pmap_get_tpro(
	__unused pmap_t pmap)
{
	return false;
}
12144 
/* Count of queries that raced with a concurrent PTE change and restarted. */
uint64_t pmap_query_page_info_retries MARK_AS_PMAP_DATA;

/**
 * Compute the disposition of the page mapped at va within the given pmap.
 *
 * @param pmap Target pmap; PMAP_NULL and the kernel pmap are rejected.
 * @param va VA whose mapping should be examined.
 * @param disp_p Out parameter; receives a combination of PMAP_QUERY_PAGE_*
 *        flags (present / compressed / altacct / reusable / internal).
 *
 * @return KERN_SUCCESS, or KERN_INVALID_ARGUMENT for a rejected pmap.
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_query_page_info_internal(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
	pmap_paddr_t    pa;
	int             disp;
	unsigned int    pai;
	pt_entry_t      *pte_p, pte;
	pv_entry_t      **pv_h, *pve_p;

	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		*disp_p = 0;
		pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		return KERN_INVALID_ARGUMENT;
	}

	validate_pmap(pmap);
	pmap_lock(pmap, PMAP_LOCK_SHARED);

try_again:
	disp = 0;
	pte_p = pmap_pte(pmap, va);
	if (pte_p == PT_ENTRY_NULL) {
		goto done;
	}
	/* Snapshot the PTE; it is re-checked below after taking the PV lock. */
	pte = *(volatile pt_entry_t*)pte_p;
	pa = pte_to_pa(pte);
	if (pa == 0) {
		if (ARM_PTE_IS_COMPRESSED(pte, pte_p)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (pte & ARM_PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = pa_index(pa);
		if (!pa_valid(pa)) {
			goto done;
		}
		pvh_lock(pai);
		if (pte != *(volatile pt_entry_t*)pte_p) {
			/* something changed: try again */
			pvh_unlock(pai);
			pmap_query_page_info_retries++;
			goto try_again;
		}
		/* Locate the PV entry for this mapping, if the page keeps a PVE list. */
		pv_h = pai_to_pvh(pai);
		pve_p = PV_ENTRY_NULL;
		int pve_ptep_idx = 0;
		if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			pve_p = pvh_pve_list(pv_h);
			while (pve_p != PV_ENTRY_NULL &&
			    (pve_ptep_idx = pve_find_ptep_index(pve_p, pte_p)) == -1) {
				pve_p = pve_next(pve_p);
			}
		}

		if (ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx)) {
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (ppattr_test_reusable(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (ppattr_pve_is_internal(pai, pve_p, pve_ptep_idx)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
		pvh_unlock(pai);
	}

done:
	pmap_unlock(pmap, PMAP_LOCK_SHARED);
	pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	*disp_p = disp;
	pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	return KERN_SUCCESS;
}
12225 
/**
 * Retrieve the disposition (PMAP_QUERY_PAGE_* flags) of the page mapped at
 * va in the given pmap; dispatches into the PPL when one is present.
 */
kern_return_t
pmap_query_page_info(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
#if XNU_MONITOR
	return pmap_query_page_info_ppl(pmap, va, disp_p);
#else
	return pmap_query_page_info_internal(pmap, va, disp_p);
#endif
}
12238 
12239 
12240 
12241 static vm_map_size_t
12242 pmap_user_va_size(pmap_t pmap __unused)
12243 {
12244 #if __ARM_MIXED_PAGE_SIZE__
12245 	uint64_t tcr_value = pmap_get_pt_attr(pmap)->pta_tcr_value;
12246 	return 1ULL << (64 - ((tcr_value >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK));
12247 #else
12248 	return 1ULL << (64 - T0SZ_BOOT);
12249 #endif
12250 }
12251 
12252 
12253 
/* This build has no PPL, so execution can never be inside one. */
bool
pmap_in_ppl(void)
{
	// Unsupported
	return false;
}
12260 
/* This build has no PPL at all. */
bool
pmap_has_ppl(void)
{
	// Unsupported
	return false;
}
12267 
/* I/O-filtered protected writes are not available on this platform; always panics. */
__attribute__((__noreturn__))
void
pmap_iofilter_protected_write(__unused vm_address_t addr, __unused uint64_t value, __unused uint64_t width)
{
	panic("%s called on an unsupported platform.", __FUNCTION__);
}
12274 
/* No PPL on this build, so there are no reserved PPL pages to claim. */
void *
pmap_claim_reserved_ppl_page(void)
{
	// Unsupported
	return NULL;
}
12281 
/* No PPL on this build, so there is nothing to free; kva is ignored. */
void
pmap_free_reserved_ppl_page(void __unused *kva)
{
	// Unsupported
}
12287 
12288 
12289 #if PMAP_CS_PPL_MONITOR
12290 
/* Immutable part of the trust cache runtime */
SECURITY_READ_ONLY_LATE(TrustCacheRuntime_t) ppl_trust_cache_rt;

/* Mutable part of the trust cache runtime */
MARK_AS_PMAP_DATA TrustCacheMutableRuntime_t ppl_trust_cache_mut_rt;

/* Lock for the trust cache runtime */
MARK_AS_PMAP_DATA decl_lck_rw_data(, ppl_trust_cache_rt_lock);

/**
 * Validate and load an Image4 trust cache payload into the PPL trust cache
 * runtime.
 *
 * The payload and manifest pages are locked down for the duration of the
 * load. On failure the payload is unlocked again; on success it remains
 * locked down for use by the runtime. The manifest is always unlocked before
 * returning.
 *
 * @param type Trust cache type to load; gated by entitlement checks.
 * @param pmap_img4_payload Address of a pmap_img4_payload_t wrapping the raw
 *        Image4 payload.
 * @param pmap_img4_payload_len Length of the wrapper payload.
 * @param img4_manifest Address of the Image4 manifest.
 * @param img4_manifest_len Length of the manifest.
 * @param img4_aux_manifest Auxiliary manifest; currently ignored.
 * @param img4_aux_manifest_len Auxiliary manifest length; currently ignored.
 *
 * @return KERN_SUCCESS on load, KERN_ALREADY_IN_SET for duplicates,
 *         KERN_RESOURCE_SHORTAGE when the caller must donate a page first,
 *         KERN_NOT_SUPPORTED for an old AMFI interface, or KERN_FAILURE.
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_load_trust_cache_with_type_internal(
	TCType_t type,
	const vm_address_t pmap_img4_payload, const vm_size_t pmap_img4_payload_len,
	const vm_address_t img4_manifest, const vm_size_t img4_manifest_len,
	const vm_address_t img4_aux_manifest, const vm_size_t img4_aux_manifest_len)
{
	kern_return_t ret = KERN_DENIED;
	pmap_img4_payload_t *payload = NULL;
	size_t img4_payload_len = 0;
	size_t payload_len_aligned = 0;
	size_t manifest_len_aligned = 0;

	/* Ignore the auxiliary manifest until we add support for it */
	(void)img4_aux_manifest;
	(void)img4_aux_manifest_len;


	/* Image4 interface needs to be available */
	if (img4if == NULL) {
		panic("image4 interface not available");
	}

	/* AMFI interface needs to be available */
	if (amfi == NULL) {
		panic("amfi interface not available");
	}

	const TrustCacheInterface_t *interface = &amfi->TrustCache;
	if (interface->version < 1) {
		/* AMFI change hasn't landed in the build */
		pmap_cs_log_error("unable to load trust cache (type: %u): interface not supported", type);
		return KERN_NOT_SUPPORTED;
	}

#if PMAP_CS_INCLUDE_CODE_SIGNING
	if (pmap_cs) {
		if ((type == kTCTypeStatic) || (type == kTCTypeEngineering) || (type == kTCTypeLegacy)) {
			panic("trust cache type not loadable from interface: %u", type);
		} else if (type >= kTCTypeTotal) {
			panic("attempted to load an unsupported trust cache type: %u", type);
		}

		/* Validate entitlement for the calling process */
		if (TCTypeConfig[type].entitlementValue != NULL) {
			const bool entitlement_satisfied = check_entitlement_pmap(
				NULL,
				"com.apple.private.pmap.load-trust-cache",
				TCTypeConfig[type].entitlementValue,
				false,
				true);

			if (entitlement_satisfied == false) {
				panic("attempted to load trust cache without entitlement: %u", type);
			}
		}
	}
#endif

	/* AppleImage4 validation uses CoreCrypto -- requires a spare page */
	ret = pmap_reserve_ppl_page();
	if (ret != KERN_SUCCESS) {
		if (ret != KERN_RESOURCE_SHORTAGE) {
			pmap_cs_log_error("unable to load trust cache (type: %u): unable to reserve page", type);
		}
		return ret;
	}

	/* Align the passed in lengths to the page size -- round_page is overflow safe */
	payload_len_aligned = round_page(pmap_img4_payload_len);
	manifest_len_aligned = round_page(img4_manifest_len);

	/* Ensure we have valid data passed in */
	pmap_cs_assert_addr(pmap_img4_payload, payload_len_aligned, false, false);
	pmap_cs_assert_addr(img4_manifest, manifest_len_aligned, false, false);

	/*
	 * Lockdown the data passed in. The pmap image4 payload also contains the trust cache
	 * data structure used by libTrustCache to manage the payload. We need to be able to
	 * write to that data structure, so we keep the payload PPL writable.
	 */
	pmap_cs_lockdown_pages(pmap_img4_payload, payload_len_aligned, true);
	pmap_cs_lockdown_pages(img4_manifest, manifest_len_aligned, false);

	/* Should be safe to read from this now */
	payload = (pmap_img4_payload_t*)pmap_img4_payload;

	/* Acquire a writable version of the trust cache data structure */
	TrustCache_t *trust_cache = &payload->trust_cache;
	trust_cache = (TrustCache_t*)phystokv(kvtophys_nofail((vm_offset_t)trust_cache));

	/* Calculate the correct length of the img4 payload */
	if (os_sub_overflow(pmap_img4_payload_len, sizeof(pmap_img4_payload_t), &img4_payload_len)) {
		panic("underflow on the img4_payload_len: %lu", pmap_img4_payload_len);
	}

	/* Exclusively lock the runtime */
	lck_rw_lock_exclusive(&ppl_trust_cache_rt_lock);

	/* Load the trust cache */
	TCReturn_t tc_ret = interface->load(
		&ppl_trust_cache_rt,
		type,
		trust_cache,
		(const uintptr_t)payload->img4_payload, img4_payload_len,
		(const uintptr_t)img4_manifest, img4_manifest_len);

	/* Unlock the runtime */
	lck_rw_unlock_exclusive(&ppl_trust_cache_rt_lock);

	if (tc_ret.error == kTCReturnSuccess) {
		ret = KERN_SUCCESS;
	} else {
		if (tc_ret.error == kTCReturnDuplicate) {
			ret = KERN_ALREADY_IN_SET;
		} else {
			pmap_cs_log_error("unable to load trust cache (TCReturn: 0x%02X | 0x%02X | %u)",
			    tc_ret.component, tc_ret.error, tc_ret.uniqueError);

			ret = KERN_FAILURE;
		}

		/* Unlock the payload data */
		pmap_cs_unlockdown_pages(pmap_img4_payload, payload_len_aligned, true);
		trust_cache = NULL;
		payload = NULL;
	}

	/* Unlock the manifest since it is no longer needed */
	pmap_cs_unlockdown_pages(img4_manifest, manifest_len_aligned, false);

	/* Return the CoreCrypto reserved page back to the free list */
	pmap_release_reserved_ppl_page();

	return ret;
}
12436 
/**
 * Load a trust cache of the given type into the PPL runtime.
 *
 * Retries the PPL call for as long as it reports KERN_RESOURCE_SHORTAGE,
 * donating a fresh page to the PPL before each retry.
 *
 * NOTE(review): the retry loop has no explicit bail-out if
 * pmap_alloc_page_for_ppl() cannot obtain a page -- confirm the allocator
 * blocks or panics rather than returning without donating.
 */
kern_return_t
pmap_load_trust_cache_with_type(
	TCType_t type,
	const vm_address_t pmap_img4_payload, const vm_size_t pmap_img4_payload_len,
	const vm_address_t img4_manifest, const vm_size_t img4_manifest_len,
	const vm_address_t img4_aux_manifest, const vm_size_t img4_aux_manifest_len)
{
	kern_return_t ret = KERN_DENIED;

	ret = pmap_load_trust_cache_with_type_ppl(
		type,
		pmap_img4_payload, pmap_img4_payload_len,
		img4_manifest, img4_manifest_len,
		img4_aux_manifest, img4_aux_manifest_len);

	while (ret == KERN_RESOURCE_SHORTAGE) {
		/* Allocate a page from the free list */
		pmap_alloc_page_for_ppl(0);

		/* Attempt the call again */
		ret = pmap_load_trust_cache_with_type_ppl(
			type,
			pmap_img4_payload, pmap_img4_payload_len,
			img4_manifest, img4_manifest_len,
			img4_aux_manifest, img4_aux_manifest_len);
	}

	return ret;
}
12466 
/**
 * Query the trust cache runtime for a CDHash. "Safe" variant: all arguments
 * are assumed to already live in PPL-trusted storage.
 *
 * @param query_type Which trust cache types to search; validated against
 *        kTCQueryTypeTotal before use.
 * @param cdhash The CDHash to look up.
 * @param query_token Receives details about the matching entry on success.
 *
 * @return KERN_SUCCESS when found, KERN_NOT_FOUND when absent,
 *         KERN_NOT_SUPPORTED for an old AMFI interface,
 *         KERN_INVALID_ARGUMENT for a bad query type, KERN_FAILURE otherwise.
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_query_trust_cache_safe(
	TCQueryType_t query_type,
	const uint8_t cdhash[kTCEntryHashSize],
	TrustCacheQueryToken_t *query_token)
{
	kern_return_t ret = KERN_NOT_FOUND;

	/* AMFI interface needs to be available */
	if (amfi == NULL) {
		panic("amfi interface not available");
	}

	const TrustCacheInterface_t *interface = &amfi->TrustCache;
	if (interface->version < 1) {
		/* AMFI change hasn't landed in the build */
		pmap_cs_log_error("unable to query trust cache: interface not supported");
		return KERN_NOT_SUPPORTED;
	}

	/* Validate the query type preemptively */
	if (query_type >= kTCQueryTypeTotal) {
		pmap_cs_log_error("unable to query trust cache: invalid query type: %u", query_type);
		return KERN_INVALID_ARGUMENT;
	}

	/* Lock the runtime as shared */
	lck_rw_lock_shared(&ppl_trust_cache_rt_lock);

	TCReturn_t tc_ret = interface->query(
		&ppl_trust_cache_rt,
		query_type,
		cdhash,
		query_token);

	/* Unlock the runtime */
	lck_rw_unlock_shared(&ppl_trust_cache_rt_lock);

	if (tc_ret.error == kTCReturnSuccess) {
		ret = KERN_SUCCESS;
	} else if (tc_ret.error == kTCReturnNotFound) {
		ret = KERN_NOT_FOUND;
	} else {
		ret = KERN_FAILURE;
		pmap_cs_log_error("trust cache query failed (TCReturn: 0x%02X | 0x%02X | %u)",
		    tc_ret.component, tc_ret.error, tc_ret.uniqueError);
	}

	return ret;
}
12517 
/**
 * PPL entry point for trust cache queries: copies the caller's CDHash into
 * PPL-private storage, performs the query through
 * pmap_query_trust_cache_safe(), and copies the resulting token back to
 * (pinned) kernel memory.
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_query_trust_cache_internal(
	TCQueryType_t query_type,
	const uint8_t cdhash[kTCEntryHashSize],
	TrustCacheQueryToken_t *query_token)
{
	kern_return_t ret = KERN_NOT_FOUND;
	TrustCacheQueryToken_t query_token_safe = {0};
	uint8_t cdhash_safe[kTCEntryHashSize] = {0};

	/* Copy in the CDHash into PPL storage */
	memcpy(cdhash_safe, cdhash, kTCEntryHashSize);

	/* Query through the safe API since we're in the PPL now */
	ret = pmap_query_trust_cache_safe(query_type, cdhash_safe, &query_token_safe);

	if (query_token != NULL) {
		/* Pin the caller's token while writing the result out. */
		pmap_pin_kernel_pages((vm_offset_t)query_token, sizeof(*query_token));
		memcpy((void*)query_token, (void*)&query_token_safe, sizeof(*query_token));
		pmap_unpin_kernel_pages((vm_offset_t)query_token, sizeof(*query_token));
	}

	return ret;
}
12542 
12543 kern_return_t
12544 pmap_query_trust_cache(
12545 	TCQueryType_t query_type,
12546 	const uint8_t cdhash[kTCEntryHashSize],
12547 	TrustCacheQueryToken_t *query_token)
12548 {
12549 	kern_return_t ret = KERN_NOT_FOUND;
12550 
12551 	ret = pmap_query_trust_cache_ppl(
12552 		query_type,
12553 		cdhash,
12554 		query_token);
12555 
12556 	return ret;
12557 }
12558 
/* Tracks whether the developer mode state has ever been explicitly set. */
MARK_AS_PMAP_DATA bool ppl_developer_mode_set =  false;
/* The current system-wide developer mode state, kept in PPL storage. */
MARK_AS_PMAP_DATA bool ppl_developer_mode_storage = false;

/**
 * Update the system-wide developer mode state.
 *
 * @param state New developer mode state; once the state has been set to
 *        false, any attempt to turn it back on panics.
 */
MARK_AS_PMAP_TEXT void
pmap_toggle_developer_mode_internal(
	bool state)
{
	bool state_set = os_atomic_load(&ppl_developer_mode_set, relaxed);

	/*
	 * Only the following state transitions are allowed:
	 * -- not set --> false
	 * -- not set --> true
	 * -- true --> false
	 * -- true --> true
	 * -- false --> false
	 *
	 * We never allow false --> true transitions.
	 */
	bool current = os_atomic_load(&ppl_developer_mode_storage, relaxed);

	if ((current == false) && (state == true) && state_set) {
		panic("PMAP_CS: attempted to enable developer mode incorrectly");
	}

	/* We're going to update the developer mode state, so update this first */
	os_atomic_store(&ppl_developer_mode_set, true, relaxed);

	/* Update the developer mode state on the system */
	os_atomic_store(&ppl_developer_mode_storage, state, release);
}
12590 
/**
 * Kernel entry point for toggling developer mode; the transition rules are
 * enforced inside the PPL by pmap_toggle_developer_mode_internal().
 */
void
pmap_toggle_developer_mode(
	bool state)
{
	pmap_toggle_developer_mode_ppl(state);
}
12597 
12598 #endif /* PMAP_CS_PPL_MONITOR */
12599 
12600 #if PMAP_CS_INCLUDE_CODE_SIGNING
12601 
/**
 * Ordering callback for the provisioning-profile red-black tree.
 *
 * Profiles are keyed purely by their object address; any total order over
 * those addresses is sufficient for the tree.
 *
 * @param profile0 First profile object.
 * @param profile1 Second profile object.
 *
 * @return -1, 0 or 1 as profile0's address is below, equal to, or above
 *         profile1's.
 */
static int
pmap_cs_profiles_rbtree_compare(
	void *profile0,
	void *profile1)
{
	/*
	 * Compare as integers: relational comparison of pointers into distinct
	 * objects is undefined behavior in C, so route through uintptr_t.
	 */
	const uintptr_t addr0 = (uintptr_t)profile0;
	const uintptr_t addr1 = (uintptr_t)profile1;

	if (addr0 < addr1) {
		return -1;
	}
	if (addr0 > addr1) {
		return 1;
	}
	return 0;
}
12614 
/* Red-black tree for managing provisioning profiles */
MARK_AS_PMAP_DATA static
RB_HEAD(pmap_cs_profiles_rbtree, _pmap_cs_profile) pmap_cs_registered_profiles;

RB_PROTOTYPE(pmap_cs_profiles_rbtree, _pmap_cs_profile, link, pmap_cs_profiles_rbtree_compare);
RB_GENERATE(pmap_cs_profiles_rbtree, _pmap_cs_profile, link, pmap_cs_profiles_rbtree_compare);

/* Lock for the profile red-black tree */
MARK_AS_PMAP_DATA decl_lck_rw_data(, pmap_cs_profiles_rbtree_lock);

/**
 * One-time initialization of the PPL provisioning-profile state: the lock
 * and the red-black tree of registered profiles.
 */
void
pmap_initialize_provisioning_profiles(void)
{
	/* Initialize the profiles red-black tree lock */
	lck_rw_init(&pmap_cs_profiles_rbtree_lock, &pmap_lck_grp, 0);
	/* NOTE(review): presumably must spin rather than sleep because it is taken within the PPL -- confirm. */
	pmap_cs_profiles_rbtree_lock.lck_rw_can_sleep = FALSE;

	/* Initialize the red-black tree itself */
	RB_INIT(&pmap_cs_registered_profiles);

	printf("initialized PPL provisioning profile data\n");
}
12637 
/**
 * Determine whether a profile provisions TestFlight ("beta") applications by
 * querying its entitlements for a true "beta-reports-active" value.
 *
 * @param profile_obj The profile object to inspect; profiles that provision
 *        no entitlements are never TestFlight profiles.
 *
 * @return true only when the entitlement query matches.
 */
static bool
pmap_is_testflight_profile(
	pmap_cs_profile_t *profile_obj)
{
	const char *entitlement_name = "beta-reports-active";
	const size_t entitlement_length = strlen(entitlement_name);
	CEQueryOperation_t query[2] = {0};

	/* If the profile provisions no entitlements, then it isn't a test flight one */
	if (profile_obj->entitlements_ctx == NULL) {
		return false;
	}

	/* Build our CoreEntitlements query */
	query[0].opcode = kCEOpSelectKey;
	memcpy(query[0].parameters.stringParameter.data, entitlement_name, entitlement_length);
	query[0].parameters.stringParameter.length = entitlement_length;
	query[1] = CEMatchBool(true);

	/* A kNoError result means the key was found with a boolean true value. */
	CEError_t ce_err = amfi->CoreEntitlements.ContextQuery(
		profile_obj->entitlements_ctx,
		query, 2);

	if (ce_err == amfi->CoreEntitlements.kNoError) {
		return true;
	}

	return false;
}
12667 
/**
 * Classify a provisioning profile as a development profile.
 *
 * A profile is NOT a development profile when it either provisions all
 * devices ("ProvisionsAllDevices", i.e. a universal provisioning profile)
 * or is a TestFlight profile; everything else is treated as development.
 *
 * @param profile_obj The profile object to classify.
 *
 * @return true for development profiles, false otherwise.
 */
static bool
pmap_is_development_profile(
	pmap_cs_profile_t *profile_obj)
{
	/* Check for UPP */
	const der_vm_context_t upp_ctx = amfi->CoreEntitlements.der_vm_execute(
		*profile_obj->profile_ctx,
		CESelectDictValue("ProvisionsAllDevices"));
	if (amfi->CoreEntitlements.der_vm_context_is_valid(upp_ctx) == true) {
		if (amfi->CoreEntitlements.der_vm_bool_from_context(upp_ctx) == true) {
			pmap_cs_log_info("%p: [UPP] non-development profile", profile_obj);
			return false;
		}
	}

	/* Check for TestFlight profile */
	if (pmap_is_testflight_profile(profile_obj) == true) {
		pmap_cs_log_info("%p: [TestFlight] non-development profile", profile_obj);
		return false;
	}

	pmap_cs_log_info("%p: development profile", profile_obj);
	return true;
}
12692 
/**
 * Validate the "Entitlements" dictionary of a provisioning profile through
 * CoreEntitlements and cache an unmanaged query context for it on the
 * profile object.
 *
 * @param profile_obj Profile to set up; its profile_ctx must already be
 *        valid. On the no-entitlements path the cached context is cleared.
 *
 * @return KERN_SUCCESS when the context is set up, KERN_NOT_FOUND when the
 *         profile provisions no entitlements, or KERN_ABORTED when
 *         CoreEntitlements rejects the entitlements data.
 */
static kern_return_t
pmap_initialize_profile_entitlements(
	pmap_cs_profile_t *profile_obj)
{
	const der_vm_context_t entitlements_der_ctx = amfi->CoreEntitlements.der_vm_execute(
		*profile_obj->profile_ctx,
		CESelectDictValue("Entitlements"));

	if (amfi->CoreEntitlements.der_vm_context_is_valid(entitlements_der_ctx) == false) {
		/* No entitlements: clear any stale cached context. */
		memset(&profile_obj->entitlements_ctx_storage, 0, sizeof(struct CEQueryContext));
		profile_obj->entitlements_ctx = NULL;

		pmap_cs_log_info("%p: profile provisions no entitlements", profile_obj);
		return KERN_NOT_FOUND;
	}

	/* Raw DER extent of the entitlements dictionary. */
	const uint8_t *der_start = entitlements_der_ctx.state.der_start;
	const uint8_t *der_end = entitlements_der_ctx.state.der_end;

	CEValidationResult ce_result = {0};
	CEError_t ce_err = amfi->CoreEntitlements.Validate(
		pmap_cs_core_entitlements_runtime,
		&ce_result,
		der_start, der_end);
	if (ce_err != amfi->CoreEntitlements.kNoError) {
		pmap_cs_log_error("unable to validate profile entitlements: %s",
		    amfi->CoreEntitlements.GetErrorString(ce_err));

		return KERN_ABORTED;
	}

	struct CEQueryContext query_ctx = {0};
	ce_err = amfi->CoreEntitlements.AcquireUnmanagedContext(
		pmap_cs_core_entitlements_runtime,
		ce_result,
		&query_ctx);
	if (ce_err != amfi->CoreEntitlements.kNoError) {
		pmap_cs_log_error("unable to acquire context for profile entitlements: %s",
		    amfi->CoreEntitlements.GetErrorString(ce_err));

		return KERN_ABORTED;
	}

	/* Setup the entitlements context within the profile object */
	profile_obj->entitlements_ctx_storage = query_ctx;
	profile_obj->entitlements_ctx = &profile_obj->entitlements_ctx_storage;

	pmap_cs_log_info("%p: profile entitlements successfully setup", profile_obj);
	return KERN_SUCCESS;
}
12743 
/*
 * Validate a provisioning profile through CoreTrust and register it with the
 * PPL so that code signatures can later be associated with it.
 *
 * payload_addr/payload_size describe a pmap_profile_payload_t header followed
 * by the raw profile blob. The pages are locked down here (kept PPL-writable,
 * since the embedded profile_obj_storage is mutated below) and remain locked
 * down until the profile is unregistered.
 *
 * Returns KERN_SUCCESS on success, or KERN_RESOURCE_SHORTAGE when no spare
 * PPL page could be reserved for CoreCrypto (the kernel-side wrapper retries
 * after donating a page). Any validation failure after lockdown is fatal.
 */
kern_return_t
pmap_register_provisioning_profile_internal(
	const vm_address_t payload_addr,
	const vm_size_t payload_size)
{
	kern_return_t ret = KERN_DENIED;
	pmap_cs_profile_t *profile_obj = NULL;
	pmap_profile_payload_t *profile_payload = NULL;
	vm_size_t max_profile_blob_size = 0;
	const uint8_t *profile_content = NULL;
	size_t profile_content_length = 0;


	/* CoreTrust validation uses CoreCrypto -- requires a spare page */
	ret = pmap_reserve_ppl_page();
	if (ret != KERN_SUCCESS) {
		if (ret != KERN_RESOURCE_SHORTAGE) {
			pmap_cs_log_error("unable to register profile: unable to reserve page: %d", ret);
		}
		return ret;
	}

	/* Ensure we have valid data passed in */
	pmap_cs_assert_addr(payload_addr, payload_size, false, false);

	/*
	 * Lockdown the data passed in. The pmap profile payload also contains the profile
	 * data structure used by the PPL to manage the payload. We need to be able to write
	 * to that data structure, so we keep the payload PPL writable.
	 */
	pmap_cs_lockdown_pages(payload_addr, payload_size, true);

	/* Should be safe to read from this now */
	profile_payload = (pmap_profile_payload_t*)payload_addr;

	/* Ensure the profile blob size provided is valid */
	if (os_sub_overflow(payload_size, sizeof(*profile_payload), &max_profile_blob_size)) {
		panic("PMAP_CS: underflow on the max_profile_blob_size: %lu", payload_size);
	} else if (profile_payload->profile_blob_size > max_profile_blob_size) {
		panic("PMAP_CS: overflow on the profile_blob_size: %lu", profile_payload->profile_blob_size);
	}

#if PMAP_CS_INCLUDE_INTERNAL_CODE
	const bool allow_development_root_cert = true;
#else
	const bool allow_development_root_cert = false;
#endif

	/* CoreTrust verifies the blob and hands back the inner profile content */
	int ct_result = coretrust->CTEvaluateProvisioningProfile(
		profile_payload->profile_blob, profile_payload->profile_blob_size,
		allow_development_root_cert,
		&profile_content, &profile_content_length);

	/* Release the PPL page allocated for CoreCrypto */
	pmap_release_reserved_ppl_page();

	if (ct_result != 0) {
		panic("PMAP_CS: profile does not validate through CoreTrust: %d", ct_result);
	} else if ((profile_content == NULL) || profile_content_length == 0) {
		panic("PMAP_CS: profile does not have any content: %p | %lu",
		    profile_content, profile_content_length);
	}

	der_vm_context_t profile_ctx_storage = amfi->CoreEntitlements.der_vm_context_create(
		pmap_cs_core_entitlements_runtime,
		CCDER_CONSTRUCTED_SET,
		false,
		profile_content, profile_content + profile_content_length);
	if (amfi->CoreEntitlements.der_vm_context_is_valid(profile_ctx_storage) == false) {
		panic("PMAP_CS: unable to create a CoreEntitlements context for the profile");
	}

	/*
	 * Acquire a writable version of the profile data structure.
	 * NOTE(review): the phystokv(kvtophys_nofail()) round-trip presumably
	 * yields a PPL-writable alias of the locked-down page — confirm.
	 */
	profile_obj = &profile_payload->profile_obj_storage;
	profile_obj = (pmap_cs_profile_t*)phystokv(kvtophys_nofail((vm_offset_t)profile_obj));

	profile_obj->original_payload = profile_payload;
	profile_obj->profile_ctx_storage = profile_ctx_storage;
	profile_obj->profile_ctx = &profile_obj->profile_ctx_storage;
	os_atomic_store(&profile_obj->reference_count, 0, release);

	/* Setup the entitlements provisioned by the profile */
	ret = pmap_initialize_profile_entitlements(profile_obj);
	if ((ret != KERN_SUCCESS) && (ret != KERN_NOT_FOUND)) {
		panic("PMAP_CS: fatal error while setting up profile entitlements: %d", ret);
	}

	/* Setup properties of the profile */
	profile_obj->development_profile = pmap_is_development_profile(profile_obj);

	/* Mark as validated since it passed all checks */
	profile_obj->profile_validated = true;

	/* Add the profile to the red-black tree */
	lck_rw_lock_exclusive(&pmap_cs_profiles_rbtree_lock);
	if (RB_INSERT(pmap_cs_profiles_rbtree, &pmap_cs_registered_profiles, profile_obj) != NULL) {
		panic("PMAP_CS: Anomaly, profile already exists in the tree: %p", profile_obj);
	}
	lck_rw_unlock_exclusive(&pmap_cs_profiles_rbtree_lock);

	pmap_cs_log_info("%p: profile successfully registered", profile_obj);
	return KERN_SUCCESS;
}
12847 
12848 kern_return_t
12849 pmap_register_provisioning_profile(
12850 	const vm_address_t payload_addr,
12851 	const vm_size_t payload_size)
12852 {
12853 	kern_return_t ret = KERN_DENIED;
12854 
12855 	ret = pmap_register_provisioning_profile_ppl(
12856 		payload_addr,
12857 		payload_size);
12858 
12859 	while (ret == KERN_RESOURCE_SHORTAGE) {
12860 		/* Allocate a page from the free list */
12861 		pmap_alloc_page_for_ppl(0);
12862 
12863 		/* Attempt the call again */
12864 		ret = pmap_register_provisioning_profile_ppl(
12865 			payload_addr,
12866 			payload_size);
12867 	}
12868 
12869 	return ret;
12870 }
12871 
/*
 * Remove a previously registered profile from the PPL's red-black tree and,
 * on success, unlock the pages of its original payload (returning them to
 * the kernel).
 *
 * Returns KERN_FAILURE when the profile is still referenced by one or more
 * code signatures; unregistering a profile that was never registered is a
 * panic. On success, profile_obj must no longer be dereferenced — its
 * backing pages have been unlocked.
 */
kern_return_t
pmap_unregister_provisioning_profile_internal(
	pmap_cs_profile_t *profile_obj)
{
	kern_return_t ret = KERN_DENIED;

	/* Lock the red-black tree exclusively */
	lck_rw_lock_exclusive(&pmap_cs_profiles_rbtree_lock);

	if (RB_FIND(pmap_cs_profiles_rbtree, &pmap_cs_registered_profiles, profile_obj) == NULL) {
		panic("PMAP_CS: unregistering an unknown profile: %p", profile_obj);
	}

	/*
	 * The reference count is checked while holding the exclusive tree lock,
	 * so a concurrent associate (which takes the lock shared) cannot race a
	 * successful removal.
	 */
	uint32_t reference_count = os_atomic_load(&profile_obj->reference_count, acquire);
	if (reference_count != 0) {
		ret = KERN_FAILURE;
		goto exit;
	}

	/* Remove the profile from the red-black tree */
	RB_REMOVE(pmap_cs_profiles_rbtree, &pmap_cs_registered_profiles, profile_obj);

	/* Unregistration was a success */
	ret = KERN_SUCCESS;

exit:
	/* Unlock the red-black tree */
	lck_rw_unlock_exclusive(&pmap_cs_profiles_rbtree_lock);

	if (ret == KERN_SUCCESS) {
		/* Get the original payload address */
		const pmap_profile_payload_t *profile_payload = profile_obj->original_payload;
		const vm_address_t payload_addr = (const vm_address_t)profile_payload;

		/* Get the original payload size */
		vm_size_t payload_size = profile_payload->profile_blob_size + sizeof(*profile_payload);
		payload_size = round_page(payload_size);

		/* Unlock the profile payload */
		pmap_cs_unlockdown_pages(payload_addr, payload_size, true);
		pmap_cs_log_info("%p: profile successfully unregistered: %p | %lu", profile_obj,
		    profile_payload, payload_size);

		/* The object lives inside the just-unlocked payload; drop the pointer */
		profile_obj = NULL;
	}
	return ret;
}
12919 
/*
 * Kernel-side entry point for profile unregistration; forwards straight into
 * the PPL trampoline, which performs the actual work.
 */
kern_return_t
pmap_unregister_provisioning_profile(
	pmap_cs_profile_t *profile_obj)
{
	return pmap_unregister_provisioning_profile_ppl(profile_obj);
}
12926 
/*
 * Associate a registered provisioning profile with a code directory and take
 * a reference on the profile.
 *
 * The association is refused (returns KERN_DENIED) when the signature has
 * already been verified or already carries a profile. Associating a profile
 * that is not in the registered tree, or one that never passed validation,
 * is a panic.
 */
kern_return_t
pmap_associate_provisioning_profile_internal(
	pmap_cs_code_directory_t *cd_entry,
	pmap_cs_profile_t *profile_obj)
{
	kern_return_t ret = KERN_DENIED;

	/* Acquire the lock on the code directory */
	pmap_cs_lock_code_directory(cd_entry);

	if (cd_entry->trust != PMAP_CS_UNTRUSTED) {
		pmap_cs_log_error("disallowing profile association with verified signature");
		goto exit;
	} else if (cd_entry->profile_obj != NULL) {
		pmap_cs_log_error("disallowing multiple profile associations with signature");
		goto exit;
	}

	/*
	 * Lock the red-black tree as shared: this blocks a concurrent
	 * unregistration (exclusive) while the reference count is raised.
	 */
	lck_rw_lock_shared(&pmap_cs_profiles_rbtree_lock);

	if (RB_FIND(pmap_cs_profiles_rbtree, &pmap_cs_registered_profiles, profile_obj) == NULL) {
		panic("PMAP_CS: associating an unknown profile: %p", profile_obj);
	} else if (profile_obj->profile_validated == false) {
		panic("PMAP_CS: attempted association with unverified profile: %p", profile_obj);
	}

	/* Associate the profile with the signature */
	cd_entry->profile_obj = profile_obj;

	/* Increment the reference count on the profile object */
	uint32_t reference_count = os_atomic_add(&profile_obj->reference_count, 1, relaxed);
	if (reference_count == 0) {
		panic("PMAP_CS: overflow on reference count for profile: %p", profile_obj);
	}

	/* Unlock the red-black tree */
	lck_rw_unlock_shared(&pmap_cs_profiles_rbtree_lock);

	/* Association was a success */
	pmap_cs_log_info("associated profile %p with signature %p", profile_obj, cd_entry);
	ret = KERN_SUCCESS;

exit:
	/* NOTE(review): pmap_cs_lock_code_directory appears to take rwlock exclusive — released here */
	lck_rw_unlock_exclusive(&cd_entry->rwlock);

	return ret;
}
12975 
/*
 * Kernel-side entry point for profile association; forwards straight into
 * the PPL trampoline.
 */
kern_return_t
pmap_associate_provisioning_profile(
	pmap_cs_code_directory_t *cd_entry,
	pmap_cs_profile_t *profile_obj)
{
	return pmap_associate_provisioning_profile_ppl(cd_entry, profile_obj);
}
12983 
/*
 * Break the association between a code directory and its profile, dropping
 * the reference taken by pmap_associate_provisioning_profile_internal().
 *
 * Returns KERN_NOT_FOUND when the signature has no associated profile.
 * Disassociation from a still-untrusted signature is a panic. The reference
 * is released only after the code directory lock is dropped.
 */
kern_return_t
pmap_disassociate_provisioning_profile_internal(
	pmap_cs_code_directory_t *cd_entry)
{
	pmap_cs_profile_t *profile_obj = NULL;
	kern_return_t ret = KERN_DENIED;

	/* Acquire the lock on the code directory */
	pmap_cs_lock_code_directory(cd_entry);

	if (cd_entry->trust == PMAP_CS_UNTRUSTED) {
		panic("PMAP_CS: profile disassociation not allowed on unverified signatures");
	}

	if (cd_entry->profile_obj == NULL) {
		ret = KERN_NOT_FOUND;
		goto exit;
	}
	/* Keep a local pointer so the refcount can be dropped after unlock */
	profile_obj = cd_entry->profile_obj;

	/* Disassociate the profile from the signature */
	cd_entry->profile_obj = NULL;

	/* Disassociation was a success */
	ret = KERN_SUCCESS;

exit:
	lck_rw_unlock_exclusive(&cd_entry->rwlock);

	if (ret == KERN_SUCCESS) {
		/* Decrement the reference count on the profile object */
		uint32_t reference_count = os_atomic_sub(&profile_obj->reference_count, 1, release);
		if (reference_count == UINT32_MAX) {
			panic("PMAP_CS: underflow on reference count for profile: %p", profile_obj);
		}
		pmap_cs_log_info("disassociated profile %p from signature %p", profile_obj, cd_entry);
	}
	return ret;
}
13023 
/*
 * Kernel-side entry point for profile disassociation; forwards straight into
 * the PPL trampoline.
 */
kern_return_t
pmap_disassociate_provisioning_profile(
	pmap_cs_code_directory_t *cd_entry)
{
	return pmap_disassociate_provisioning_profile_ppl(cd_entry);
}
13030 
13031 #endif /* PMAP_CS_INCLUDE_CODE_SIGNING */
13032 
13033 MARK_AS_PMAP_TEXT bool
13034 pmap_lookup_in_loaded_trust_caches_internal(
13035 	const uint8_t cdhash[CS_CDHASH_LEN])
13036 {
13037 	kern_return_t kr = KERN_NOT_FOUND;
13038 
13039 #if PMAP_CS_PPL_MONITOR
13040 	/*
13041 	 * If we have the PPL monitor, then this function can only be called from
13042 	 * within the PPL. Calling it directly would've caused a panic, so we can
13043 	 * assume that we're in the PPL here.
13044 	 */
13045 	uint8_t cdhash_safe[CS_CDHASH_LEN];
13046 	memcpy(cdhash_safe, cdhash, CS_CDHASH_LEN);
13047 
13048 	kr = pmap_query_trust_cache_safe(
13049 		kTCQueryTypeLoadable,
13050 		cdhash_safe,
13051 		NULL);
13052 #else
13053 	kr = query_trust_cache(
13054 		kTCQueryTypeLoadable,
13055 		cdhash,
13056 		NULL);
13057 #endif
13058 
13059 	if (kr == KERN_SUCCESS) {
13060 		return true;
13061 	}
13062 	return false;
13063 }
13064 
/*
 * Kernel-facing wrapper: dispatch the loadable trust-cache lookup into the
 * PPL when XNU_MONITOR is built in, otherwise call the implementation
 * directly.
 */
bool
pmap_lookup_in_loaded_trust_caches(
	const uint8_t cdhash[CS_CDHASH_LEN])
{
#if XNU_MONITOR
	return pmap_lookup_in_loaded_trust_caches_ppl(cdhash);
#else
	return pmap_lookup_in_loaded_trust_caches_internal(cdhash);
#endif
}
13075 
/*
 * Query the static trust cache for a CDHash. On a hit, pack the lookup
 * result, the entry's hash type, and the entry's flags (truncated to 8 bits)
 * into a single uint32_t using the TC_LOOKUP_* shifts; return 0 on a miss.
 */
MARK_AS_PMAP_TEXT uint32_t
pmap_lookup_in_static_trust_cache_internal(
	const uint8_t cdhash[CS_CDHASH_LEN])
{
	TrustCacheQueryToken_t query_token = {0};
	kern_return_t kr = KERN_NOT_FOUND;
	uint64_t flags = 0;
	uint8_t hash_type = 0;

#if PMAP_CS_PPL_MONITOR
	/*
	 * If we have the PPL monitor, then this function can only be called from
	 * within the PPL. Calling it directly would've caused a panic, so we can
	 * assume that we're in the PPL here.
	 */
	uint8_t cdhash_safe[CS_CDHASH_LEN];
	memcpy(cdhash_safe, cdhash, CS_CDHASH_LEN);

	kr = pmap_query_trust_cache_safe(
		kTCQueryTypeStatic,
		cdhash_safe,
		&query_token);
#else
	kr = query_trust_cache(
		kTCQueryTypeStatic,
		cdhash,
		&query_token);
#endif

	if (kr == KERN_SUCCESS) {
		amfi->TrustCache.queryGetFlags(&query_token, &flags);
		amfi->TrustCache.queryGetHashType(&query_token, &hash_type);

		/* NOTE(review): the (uint8_t) cast deliberately keeps only the low 8 flag bits — confirm upper bits are unused here */
		return (TC_LOOKUP_FOUND << TC_LOOKUP_RESULT_SHIFT) |
		       (hash_type << TC_LOOKUP_HASH_TYPE_SHIFT) |
		       ((uint8_t)flags << TC_LOOKUP_FLAGS_SHIFT);
	}

	return 0;
}
13116 
/*
 * Kernel-facing wrapper: dispatch the static trust-cache lookup into the PPL
 * when XNU_MONITOR is built in, otherwise call the implementation directly.
 */
uint32_t
pmap_lookup_in_static_trust_cache(const uint8_t cdhash[CS_CDHASH_LEN])
{
#if XNU_MONITOR
	return pmap_lookup_in_static_trust_cache_ppl(cdhash);
#else
	return pmap_lookup_in_static_trust_cache_internal(cdhash);
#endif
}
13126 
/* Protects reads/writes of pmap_compilation_service_cdhash below */
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_compilation_service_cdhash_lock, 0);
/* CDHash installed by the kernel; matched against in pmap_match_compilation_service_cdhash_internal() */
MARK_AS_PMAP_DATA uint8_t pmap_compilation_service_cdhash[CS_CDHASH_LEN] = { 0 };
13129 
13130 MARK_AS_PMAP_TEXT void
13131 pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
13132 {
13133 
13134 	pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
13135 	memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
13136 	pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
13137 
13138 	pmap_cs_log_info("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash[0], cdhash[1], cdhash[2], cdhash[4]);
13139 }
13140 
13141 MARK_AS_PMAP_TEXT bool
13142 pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
13143 {
13144 	bool match = false;
13145 
13146 	pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
13147 	if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
13148 		match = true;
13149 	}
13150 	pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
13151 
13152 	if (match) {
13153 		pmap_cs_log_info("Matched Compilation Service CDHash through the PPL");
13154 	}
13155 
13156 	return match;
13157 }
13158 
/*
 * Kernel-facing wrapper: route the CDHash installation into the PPL when
 * XNU_MONITOR is built in, otherwise call the implementation directly.
 */
void
pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
{
#if XNU_MONITOR
	pmap_set_compilation_service_cdhash_ppl(cdhash);
#else
	pmap_set_compilation_service_cdhash_internal(cdhash);
#endif
}
13168 
/*
 * Kernel-facing wrapper: route the CDHash match into the PPL when
 * XNU_MONITOR is built in, otherwise call the implementation directly.
 */
bool
pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
{
#if XNU_MONITOR
	return pmap_match_compilation_service_cdhash_ppl(cdhash);
#else
	return pmap_match_compilation_service_cdhash_internal(cdhash);
#endif
}
13178 
13179 /*
13180  * As part of supporting local signing on the device, we need the PMAP layer
13181  * to store the local signing key so that PMAP_CS can validate with it. We
13182  * store it at the PMAP layer such that it is accessible to both AMFI and
13183  * PMAP_CS should they need it.
13184  */
/* Set-once guard; prevents the key buffer below from being read before it is populated */
MARK_AS_PMAP_DATA static bool pmap_local_signing_public_key_set = false;
/* P-384 public key used to validate locally signed code; installed once via pmap_set_local_signing_public_key() */
MARK_AS_PMAP_DATA static uint8_t pmap_local_signing_public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE] = { 0 };
13187 
13188 MARK_AS_PMAP_TEXT void
13189 pmap_set_local_signing_public_key_internal(const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE])
13190 {
13191 	bool key_set = false;
13192 
13193 	/*
13194 	 * os_atomic_cmpxchg returns true in case the exchange was successful. For us,
13195 	 * a successful exchange means that the local signing public key has _not_ been
13196 	 * set. In case the key has been set, we panic as we would never expect the
13197 	 * kernel to attempt to set the key more than once.
13198 	 */
13199 	key_set = !os_atomic_cmpxchg(&pmap_local_signing_public_key_set, false, true, relaxed);
13200 
13201 	if (key_set) {
13202 		panic("attempted to set the local signing public key multiple times");
13203 	}
13204 
13205 	memcpy(pmap_local_signing_public_key, public_key, PMAP_ECC_P384_PUBLIC_KEY_SIZE);
13206 	pmap_cs_log_info("set local signing public key");
13207 }
13208 
/*
 * Kernel-facing wrapper: route the key installation into the PPL when
 * XNU_MONITOR is built in, otherwise call the implementation directly.
 */
void
pmap_set_local_signing_public_key(const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE])
{
#if XNU_MONITOR
	return pmap_set_local_signing_public_key_ppl(public_key);
#else
	return pmap_set_local_signing_public_key_internal(public_key);
#endif
}
13218 
13219 uint8_t*
13220 pmap_get_local_signing_public_key(void)
13221 {
13222 	bool key_set = os_atomic_load(&pmap_local_signing_public_key_set, relaxed);
13223 
13224 	if (key_set) {
13225 		return pmap_local_signing_public_key;
13226 	}
13227 
13228 	return NULL;
13229 }
13230 
13231 /*
13232  * Locally signed applications need to be explicitly authorized by an entitled application
13233  * before we allow them to run.
13234  */
/* All-zero until an entitled application unrestricts a specific CDHash */
MARK_AS_PMAP_DATA static uint8_t pmap_local_signing_cdhash[CS_CDHASH_LEN] = {0};
/* Protects pmap_local_signing_cdhash above */
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_local_signing_cdhash_lock, 0);
13237 
/*
 * Record the CDHash of the one locally signed application currently
 * authorized to run; overwrites any previously unrestricted hash.
 */
MARK_AS_PMAP_TEXT void
pmap_unrestrict_local_signing_internal(
	const uint8_t cdhash[CS_CDHASH_LEN])
{

	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
	memcpy(pmap_local_signing_cdhash, cdhash, sizeof(pmap_local_signing_cdhash));
	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);

	pmap_cs_log_debug("unrestricted local signing for CDHash: 0x%02X%02X%02X%02X%02X...",
	    cdhash[0], cdhash[1], cdhash[2], cdhash[3], cdhash[4]);
}
13250 
/*
 * Kernel-facing wrapper: route the unrestrict request into the PPL when
 * XNU_MONITOR is built in, otherwise call the implementation directly.
 */
void
pmap_unrestrict_local_signing(
	const uint8_t cdhash[CS_CDHASH_LEN])
{
#if XNU_MONITOR
	return pmap_unrestrict_local_signing_ppl(cdhash);
#else
	return pmap_unrestrict_local_signing_internal(cdhash);
#endif
}
13261 
13262 #if PMAP_CS
/* Re-arm the local-signing restriction by clearing the unrestricted CDHash */
MARK_AS_PMAP_TEXT static void
pmap_restrict_local_signing(void)
{
	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
	memset(pmap_local_signing_cdhash, 0, sizeof(pmap_local_signing_cdhash));
	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);
}
13270 
13271 MARK_AS_PMAP_TEXT static bool
13272 pmap_local_signing_restricted(
13273 	const uint8_t cdhash[CS_CDHASH_LEN])
13274 {
13275 	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
13276 	int ret = memcmp(pmap_local_signing_cdhash, cdhash, sizeof(pmap_local_signing_cdhash));
13277 	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);
13278 
13279 	return ret != 0;
13280 }
13281 
/*
 * Run a CoreEntitlements query against the entitlements of the main code
 * region of a pmap (defaulting to the current process's pmap when NULL is
 * passed). Returns true when the query executes to a valid DER context; on
 * success and when finalContext is non-NULL, the resulting context is
 * written back to the caller.
 */
MARK_AS_PMAP_TEXT bool
pmap_cs_query_entitlements_internal(
	pmap_t pmap,
	CEQuery_t query,
	size_t queryLength,
	CEQueryContext_t finalContext)
{
	struct pmap_cs_code_directory *cd_entry = NULL;
	bool ret = false;

	if (!pmap_cs) {
		panic("PMAP_CS: cannot query for entitlements as pmap_cs is turned off");
	}

	/*
	 * When a pmap has not been passed in, we assume the caller wants to check the
	 * entitlements on the current user space process.
	 */
	if (pmap == NULL) {
		pmap = current_pmap();
	}

	if (pmap == kernel_pmap) {
		/*
		 * Instead of panicking we will just return false.
		 */
		return false;
	}

	/* 64 is an upper bound on query operations — presumably to bound PPL work; confirm */
	if (query == NULL || queryLength > 64) {
		panic("PMAP_CS: bogus entitlements query");
	} else {
		pmap_cs_assert_addr((vm_address_t)query, sizeof(CEQueryOperation_t) * queryLength, false, true);
	}

	if (finalContext != NULL) {
		pmap_cs_assert_addr((vm_address_t)finalContext, sizeof(*finalContext), false, false);
	}

	validate_pmap(pmap);
	pmap_lock(pmap, PMAP_LOCK_SHARED);

	cd_entry = pmap_cs_code_directory_from_region(pmap->pmap_cs_main);
	if (cd_entry == NULL) {
		pmap_cs_log_error("attempted to query entitlements from an invalid pmap or a retired code directory");
		goto out;
	}

	if (cd_entry->ce_ctx == NULL) {
		pmap_cs_log_debug("%s: code signature doesn't have any entitlements", cd_entry->identifier);
		goto out;
	}

	/* Execute each query operation in sequence against the signature's DER context */
	der_vm_context_t executionContext = cd_entry->ce_ctx->der_context;

	for (size_t op = 0; op < queryLength; op++) {
		executionContext = amfi->CoreEntitlements.der_vm_execute(executionContext, query[op]);
	}

	if (amfi->CoreEntitlements.der_vm_context_is_valid(executionContext)) {
		ret = true;
		if (finalContext != NULL) {
			/* NOTE(review): pin appears to make the kernel-owned buffer writable from this context — confirm */
			pmap_pin_kernel_pages((vm_offset_t)finalContext, sizeof(*finalContext));
			finalContext->der_context = executionContext;
			pmap_unpin_kernel_pages((vm_offset_t)finalContext, sizeof(*finalContext));
		}
	} else {
		ret = false;
	}

out:
	if (cd_entry) {
		lck_rw_unlock_shared(&cd_entry->rwlock);
		cd_entry = NULL;
	}
	pmap_unlock(pmap, PMAP_LOCK_SHARED);

	return ret;
}
13361 #endif
13362 
/*
 * Kernel-facing wrapper for entitlement queries. Compiling callers must gate
 * on PMAP_SUPPORTS_ENTITLEMENT_CHECKS; without it this panics at runtime.
 * With it, the call is routed through the PPL when XNU_MONITOR is built in.
 */
bool
pmap_query_entitlements(
	__unused pmap_t pmap,
	__unused CEQuery_t query,
	__unused size_t queryLength,
	__unused CEQueryContext_t finalContext)
{
#if !PMAP_SUPPORTS_ENTITLEMENT_CHECKS
	panic("PMAP_CS: do not use this API without checking for \'#if PMAP_SUPPORTS_ENTITLEMENT_CHECKS\'");
#else

#if XNU_MONITOR
	return pmap_cs_query_entitlements_ppl(pmap, query, queryLength, finalContext);
#else
	return pmap_cs_query_entitlements_internal(pmap, query, queryLength, finalContext);
#endif

#endif /* !PMAP_SUPPORTS_ENTITLEMENT_CHECKS */
}
13382 
/*
 * Toggle footprint accounting suspension for the current thread (DEVELOPMENT
 * and DEBUG builds only; a no-op otherwise).
 *
 * NOTE(review): footprint_was_suspended is only ever set here, never cleared
 * on resume — it appears to be a sticky "was suspended at some point" marker
 * consumed elsewhere; confirm before relying on it.
 */
MARK_AS_PMAP_TEXT void
pmap_footprint_suspend_internal(
	vm_map_t        map,
	boolean_t       suspend)
{
#if DEVELOPMENT || DEBUG
	if (suspend) {
		current_thread()->pmap_footprint_suspended = TRUE;
		map->pmap->footprint_was_suspended = TRUE;
	} else {
		current_thread()->pmap_footprint_suspended = FALSE;
	}
#else /* DEVELOPMENT || DEBUG */
	(void) map;
	(void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}
13400 
/*
 * Kernel-facing wrapper: route the footprint-suspend toggle into the PPL
 * when XNU_MONITOR is built in, otherwise call the implementation directly.
 */
void
pmap_footprint_suspend(
	vm_map_t map,
	boolean_t suspend)
{
#if XNU_MONITOR
	pmap_footprint_suspend_ppl(map, suspend);
#else
	pmap_footprint_suspend_internal(map, suspend);
#endif
}
13412 
/*
 * Deliberate no-op PPL entry point; only validates that the pmap argument is
 * a mutable, well-formed pmap.
 */
MARK_AS_PMAP_TEXT void
pmap_nop_internal(pmap_t pmap __unused)
{
	validate_pmap_mutable(pmap);
}
13418 
/*
 * Kernel-facing no-op: exercises the PPL call path (or the direct path)
 * without performing any work.
 */
void
pmap_nop(pmap_t pmap)
{
#if XNU_MONITOR
	pmap_nop_ppl(pmap);
#else
	pmap_nop_internal(pmap);
#endif
}
13428 
13429 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
13430 
/*
 * Header emitted ahead of each page-table copy by
 * pmap_dump_page_tables_recurse(); describes the table that immediately
 * follows it in the dump buffer.
 */
struct page_table_dump_header {
	uint64_t pa;          /* physical address of the copied table */
	uint64_t num_entries; /* number of tt entries in the copy */
	uint64_t start_va;    /* first VA covered by the table */
	uint64_t end_va;      /* VA just past the table's coverage */
};
13437 
/*
 * Recursively copy a page-table subtree into the caller's dump buffer.
 *
 * For each table whose level is selected in level_mask, a
 * page_table_dump_header followed by a verbatim copy of the table is
 * appended at *bytes_copied. The walk then descends through every valid
 * non-block entry; block mappings and invalid entries are skipped. Returns
 * KERN_INSUFFICIENT_BUFFER_SIZE as soon as the buffer cannot hold the next
 * header+table pair.
 */
static kern_return_t
pmap_dump_page_tables_recurse(pmap_t pmap,
    const tt_entry_t *ttp,
    unsigned int cur_level,
    unsigned int level_mask,
    uint64_t start_va,
    void *buf_start,
    void *buf_end,
    size_t *bytes_copied)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);

	uint64_t size = pt_attr->pta_level_info[cur_level].size;
	uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
	uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
	uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;

	void *bufp = (uint8_t*)buf_start + *bytes_copied;

	/* The root table may be smaller than a full page */
	if (cur_level == pt_attr_root_level(pt_attr)) {
		num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
	}

	uint64_t tt_size = num_entries * sizeof(tt_entry_t);
	const tt_entry_t *tt_end = &ttp[num_entries];

	/* NOTE(review): this space check runs even for levels filtered out by level_mask */
	if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
		return KERN_INSUFFICIENT_BUFFER_SIZE;
	}

	if (level_mask & (1U << cur_level)) {
		struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
		header->pa = ml_static_vtop((vm_offset_t)ttp);
		header->num_entries = num_entries;
		header->start_va = start_va;
		header->end_va = start_va + (num_entries * size);

		bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
		*bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
	}
	uint64_t current_va = start_va;

	for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
		tt_entry_t tte = *ttep;

		if (!(tte & valid_mask)) {
			continue;
		}

		if ((tte & type_mask) == type_block) {
			/* Block mapping: no lower-level table to descend into */
			continue;
		} else {
			/* A table-type entry at the leaf level cannot exist */
			if (cur_level >= pt_attr_leaf_level(pt_attr)) {
				panic("%s: corrupt entry %#llx at %p, "
				    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
				    __FUNCTION__, tte, ttep,
				    ttp, cur_level, bufp, buf_end);
			}

			const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

			kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
			    level_mask, current_va, buf_start, buf_end, bytes_copied);

			if (recurse_result != KERN_SUCCESS) {
				return recurse_result;
			}
		}
	}

	return KERN_SUCCESS;
}
13511 
/*
 * Dump the page tables of a pmap into [bufp, buf_end), starting at the root
 * level. Only callable from kernel-debugger context (panics otherwise).
 */
kern_return_t
pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
{
	if (not_in_kdp) {
		panic("pmap_dump_page_tables must only be called from kernel debugger context");
	}
	return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
	           level_mask, pmap->min, bufp, buf_end, bytes_copied);
}
13521 
13522 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
13523 
/* Stub for configurations without page-table dumping (non-arm64 or release builds) */
kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
    unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
	return KERN_NOT_SUPPORTED;
}
13530 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
13531 
13532 
13533 #ifdef CONFIG_XNUPOST
13534 #ifdef __arm64__
/* Set by pmap_test_fault_handler() when the expected test fault is taken; reset by pmap_test_access() */
static volatile bool pmap_test_took_fault = false;
13536 
/*
 * Expected-fault handler installed around the test accesses in
 * pmap_test_access(). Recognizes EL1 data aborts with an L3 permission or
 * access-flag fault status, records that the fault occurred, and resumes
 * execution at the next instruction. Returns true when the fault was
 * consumed.
 */
static bool
pmap_test_fault_handler(arm_saved_state_t * state)
{
	bool retval                 = false;
	uint32_t esr                = get_saved_state_esr(state);
	esr_exception_class_t class = ESR_EC(esr);
	fault_status_t fsc          = ISS_IA_FSC(ESR_ISS(esr));

	if ((class == ESR_EC_DABORT_EL1) &&
	    ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
		pmap_test_took_fault = true;
		/* return to the instruction immediately after the call to NX page */
		set_saved_state_pc(state, get_saved_state_pc(state) + 4);
		retval = true;
	}

	return retval;
}
13555 
// Disable KASAN instrumentation, as the test pmap's TTBR0 space will not be in the shadow map
/*
 * Perform one probe access (read or write) at 'va', optionally after
 * switching to 'pmap', and report whether the observed faulting behavior
 * matched the expectation.
 *
 * Interrupts and preemption are disabled for the duration since the pmap is
 * switched outside the normal thread mechanism; PAN is also disabled around
 * the access when probing a (non-kernel) test pmap. Returns true when
 * (fault taken) == should_fault.
 */
static NOKASAN bool
pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
{
	pmap_t old_pmap = NULL;

	pmap_test_took_fault = false;

	/*
	 * We're potentially switching pmaps without using the normal thread
	 * mechanism; disable interrupts and preemption to avoid any unexpected
	 * memory accesses.
	 */
	uint64_t old_int_state = pmap_interrupts_disable();
	mp_disable_preemption();

	if (pmap != NULL) {
		old_pmap = current_pmap();
		pmap_switch(pmap);

		/* Disable PAN; pmap shouldn't be the kernel pmap. */
#if __ARM_PAN_AVAILABLE__
		__builtin_arm_wsr("pan", 0);
#endif /* __ARM_PAN_AVAILABLE__ */
	}

	ml_expect_fault_begin(pmap_test_fault_handler, va);

	if (is_write) {
		*((volatile uint64_t*)(va)) = 0xdec0de;
	} else {
		volatile uint64_t tmp = *((volatile uint64_t*)(va));
		(void)tmp;
	}

	/* Save the fault bool, and undo the gross stuff we did. */
	bool took_fault = pmap_test_took_fault;
	ml_expect_fault_end();

	if (pmap != NULL) {
#if __ARM_PAN_AVAILABLE__
		__builtin_arm_wsr("pan", 1);
#endif /* __ARM_PAN_AVAILABLE__ */

		pmap_switch(old_pmap);
	}

	mp_enable_preemption();
	pmap_interrupts_restore(old_int_state);
	bool retval = (took_fault == should_fault);
	return retval;
}
13608 
13609 static bool
13610 pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
13611 {
13612 	bool retval = pmap_test_access(pmap, va, should_fault, false);
13613 
13614 	if (!retval) {
13615 		T_FAIL("%s: %s, "
13616 		    "pmap=%p, va=%p, should_fault=%u",
13617 		    __func__, should_fault ? "did not fault" : "faulted",
13618 		    pmap, (void*)va, (unsigned)should_fault);
13619 	}
13620 
13621 	return retval;
13622 }
13623 
13624 static bool
13625 pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
13626 {
13627 	bool retval = pmap_test_access(pmap, va, should_fault, true);
13628 
13629 	if (!retval) {
13630 		T_FAIL("%s: %s, "
13631 		    "pmap=%p, va=%p, should_fault=%u",
13632 		    __func__, should_fault ? "did not fault" : "faulted",
13633 		    pmap, (void*)va, (unsigned)should_fault);
13634 	}
13635 
13636 	return retval;
13637 }
13638 
/*
 * Verify that the referenced/modified bits for physical page 'pa' are
 * exactly 'should_be_set': the requested bits must be set and the remaining
 * REF/MOD bits must be clear. T_FAILs and returns false on mismatch.
 */
static bool
pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
{
	unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
	unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));

	bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));

	if (!retval) {
		T_FAIL("%s: bits=%u, "
		    "pa=%p, should_be_set=%u",
		    __func__, bits,
		    (void*)pa, should_be_set);
	}

	return retval;
}
13656 
/*
 * Probe both read and write access at 'va'. The bitwise '|' (rather than
 * '||') is deliberate: both probes must always execute and report their own
 * T_FAIL, even when the first one already failed.
 */
static __attribute__((noinline)) bool
pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
{
	bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
	return retval;
}
13663 
13664 static int
13665 pmap_test_test_config(unsigned int flags)
13666 {
13667 	T_LOG("running pmap_test_test_config flags=0x%X", flags);
13668 	unsigned int map_count = 0;
13669 	unsigned long page_ratio = 0;
13670 	pmap_t pmap = pmap_create_options(NULL, 0, flags);
13671 
13672 	if (!pmap) {
13673 		panic("Failed to allocate pmap");
13674 	}
13675 
13676 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13677 	uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
13678 	uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
13679 	uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);
13680 
13681 	if (pmap_page_size <= native_page_size) {
13682 		page_ratio = native_page_size / pmap_page_size;
13683 	} else {
13684 		/*
13685 		 * We claim to support a page_ratio of less than 1, which is
13686 		 * not currently supported by the pmap layer; panic.
13687 		 */
13688 		panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu"
13689 		    "flags=%u",
13690 		    __func__, native_page_size, pmap_page_size,
13691 		    flags);
13692 	}
13693 
13694 	if (PAGE_RATIO > 1) {
13695 		/*
13696 		 * The kernel is deliberately pretending to have 16KB pages.
13697 		 * The pmap layer has code that supports this, so pretend the
13698 		 * page size is larger than it is.
13699 		 */
13700 		pmap_page_size = PAGE_SIZE;
13701 		native_page_size = PAGE_SIZE;
13702 	}
13703 
13704 	/*
13705 	 * Get two pages from the VM; one to be mapped wired, and one to be
13706 	 * mapped nonwired.
13707 	 */
13708 	vm_page_t unwired_vm_page = vm_page_grab();
13709 	vm_page_t wired_vm_page = vm_page_grab();
13710 
13711 	if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
13712 		panic("Failed to grab VM pages");
13713 	}
13714 
13715 	ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
13716 	ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);
13717 
13718 	pmap_paddr_t pa = ptoa(pn);
13719 	pmap_paddr_t wired_pa = ptoa(wired_pn);
13720 
13721 	/*
13722 	 * We'll start mappings at the second twig TT.  This keeps us from only
13723 	 * using the first entry in each TT, which would trivially be address
13724 	 * 0; one of the things we will need to test is retrieving the VA for
13725 	 * a given PTE.
13726 	 */
13727 	vm_map_address_t va_base = pmap_twig_size;
13728 	vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);
13729 
13730 	if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
13731 		/*
13732 		 * Not exactly a functional failure, but this test relies on
13733 		 * there being a spare PTE slot we can use to pin the TT.
13734 		 */
13735 		panic("Cannot pin translation table");
13736 	}
13737 
13738 	/*
13739 	 * Create the wired mapping; this will prevent the pmap layer from
13740 	 * reclaiming our test TTs, which would interfere with this test
13741 	 * ("interfere" -> "make it panic").
13742 	 */
13743 	pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);
13744 
13745 #if XNU_MONITOR
13746 	/*
13747 	 * If the PPL is enabled, make sure that the kernel cannot write
13748 	 * to PPL memory.
13749 	 */
13750 	if (!pmap_ppl_disable) {
13751 		T_LOG("Validate that kernel cannot write to PPL memory.");
13752 		pt_entry_t * ptep = pmap_pte(pmap, va_base);
13753 		pmap_test_write(NULL, (vm_map_address_t)ptep, true);
13754 	}
13755 #endif
13756 
13757 	/*
13758 	 * Create read-only mappings of the nonwired page; if the pmap does
13759 	 * not use the same page size as the kernel, create multiple mappings
13760 	 * so that the kernel page is fully mapped.
13761 	 */
13762 	for (map_count = 0; map_count < page_ratio; map_count++) {
13763 		pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
13764 	}
13765 
13766 	/* Validate that all the PTEs have the expected PA and VA. */
13767 	for (map_count = 0; map_count < page_ratio; map_count++) {
13768 		pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));
13769 
13770 		if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
13771 			T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
13772 			    (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
13773 		}
13774 
13775 		if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
13776 			T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
13777 			    (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
13778 		}
13779 	}
13780 
13781 	T_LOG("Validate that reads to our mapping do not fault.");
13782 	pmap_test_read(pmap, va_base, false);
13783 
13784 	T_LOG("Validate that writes to our mapping fault.");
13785 	pmap_test_write(pmap, va_base, true);
13786 
13787 	T_LOG("Make the first mapping writable.");
13788 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
13789 
13790 	T_LOG("Validate that writes to our mapping do not fault.");
13791 	pmap_test_write(pmap, va_base, false);
13792 
13793 
13794 	T_LOG("Make the first mapping execute-only");
13795 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);
13796 
13797 
13798 	T_LOG("Validate that reads to our mapping do not fault.");
13799 	pmap_test_read(pmap, va_base, false);
13800 
13801 	T_LOG("Validate that writes to our mapping fault.");
13802 	pmap_test_write(pmap, va_base, true);
13803 
13804 
13805 	/*
13806 	 * For page ratios of greater than 1: validate that writes to the other
13807 	 * mappings still fault.  Remove the mappings afterwards (we're done
13808 	 * with page ratio testing).
13809 	 */
13810 	for (map_count = 1; map_count < page_ratio; map_count++) {
13811 		pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
13812 		pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
13813 	}
13814 
13815 	T_LOG("Mark the page unreferenced and unmodified.");
13816 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13817 	pmap_test_check_refmod(pa, 0);
13818 
13819 	/*
13820 	 * Begin testing the ref/mod state machine.  Re-enter the mapping with
13821 	 * different protection/fault_type settings, and confirm that the
13822 	 * ref/mod state matches our expectations at each step.
13823 	 */
13824 	T_LOG("!ref/!mod: read, no fault.  Expect ref/!mod");
13825 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
13826 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
13827 
13828 	T_LOG("!ref/!mod: read, read fault.  Expect ref/!mod");
13829 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13830 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
13831 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
13832 
13833 	T_LOG("!ref/!mod: rw, read fault.  Expect ref/!mod");
13834 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13835 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
13836 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
13837 
13838 	T_LOG("ref/!mod: rw, read fault.  Expect ref/!mod");
13839 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
13840 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
13841 
13842 	T_LOG("!ref/!mod: rw, rw fault.  Expect ref/mod");
13843 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13844 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
13845 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
13846 
13847 	/*
13848 	 * Shared memory testing; we'll have two mappings; one read-only,
13849 	 * one read-write.
13850 	 */
13851 	vm_map_address_t rw_base = va_base;
13852 	vm_map_address_t ro_base = va_base + pmap_page_size;
13853 
13854 	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
13855 	pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
13856 
13857 	/*
13858 	 * Test that we take faults as expected for unreferenced/unmodified
13859 	 * pages.  Also test the arm_fast_fault interface, to ensure that
13860 	 * mapping permissions change as expected.
13861 	 */
13862 	T_LOG("!ref/!mod: expect no access");
13863 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13864 	pmap_test_read_write(pmap, ro_base, false, false);
13865 	pmap_test_read_write(pmap, rw_base, false, false);
13866 
13867 	T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
13868 	arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
13869 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
13870 	pmap_test_read_write(pmap, ro_base, true, false);
13871 	pmap_test_read_write(pmap, rw_base, true, false);
13872 
13873 	T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
13874 	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
13875 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
13876 	pmap_test_read_write(pmap, ro_base, true, false);
13877 	pmap_test_read_write(pmap, rw_base, true, true);
13878 
13879 	T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
13880 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
13881 	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
13882 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
13883 	pmap_test_read_write(pmap, ro_base, true, false);
13884 	pmap_test_read_write(pmap, rw_base, true, true);
13885 
13886 	T_LOG("RW protect both mappings; should not change protections.");
13887 	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
13888 	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
13889 	pmap_test_read_write(pmap, ro_base, true, false);
13890 	pmap_test_read_write(pmap, rw_base, true, true);
13891 
13892 	T_LOG("Read protect both mappings; RW mapping should become RO.");
13893 	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
13894 	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
13895 	pmap_test_read_write(pmap, ro_base, true, false);
13896 	pmap_test_read_write(pmap, rw_base, true, false);
13897 
13898 	T_LOG("RW protect the page; mappings should not change protections.");
13899 	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
13900 	pmap_page_protect(pn, VM_PROT_ALL);
13901 	pmap_test_read_write(pmap, ro_base, true, false);
13902 	pmap_test_read_write(pmap, rw_base, true, true);
13903 
13904 	T_LOG("Read protect the page; RW mapping should become RO.");
13905 	pmap_page_protect(pn, VM_PROT_READ);
13906 	pmap_test_read_write(pmap, ro_base, true, false);
13907 	pmap_test_read_write(pmap, rw_base, true, false);
13908 
13909 	T_LOG("Validate that disconnect removes all known mappings of the page.");
13910 	pmap_disconnect(pn);
13911 	if (!pmap_verify_free(pn)) {
13912 		T_FAIL("Page still has mappings");
13913 	}
13914 
13915 	T_LOG("Remove the wired mapping, so we can tear down the test map.");
13916 	pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
13917 	pmap_destroy(pmap);
13918 
13919 	T_LOG("Release the pages back to the VM.");
13920 	vm_page_lock_queues();
13921 	vm_page_free(unwired_vm_page);
13922 	vm_page_free(wired_vm_page);
13923 	vm_page_unlock_queues();
13924 
13925 	T_LOG("Testing successful!");
13926 	return 0;
13927 }
13928 #endif /* __arm64__ */
13929 
13930 kern_return_t
13931 pmap_test(void)
13932 {
13933 	T_LOG("Starting pmap_tests");
13934 #ifdef __arm64__
13935 	int flags = 0;
13936 	flags |= PMAP_CREATE_64BIT;
13937 
13938 #if __ARM_MIXED_PAGE_SIZE__
13939 	T_LOG("Testing VM_PAGE_SIZE_4KB");
13940 	pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
13941 	T_LOG("Testing VM_PAGE_SIZE_16KB");
13942 	pmap_test_test_config(flags);
13943 #else /* __ARM_MIXED_PAGE_SIZE__ */
13944 	pmap_test_test_config(flags);
13945 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13946 
13947 #endif /* __arm64__ */
13948 	T_PASS("completed pmap_test successfully");
13949 	return KERN_SUCCESS;
13950 }
13951 #endif /* CONFIG_XNUPOST */
13952 
13953 /*
13954  * The following function should never make it to RELEASE code, since
13955  * it provides a way to get the PPL to modify text pages.
13956  */
13957 #if DEVELOPMENT || DEBUG
13958 
13959 #define ARM_UNDEFINED_INSN 0xe7f000f0
13960 #define ARM_UNDEFINED_INSN_THUMB 0xde00
13961 
13962 /**
13963  * Forcibly overwrite executable text with an illegal instruction.
13964  *
13965  * @note Only used for xnu unit testing.
13966  *
13967  * @param pa The physical address to corrupt.
13968  *
13969  * @return KERN_SUCCESS on success.
13970  */
13971 kern_return_t
13972 pmap_test_text_corruption(pmap_paddr_t pa)
13973 {
13974 #if XNU_MONITOR
13975 	return pmap_test_text_corruption_ppl(pa);
13976 #else /* XNU_MONITOR */
13977 	return pmap_test_text_corruption_internal(pa);
13978 #endif /* XNU_MONITOR */
13979 }
13980 
/*
 * Implementation of the text-corruption test: overwrite the instruction at
 * the given physical address with an undefined-instruction encoding, taking
 * the PV head lock and temporarily making the physical aperture mapping
 * writable if the page is executable.
 *
 * NOTE(review): assumes `pa` refers to a managed page (asserted via
 * pa_valid) that is currently mapped; runs inside the PPL when XNU_MONITOR
 * is enabled (MARK_AS_PMAP_TEXT).
 */
MARK_AS_PMAP_TEXT kern_return_t
pmap_test_text_corruption_internal(pmap_paddr_t pa)
{
	/* Write through the physical aperture (kernel VA) mapping of pa. */
	vm_offset_t va = phystokv(pa);
	unsigned int pai = pa_index(pa);

	assert(pa_valid(pa));

	/* Lock the PV head so the page's mapping state cannot change under us. */
	pvh_lock(pai);

	pv_entry_t **pv_h  = pai_to_pvh(pai);
	assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
#if defined(PVH_FLAG_EXEC)
	/*
	 * Executable pages have a read-only physical aperture mapping;
	 * temporarily grant kernel write access (AP_RWNA) so the store below
	 * does not fault, and restore read-only (AP_RONA) afterwards.
	 */
	const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;

	if (need_ap_twiddle) {
		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
	}
#endif /* defined(PVH_FLAG_EXEC) */

	/*
	 * The low bit in an instruction address indicates a THUMB instruction
	 */
	if (va & 1) {
		/* Clear the THUMB bit to get the actual store address. */
		va &= ~(vm_offset_t)1;
		*(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
	} else {
		*(uint32_t *)va = ARM_UNDEFINED_INSN;
	}

#if defined(PVH_FLAG_EXEC)
	if (need_ap_twiddle) {
		pmap_set_ptov_ap(pai, AP_RONA, FALSE);
	}
#endif /* defined(PVH_FLAG_EXEC) */

	/*
	 * Invalidate the instruction cache for the modified region so the
	 * CPU fetches the new (undefined) instruction rather than a stale one.
	 */
	InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));

	pvh_unlock(pai);

	return KERN_SUCCESS;
}
14023 
14024 #endif /* DEVELOPMENT || DEBUG */
14025