xref: /xnu-8019.80.24/osfmk/arm/pmap.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31 
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38 
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/zalloc_internal.h>
45 #include <kern/kalloc.h>
46 #include <kern/spl.h>
47 #include <kern/startup.h>
48 #include <kern/trustcache.h>
49 
50 #include <os/overflow.h>
51 
52 #include <vm/pmap.h>
53 #include <vm/pmap_cs.h>
54 #include <vm/vm_map.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_protos.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pageout.h>
60 #include <vm/cpm.h>
61 
62 #include <libkern/img4/interface.h>
63 #include <libkern/section_keywords.h>
64 #include <sys/errno.h>
65 
66 #include <machine/atomic.h>
67 #include <machine/thread.h>
68 #include <machine/lowglobals.h>
69 
70 #include <arm/caches_internal.h>
71 #include <arm/cpu_data.h>
72 #include <arm/cpu_data_internal.h>
73 #include <arm/cpu_capabilities.h>
74 #include <arm/cpu_number.h>
75 #include <arm/machine_cpu.h>
76 #include <arm/misc_protos.h>
77 #include <arm/pmap/pmap_internal.h>
78 #include <arm/trap.h>
79 
80 #if     (__ARM_VMSA__ > 7)
81 #include <arm64/proc_reg.h>
82 #include <pexpert/arm64/boot.h>
83 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
84 #include <arm64/amcc_rorgn.h>
85 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
86 #endif
87 
88 #include <pexpert/device_tree.h>
89 
90 #include <san/kasan.h>
91 #include <sys/cdefs.h>
92 
93 #if defined(HAS_APPLE_PAC)
94 #include <ptrauth.h>
95 #endif
96 
97 #ifdef CONFIG_XNUPOST
98 #include <tests/xnupost.h>
99 #endif
100 
101 
102 #if HIBERNATION
103 #include <IOKit/IOHibernatePrivate.h>
104 #endif /* HIBERNATION */
105 
106 #ifdef __ARM64_PMAP_SUBPAGE_L1__
107 #if (__ARM_VMSA__ <= 7)
108 #error This is not supported for old-style page tables
109 #endif
110 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
111 #else
112 #if (__ARM_VMSA__ <= 7)
113 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
114 #else
115 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
116 #endif
117 #endif
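
/*
 * Illustrative sizing for the subpage-L1 case (the address-space width is
 * assumed purely for the example): with a hypothetical 39-bit VA space and
 * 16KB pages, the root index field spans only 3 bits, so the allocation is
 * 8 entries * sizeof(tt_entry_t) = 64 bytes rather than a full page.  Without
 * __ARM64_PMAP_SUBPAGE_L1__ the root is simply one hardware page (two on
 * VMSA-7).
 */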
118 
119 extern u_int32_t random(void); /* from <libkern/libkern.h> */
120 
121 static bool alloc_asid(pmap_t pmap);
122 static void free_asid(pmap_t pmap);
123 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap, bool last_level_only);
124 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
125 static pt_entry_t wimg_to_pte(unsigned int wimg, pmap_paddr_t pa);
126 
127 static const struct page_table_ops native_pt_ops =
128 {
129 	.alloc_id = alloc_asid,
130 	.free_id = free_asid,
131 	.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
132 	.flush_tlb_async = flush_mmu_tlb_full_asid_async,
133 	.wimg_to_pte = wimg_to_pte,
134 };
135 
136 #if (__ARM_VMSA__ > 7)
137 const struct page_table_level_info pmap_table_level_info_16k[] =
138 {
139 	[0] = {
140 		.size       = ARM_16K_TT_L0_SIZE,
141 		.offmask    = ARM_16K_TT_L0_OFFMASK,
142 		.shift      = ARM_16K_TT_L0_SHIFT,
143 		.index_mask = ARM_16K_TT_L0_INDEX_MASK,
144 		.valid_mask = ARM_TTE_VALID,
145 		.type_mask  = ARM_TTE_TYPE_MASK,
146 		.type_block = ARM_TTE_TYPE_BLOCK
147 	},
148 	[1] = {
149 		.size       = ARM_16K_TT_L1_SIZE,
150 		.offmask    = ARM_16K_TT_L1_OFFMASK,
151 		.shift      = ARM_16K_TT_L1_SHIFT,
152 		.index_mask = ARM_16K_TT_L1_INDEX_MASK,
153 		.valid_mask = ARM_TTE_VALID,
154 		.type_mask  = ARM_TTE_TYPE_MASK,
155 		.type_block = ARM_TTE_TYPE_BLOCK
156 	},
157 	[2] = {
158 		.size       = ARM_16K_TT_L2_SIZE,
159 		.offmask    = ARM_16K_TT_L2_OFFMASK,
160 		.shift      = ARM_16K_TT_L2_SHIFT,
161 		.index_mask = ARM_16K_TT_L2_INDEX_MASK,
162 		.valid_mask = ARM_TTE_VALID,
163 		.type_mask  = ARM_TTE_TYPE_MASK,
164 		.type_block = ARM_TTE_TYPE_BLOCK
165 	},
166 	[3] = {
167 		.size       = ARM_16K_TT_L3_SIZE,
168 		.offmask    = ARM_16K_TT_L3_OFFMASK,
169 		.shift      = ARM_16K_TT_L3_SHIFT,
170 		.index_mask = ARM_16K_TT_L3_INDEX_MASK,
171 		.valid_mask = ARM_PTE_TYPE_VALID,
172 		.type_mask  = ARM_PTE_TYPE_MASK,
173 		.type_block = ARM_TTE_TYPE_L3BLOCK
174 	}
175 };
176 
177 const struct page_table_level_info pmap_table_level_info_4k[] =
178 {
179 	[0] = {
180 		.size       = ARM_4K_TT_L0_SIZE,
181 		.offmask    = ARM_4K_TT_L0_OFFMASK,
182 		.shift      = ARM_4K_TT_L0_SHIFT,
183 		.index_mask = ARM_4K_TT_L0_INDEX_MASK,
184 		.valid_mask = ARM_TTE_VALID,
185 		.type_mask  = ARM_TTE_TYPE_MASK,
186 		.type_block = ARM_TTE_TYPE_BLOCK
187 	},
188 	[1] = {
189 		.size       = ARM_4K_TT_L1_SIZE,
190 		.offmask    = ARM_4K_TT_L1_OFFMASK,
191 		.shift      = ARM_4K_TT_L1_SHIFT,
192 		.index_mask = ARM_4K_TT_L1_INDEX_MASK,
193 		.valid_mask = ARM_TTE_VALID,
194 		.type_mask  = ARM_TTE_TYPE_MASK,
195 		.type_block = ARM_TTE_TYPE_BLOCK
196 	},
197 	[2] = {
198 		.size       = ARM_4K_TT_L2_SIZE,
199 		.offmask    = ARM_4K_TT_L2_OFFMASK,
200 		.shift      = ARM_4K_TT_L2_SHIFT,
201 		.index_mask = ARM_4K_TT_L2_INDEX_MASK,
202 		.valid_mask = ARM_TTE_VALID,
203 		.type_mask  = ARM_TTE_TYPE_MASK,
204 		.type_block = ARM_TTE_TYPE_BLOCK
205 	},
206 	[3] = {
207 		.size       = ARM_4K_TT_L3_SIZE,
208 		.offmask    = ARM_4K_TT_L3_OFFMASK,
209 		.shift      = ARM_4K_TT_L3_SHIFT,
210 		.index_mask = ARM_4K_TT_L3_INDEX_MASK,
211 		.valid_mask = ARM_PTE_TYPE_VALID,
212 		.type_mask  = ARM_PTE_TYPE_MASK,
213 		.type_block = ARM_TTE_TYPE_L3BLOCK
214 	}
215 };
216 
217 const struct page_table_attr pmap_pt_attr_4k = {
218 	.pta_level_info = pmap_table_level_info_4k,
219 	.pta_root_level = (T0SZ_BOOT - 16) / 9,
220 #if __ARM_MIXED_PAGE_SIZE__
221 	.pta_commpage_level = PMAP_TT_L2_LEVEL,
222 #else /* __ARM_MIXED_PAGE_SIZE__ */
223 #if __ARM_16K_PG__
224 	.pta_commpage_level = PMAP_TT_L2_LEVEL,
225 #else /* __ARM_16K_PG__ */
226 	.pta_commpage_level = PMAP_TT_L1_LEVEL,
227 #endif /* __ARM_16K_PG__ */
228 #endif /* __ARM_MIXED_PAGE_SIZE__ */
229 	.pta_max_level  = PMAP_TT_L3_LEVEL,
230 	.pta_ops = &native_pt_ops,
231 	.ap_ro = ARM_PTE_AP(AP_RORO),
232 	.ap_rw = ARM_PTE_AP(AP_RWRW),
233 	.ap_rona = ARM_PTE_AP(AP_RONA),
234 	.ap_rwna = ARM_PTE_AP(AP_RWNA),
235 	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
236 	.ap_x = ARM_PTE_PNX,
237 #if __ARM_MIXED_PAGE_SIZE__
238 	.pta_tcr_value  = TCR_EL1_4KB,
239 #endif /* __ARM_MIXED_PAGE_SIZE__ */
240 	.pta_page_size  = 4096,
241 	.pta_page_shift = 12,
242 };
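
/*
 * Worked example for the pta_root_level expression above (T0SZ values chosen
 * only to illustrate the arithmetic): with a 4KB granule each level resolves
 * 9 bits of VA, so a T0SZ_BOOT of 25 (a 39-bit address space) gives
 * (25 - 16) / 9 = 1 and the walk starts at L1, while a T0SZ_BOOT of 17
 * (a 47-bit address space) gives (17 - 16) / 9 = 0 and the walk starts at L0.
 */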
243 
244 const struct page_table_attr pmap_pt_attr_16k = {
245 	.pta_level_info = pmap_table_level_info_16k,
246 	.pta_root_level = PMAP_TT_L1_LEVEL,
247 	.pta_commpage_level = PMAP_TT_L2_LEVEL,
248 	.pta_max_level  = PMAP_TT_L3_LEVEL,
249 	.pta_ops = &native_pt_ops,
250 	.ap_ro = ARM_PTE_AP(AP_RORO),
251 	.ap_rw = ARM_PTE_AP(AP_RWRW),
252 	.ap_rona = ARM_PTE_AP(AP_RONA),
253 	.ap_rwna = ARM_PTE_AP(AP_RWNA),
254 	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
255 	.ap_x = ARM_PTE_PNX,
256 #if __ARM_MIXED_PAGE_SIZE__
257 	.pta_tcr_value  = TCR_EL1_16KB,
258 #endif /* __ARM_MIXED_PAGE_SIZE__ */
259 	.pta_page_size  = 16384,
260 	.pta_page_shift = 14,
261 };
262 
263 #if __ARM_16K_PG__
264 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
265 #else /* !__ARM_16K_PG__ */
266 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
267 #endif /* !__ARM_16K_PG__ */
268 
269 
270 #else /* (__ARM_VMSA__ > 7) */
271 /*
272  * We don't support pmap parameterization for VMSA7, so use an opaque
273  * page_table_attr structure.
274  */
275 const struct page_table_attr * const native_pt_attr = NULL;
276 #endif /* (__ARM_VMSA__ > 7) */
277 
278 
279 static inline void
280 pmap_sync_tlb(bool strong __unused)
281 {
282 	sync_tlb_flush();
283 }
284 
285 #if MACH_ASSERT
286 int vm_footprint_suspend_allowed = 1;
287 
288 extern int pmap_ledgers_panic;
289 extern int pmap_ledgers_panic_leeway;
290 
291 #endif /* MACH_ASSERT */
292 
293 #if DEVELOPMENT || DEBUG
294 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
295 	(current_thread()->pmap_footprint_suspended)
296 #else /* DEVELOPMENT || DEBUG */
297 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
298 #endif /* DEVELOPMENT || DEBUG */
299 
300 
301 #ifdef PLATFORM_BridgeOS
302 static struct pmap_legacy_trust_cache *pmap_legacy_trust_caches MARK_AS_PMAP_DATA = NULL;
303 #endif
304 static struct pmap_image4_trust_cache *pmap_image4_trust_caches MARK_AS_PMAP_DATA = NULL;
305 
306 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_loaded_trust_caches_lock, 0);
307 
308 SECURITY_READ_ONLY_LATE(int) srd_fused = 0;
309 
310 /*
311  * Represents a TLB range that will be flushed before exiting
312  * the PPL.
313  * Used by phys_attribute_clear_range to defer flushing pages in
314  * this range until the end of the operation.
315  */
316 typedef struct pmap_tlb_flush_range {
317 	pmap_t ptfr_pmap;
318 	vm_map_address_t ptfr_start;
319 	vm_map_address_t ptfr_end;
320 	bool ptfr_flush_needed;
321 } pmap_tlb_flush_range_t;
322 
323 #if XNU_MONITOR
324 /*
325  * PPL External References.
326  */
327 extern vm_offset_t   segPPLDATAB;
328 extern unsigned long segSizePPLDATA;
329 extern vm_offset_t   segPPLTEXTB;
330 extern unsigned long segSizePPLTEXT;
331 extern vm_offset_t   segPPLDATACONSTB;
332 extern unsigned long segSizePPLDATACONST;
333 
334 
335 /*
336  * PPL Global Variables
337  */
338 
339 #if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
340 /* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
341 SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
342 #else
343 const boolean_t pmap_ppl_disable = FALSE;
344 #endif
345 
346 /* Indicates if the PPL has started applying APRR. */
347 boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
348 
349 extern void *pmap_stacks_start;
350 extern void *pmap_stacks_end;
351 
352 #endif /* XNU_MONITOR */
353 
354 
355 /* Virtual memory region for early allocation */
356 #if     (__ARM_VMSA__ == 7)
357 #define VREGION1_HIGH_WINDOW    (0)
358 #else
359 #define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
360 #endif
361 #define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
362 #define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
363 
364 extern uint8_t bootstrap_pagetables[];
365 
366 extern unsigned int not_in_kdp;
367 
368 extern vm_offset_t first_avail;
369 
370 extern vm_offset_t     virtual_space_start;     /* Next available kernel VA */
371 extern vm_offset_t     virtual_space_end;       /* End of kernel address space */
372 extern vm_offset_t     static_memory_end;
373 
374 extern const vm_map_address_t physmap_base;
375 extern const vm_map_address_t physmap_end;
376 
377 extern int maxproc, hard_maxproc;
378 
379 vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;
380 vm_address_t MARK_AS_PMAP_DATA image4_late_slab = 0;
381 
382 #if (__ARM_VMSA__ > 7)
383 /* The number of address bits one TTBR can cover. */
384 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
385 
386 /*
387  * The bounds on our TTBRs.  These are for sanity checking that
388  * an address is accessible by a TTBR before we attempt to map it.
389  */
390 
391 /* The level of the root of a page table. */
392 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
393 
394 /* The number of entries in the root TT of a page table. */
395 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
396 #else
397 const uint64_t arm64_root_pgtable_level = 0;
398 const uint64_t arm64_root_pgtable_num_ttes = 0;
399 #endif
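
/*
 * Worked example of the two expressions above (T0SZ_BOOT of 17 assumed purely
 * for illustration): with 16KB pages, ARM_PGSHIFT is 14 and TTE_SHIFT is 3, so
 * each level resolves 11 bits.  PGTABLE_ADDR_BITS is 64 - 17 = 47,
 * (47 - 1 - 14) / 11 = 2, so the root level is 3 - 2 = 1, and the root table
 * holds 2 << (32 % 11) = 2048 entries: a 47-bit space walked from L1.
 */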
400 
401 struct pmap                     kernel_pmap_store MARK_AS_PMAP_DATA;
402 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
403 
404 static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone;  /* zone of pmap structures */
405 
406 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
407 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
408 unsigned int    pmap_stamp MARK_AS_PMAP_DATA;
409 queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;
410 
411 typedef struct tt_free_entry {
412 	struct tt_free_entry    *next;
413 } tt_free_entry_t;
414 
415 #define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
416 
417 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
418 unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
419 unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
420 #define FREE_PAGE_SIZE_TT_MAX   4
421 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
422 unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
423 unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
424 #define FREE_TWO_PAGE_SIZE_TT_MAX       4
425 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
426 unsigned int    free_tt_count MARK_AS_PMAP_DATA;
427 unsigned int    free_tt_max MARK_AS_PMAP_DATA;
428 
430 
431 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
432 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
433 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
434 
435 unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
436 unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
437 unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;  /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
438 unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
439 unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
440 unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
441 
442 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
443 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
444 
445 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;                     /* set by arm_vm_init() - keep out of bss */
446 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;                     /* set by arm_vm_init() - phys tte addr */
447 
448 /* Lock group used for all pmap object locks. */
449 lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
450 
451 #if DEVELOPMENT || DEBUG
452 int nx_enabled = 1;                                     /* enable no-execute protection */
453 int allow_data_exec  = 0;                               /* No apps may execute data */
454 int allow_stack_exec = 0;                               /* No apps may execute from the stack */
455 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
456 unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
457 unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
458 #else /* DEVELOPMENT || DEBUG */
459 const int nx_enabled = 1;                                       /* enable no-execute protection */
460 const int allow_data_exec  = 0;                         /* No apps may execute data */
461 const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
462 #endif /* DEVELOPMENT || DEBUG */
463 
464 /**
465  * This variable is set true during hibernation entry to protect pmap data structures
466  * during image copying, and reset false on hibernation exit.
467  */
468 bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
469 
470 #if MACH_ASSERT
471 static void pmap_check_ledgers(pmap_t pmap);
472 #else
473 static inline void
474 pmap_check_ledgers(__unused pmap_t pmap)
475 {
476 }
477 #endif /* MACH_ASSERT */
478 
479 /**
480  * This helper function ensures that potentially-long-running batched PPL operations are
481  * called in preemptible context before entering the PPL, so that the PPL call may
482  * periodically exit to allow pending urgent ASTs to be taken.
483  */
484 static inline void
485 pmap_verify_preemptible(void)
486 {
487 	assert(preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT));
488 }
489 
490 SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
491 
492 SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
493 SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;
494 
495 SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */
496 
497 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
498 #if defined(__arm64__)
499 #  ifdef XNU_TARGET_OS_OSX
500 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
501 #  else
502 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
503 #  endif
504 #endif /* __arm64__ */
505 
506 #if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
507 SECURITY_READ_ONLY_LATE(boolean_t)   pmap_panic_dev_wimg_on_managed = TRUE;
508 #else
509 SECURITY_READ_ONLY_LATE(boolean_t)   pmap_panic_dev_wimg_on_managed = FALSE;
510 #endif
511 
512 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
513 SECURITY_READ_ONLY_LATE(uint32_t) pmap_max_asids = 0;
514 SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
515 SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
516 SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
517 static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
518 static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
519 static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
520 
521 
522 #if (__ARM_VMSA__ > 7)
523 #if __ARM_MIXED_PAGE_SIZE__
524 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
525 #endif
526 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
527 #endif
528 
529 /* PTE Define Macros */
530 
531 #define pte_is_wired(pte)                                                               \
532 	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
533 
534 #define pte_was_writeable(pte) \
535 	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
536 
537 #define pte_set_was_writeable(pte, was_writeable) \
538 	do {                                         \
539 	        if ((was_writeable)) {               \
540 	                (pte) |= ARM_PTE_WRITEABLE;  \
541 	        } else {                             \
542 	                (pte) &= ~ARM_PTE_WRITEABLE; \
543 	        }                                    \
544 	} while(0)
545 
546 static inline void
547 pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
548 {
549 	if (wired) {
550 		*ptep |= ARM_PTE_WIRED;
551 	} else {
552 		*ptep &= ~ARM_PTE_WIRED;
553 	}
554 	/*
555 	 * Do not track wired page count for kernel pagetable pages.  Kernel mappings are
556 	 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
557 	 * never reclaimed.
558 	 */
559 	if (pmap == kernel_pmap) {
560 		return;
561 	}
562 	unsigned short *ptd_wiredcnt_ptr;
563 	ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
564 	if (wired) {
565 		os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
566 	} else {
567 		unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
568 		if (__improbable(prev_wired == 0)) {
569 			panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
570 		}
571 	}
572 }
573 
574 #define PMAP_UPDATE_TLBS(pmap, s, e, strong, last_level_only) {                                       \
575 	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap, last_level_only); \
576 	pmap_sync_tlb(strong);                                                                        \
577 }
578 
579 /*
580  * Synchronize updates to PTEs that were previously invalid or had the AF bit cleared,
581  * therefore not requiring TLBI.  Use a store-load barrier to ensure subsequent loads
582  * will observe the updated PTE.
583  */
584 #define FLUSH_PTE()                                                                     \
585 	__builtin_arm_dmb(DMB_ISH);
586 
587 /*
588  * Synchronize updates to PTEs that were previously valid and thus may be cached in
589  * TLBs.  DSB is required to ensure the PTE stores have completed prior to the ensuing
590  * TLBI.  This should only require a store-store barrier, as subsequent accesses in
591  * program order will not issue until the DSB completes.  Prior loads may be reordered
592  * after the barrier, but their behavior should not be materially affected by the
593  * reordering.  For fault-driven PTE updates such as COW, PTE contents should not
594  * matter for loads until the access is re-driven well after the TLB update is
595  * synchronized.   For "involuntary" PTE access restriction due to paging lifecycle,
596  * we should be in a position to handle access faults.  For "voluntary" PTE access
597  * restriction due to unmapping or protection, the decision to restrict access should
598  * have a data dependency on prior loads in order to avoid a data race.
599  */
600 #define FLUSH_PTE_STRONG()                                                             \
601 	__builtin_arm_dsb(DSB_ISHST);
602 
603 /**
604  * Write enough page table entries to map a single VM page. On systems where the
605  * VM page size does not match the hardware page size, multiple page table
606  * entries will need to be written.
607  *
608  * @note This function does not emit a barrier to ensure these page table writes
609  *       have completed before continuing; such a barrier is commonly needed.
610  *       When a DMB or DSB barrier is required, use write_pte() or
611  *       write_pte_strong() respectively instead of this function.
612  *
613  * @param ptep Pointer to the first page table entry to update.
614  * @param pte The value to write into each page table entry. In the case that
615  *            multiple PTEs are updated to a non-empty value, then the address
616  *            in this value will automatically be incremented for each PTE
617  *            write.
618  */
619 static void
620 write_pte_fast(pt_entry_t *ptep, pt_entry_t pte)
621 {
622 	/**
623 	 * PAGE_SHIFT (and in turn, PAGE_RATIO) can vary at runtime on some systems,
624 	 * which is why it's checked at runtime instead of at compile time.  The
625 	 * "unreachable" warning needs to be suppressed because it is still a
626 	 * compile-time constant on other systems.
627 	 */
628 	__unreachable_ok_push
629 	if (TEST_PAGE_RATIO_4) {
630 		if (((uintptr_t)ptep) & 0x1f) {
631 			panic("%s: PTE write is unaligned, ptep=%p, pte=%p",
632 			    __func__, ptep, (void*)pte);
633 		}
634 
635 		if ((pte & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) {
636 			/**
637 			 * If we're writing an empty/compressed PTE value, then don't
638 			 * auto-increment the address for each PTE write.
639 			 */
640 			*ptep = pte;
641 			*(ptep + 1) = pte;
642 			*(ptep + 2) = pte;
643 			*(ptep + 3) = pte;
644 		} else {
645 			*ptep = pte;
646 			*(ptep + 1) = pte | 0x1000;
647 			*(ptep + 2) = pte | 0x2000;
648 			*(ptep + 3) = pte | 0x3000;
649 		}
650 	} else {
651 		*ptep = pte;
652 	}
653 	__unreachable_ok_pop
654 }
655 
656 /**
657  * Writes enough page table entries to map a single VM page and then ensures
658  * those writes complete by executing a Data Memory Barrier.
659  *
660  * @note The DMB issued by this function is not strong enough to protect against
661  *       TLB invalidates from being reordered above the PTE writes. If a TLBI
662  *       instruction is going to immediately be called after this write, it's
663  *       recommended to call write_pte_strong() instead of this function.
664  *
665  * See the function header for write_pte_fast() for more details on the
666  * parameters.
667  */
668 void
669 write_pte(pt_entry_t *ptep, pt_entry_t pte)
670 {
671 	write_pte_fast(ptep, pte);
672 	FLUSH_PTE();
673 }
674 
675 /**
676  * Writes enough page table entries to map a single VM page and then ensures
677  * those writes complete by executing a Data Synchronization Barrier. This
678  * barrier provides stronger guarantees than the DMB executed by write_pte().
679  *
680  * @note This function is useful if you're going to immediately flush the TLB
681  *       after making the PTE write. A DSB is required to protect against the
682  *       TLB invalidate being reordered before the PTE write.
683  *
684  * See the function header for write_pte_fast() for more details on the
685  * parameters.
686  */
687 static void
688 write_pte_strong(pt_entry_t *ptep, pt_entry_t pte)
689 {
690 	write_pte_fast(ptep, pte);
691 	FLUSH_PTE_STRONG();
692 }
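
/*
 * Sketch of how the three write_pte variants compose with the TLB macros
 * defined earlier; the variable names are hypothetical and shown only to
 * illustrate the intended ordering:
 *
 *     write_pte_strong(ptep, new_pte);                          // DSB before TLBI
 *     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE, false, true);  // flush + sync
 *
 * A mapping that was previously invalid only needs write_pte(), and batched
 * updates can use write_pte_fast() repeatedly followed by a single FLUSH_PTE()
 * or FLUSH_PTE_STRONG() at the end.
 */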
693 
694 /**
695  * Retrieve the pmap structure for the thread running on the current CPU.
696  */
697 pmap_t
698 current_pmap()
699 {
700 	const pmap_t current = vm_map_pmap(current_thread()->map);
701 
702 	assert(current != NULL);
703 
704 #if XNU_MONITOR
705 	/**
706 	 * On PPL-enabled systems, it's important that PPL policy decisions aren't
707 	 * decided by kernel-writable memory. This function is used in various parts
708 	 * of the PPL, and besides validating that the pointer returned by this
709 	 * function is indeed a pmap structure, it's also important to ensure that
710 	 * it's actually the current thread's pmap. This is because different pmaps
711 	 * will have access to different entitlements based on the code signature of
712 	 * their loaded process. So if a different user pmap is set in the current
713 	 * thread structure (in an effort to bypass code signing restrictions), even
714 	 * though the structure would validate correctly as it is a real pmap
715 	 * structure, it should fail here.
716 	 *
717 	 * This only needs to occur for user pmaps because the kernel pmap's root
718 	 * page table is always the same as TTBR1 (it's set during bootstrap and not
719 	 * changed so it'd be redundant to check), and its code signing fields are
720 	 * always set to NULL. The PMAP CS logic won't operate on the kernel pmap so
721 	 * it shouldn't be possible to set those fields. Due to that, an attacker
722 	 * setting the current thread's pmap to the kernel pmap as a way to bypass
723 	 * this check won't accomplish anything as it doesn't provide any extra code
724 	 * signing entitlements.
725 	 */
726 	if ((current != kernel_pmap) &&
727 	    ((get_mmu_ttb() & TTBR_BADDR_MASK) != (current->ttep))) {
728 		panic_plain("%s: Current thread's pmap doesn't match up with TTBR0 "
729 		    "%#llx %#llx", __func__, get_mmu_ttb(), current->ttep);
730 	}
731 #endif /* XNU_MONITOR */
732 
733 	return current;
734 }
735 
736 #if DEVELOPMENT || DEBUG
737 
738 /*
739  * Trace levels are controlled by a bitmask in which each
740  * level can be enabled/disabled by the (1<<level) position
741  * in the boot arg
742  * Level 0: PPL extension functionality
743  * Level 1: pmap lifecycle (create/destroy/switch)
744  * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
745  * Level 3: internal state management (attributes/fast-fault)
746  * Level 4-7: TTE traces for paging levels 0-3.  TTBs are traced at level 4.
747  */
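
/*
 * For example, a pmap_trace_mask of 0x6 (bits 1 and 2 set) would emit the
 * pmap-lifecycle and mapping-lifecycle tracepoints while leaving the PPL,
 * internal-state, and TTE traces disabled.
 */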
748 
749 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
750 
751 #define PMAP_TRACE(level, ...) \
752 	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
753 	        KDBG_RELEASE(__VA_ARGS__); \
754 	}
755 #else /* DEVELOPMENT || DEBUG */
756 
757 #define PMAP_TRACE(level, ...)
758 
759 #endif /* DEVELOPMENT || DEBUG */
760 
761 
762 /*
763  * Internal function prototypes (forward declarations).
764  */
765 
766 static vm_map_size_t pmap_user_va_size(pmap_t pmap);
767 
768 static void pmap_set_reference(ppnum_t pn);
769 
770 pmap_paddr_t pmap_vtophys(pmap_t pmap, addr64_t va);
771 
772 static void pmap_switch_user_ttb(pmap_t pmap, pmap_cpu_data_t *cpu_data_ptr);
773 
774 static kern_return_t pmap_expand(
775 	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
776 
777 static int pmap_remove_range(
778 	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
779 
780 static tt_entry_t *pmap_tt1_allocate(
781 	pmap_t, vm_size_t, unsigned int);
782 
783 #define PMAP_TT_ALLOCATE_NOWAIT         0x1
784 
785 static void pmap_tt1_deallocate(
786 	pmap_t, tt_entry_t *, vm_size_t, unsigned int);
787 
788 #define PMAP_TT_DEALLOCATE_NOBLOCK      0x1
789 
790 static kern_return_t pmap_tt_allocate(
791 	pmap_t, tt_entry_t **, unsigned int, unsigned int);
792 
793 #define PMAP_TT_ALLOCATE_NOWAIT         0x1
794 
795 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
796 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
797 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
798 
799 #define PMAP_TT_DEALLOCATE_NOBLOCK      0x1
800 
801 #if     (__ARM_VMSA__ > 7)
802 
803 static void pmap_unmap_sharedpage(
804 	pmap_t pmap);
805 
806 static boolean_t
807 pmap_is_64bit(pmap_t);
808 
809 
810 #endif /* (__ARM_VMSA__ > 7) */
811 
812 static void pmap_update_cache_attributes_locked(
813 	ppnum_t, unsigned);
814 
815 static boolean_t arm_clear_fast_fault(
816 	ppnum_t ppnum,
817 	vm_prot_t fault_type,
818 	pt_entry_t *pte_p);
819 
820 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
821 
822 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
823 
824 static void pmap_trim_self(pmap_t pmap);
825 static void pmap_trim_subord(pmap_t subord);
826 
827 
828 /*
829  * Temporary prototypes, while we wait for pmap_enter to move to taking an
830  * address instead of a page number.
831  */
832 static kern_return_t
833 pmap_enter_addr(
834 	pmap_t pmap,
835 	vm_map_address_t v,
836 	pmap_paddr_t pa,
837 	vm_prot_t prot,
838 	vm_prot_t fault_type,
839 	unsigned int flags,
840 	boolean_t wired);
841 
842 kern_return_t
843 pmap_enter_options_addr(
844 	pmap_t pmap,
845 	vm_map_address_t v,
846 	pmap_paddr_t pa,
847 	vm_prot_t prot,
848 	vm_prot_t fault_type,
849 	unsigned int flags,
850 	boolean_t wired,
851 	unsigned int options,
852 	__unused void   *arg);
853 
854 #ifdef CONFIG_XNUPOST
855 kern_return_t pmap_test(void);
856 #endif /* CONFIG_XNUPOST */
857 
858 PMAP_SUPPORT_PROTOTYPES(
859 	kern_return_t,
860 	arm_fast_fault, (pmap_t pmap,
861 	vm_map_address_t va,
862 	vm_prot_t fault_type,
863 	bool was_af_fault,
864 	bool from_user), ARM_FAST_FAULT_INDEX);
865 
866 PMAP_SUPPORT_PROTOTYPES(
867 	boolean_t,
868 	arm_force_fast_fault, (ppnum_t ppnum,
869 	vm_prot_t allow_mode,
870 	int options), ARM_FORCE_FAST_FAULT_INDEX);
871 
872 MARK_AS_PMAP_TEXT static boolean_t
873 arm_force_fast_fault_with_flush_range(
874 	ppnum_t ppnum,
875 	vm_prot_t allow_mode,
876 	int options,
877 	pmap_tlb_flush_range_t *flush_range);
878 
879 PMAP_SUPPORT_PROTOTYPES(
880 	boolean_t,
881 	pmap_batch_set_cache_attributes, (ppnum_t pn,
882 	unsigned int cacheattr,
883 	unsigned int page_cnt,
884 	unsigned int page_index,
885 	boolean_t doit,
886 	unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
887 
888 PMAP_SUPPORT_PROTOTYPES(
889 	void,
890 	pmap_change_wiring, (pmap_t pmap,
891 	vm_map_address_t v,
892 	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
893 
894 PMAP_SUPPORT_PROTOTYPES(
895 	pmap_t,
896 	pmap_create_options, (ledger_t ledger,
897 	vm_map_size_t size,
898 	unsigned int flags,
899 	kern_return_t * kr), PMAP_CREATE_INDEX);
900 
901 PMAP_SUPPORT_PROTOTYPES(
902 	void,
903 	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
904 
905 PMAP_SUPPORT_PROTOTYPES(
906 	kern_return_t,
907 	pmap_enter_options, (pmap_t pmap,
908 	vm_map_address_t v,
909 	pmap_paddr_t pa,
910 	vm_prot_t prot,
911 	vm_prot_t fault_type,
912 	unsigned int flags,
913 	boolean_t wired,
914 	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
915 
916 PMAP_SUPPORT_PROTOTYPES(
917 	pmap_paddr_t,
918 	pmap_find_pa, (pmap_t pmap,
919 	addr64_t va), PMAP_FIND_PA_INDEX);
920 
921 #if (__ARM_VMSA__ > 7)
922 PMAP_SUPPORT_PROTOTYPES(
923 	kern_return_t,
924 	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
925 #endif
926 
927 
928 PMAP_SUPPORT_PROTOTYPES(
929 	boolean_t,
930 	pmap_is_empty, (pmap_t pmap,
931 	vm_map_offset_t va_start,
932 	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
933 
934 
935 PMAP_SUPPORT_PROTOTYPES(
936 	unsigned int,
937 	pmap_map_cpu_windows_copy, (ppnum_t pn,
938 	vm_prot_t prot,
939 	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
940 
941 PMAP_SUPPORT_PROTOTYPES(
942 	void,
943 	pmap_ro_zone_memcpy, (zone_id_t zid,
944 	vm_offset_t va,
945 	vm_offset_t offset,
946 	const vm_offset_t new_data,
947 	vm_size_t new_data_size), PMAP_RO_ZONE_MEMCPY_INDEX);
948 
949 PMAP_SUPPORT_PROTOTYPES(
950 	void,
951 	pmap_ro_zone_bzero, (zone_id_t zid,
952 	vm_offset_t va,
953 	vm_offset_t offset,
954 	vm_size_t size), PMAP_RO_ZONE_BZERO_INDEX);
955 
956 PMAP_SUPPORT_PROTOTYPES(
957 	kern_return_t,
958 	pmap_nest, (pmap_t grand,
959 	pmap_t subord,
960 	addr64_t vstart,
961 	uint64_t size), PMAP_NEST_INDEX);
962 
963 PMAP_SUPPORT_PROTOTYPES(
964 	void,
965 	pmap_page_protect_options, (ppnum_t ppnum,
966 	vm_prot_t prot,
967 	unsigned int options,
968 	void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
969 
970 PMAP_SUPPORT_PROTOTYPES(
971 	vm_map_address_t,
972 	pmap_protect_options, (pmap_t pmap,
973 	vm_map_address_t start,
974 	vm_map_address_t end,
975 	vm_prot_t prot,
976 	unsigned int options,
977 	void *args), PMAP_PROTECT_OPTIONS_INDEX);
978 
979 PMAP_SUPPORT_PROTOTYPES(
980 	kern_return_t,
981 	pmap_query_page_info, (pmap_t pmap,
982 	vm_map_offset_t va,
983 	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
984 
985 PMAP_SUPPORT_PROTOTYPES(
986 	mach_vm_size_t,
987 	pmap_query_resident, (pmap_t pmap,
988 	vm_map_address_t start,
989 	vm_map_address_t end,
990 	mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
991 
992 PMAP_SUPPORT_PROTOTYPES(
993 	void,
994 	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
995 
996 PMAP_SUPPORT_PROTOTYPES(
997 	vm_map_address_t,
998 	pmap_remove_options, (pmap_t pmap,
999 	vm_map_address_t start,
1000 	vm_map_address_t end,
1001 	int options), PMAP_REMOVE_OPTIONS_INDEX);
1002 
1003 
1004 PMAP_SUPPORT_PROTOTYPES(
1005 	void,
1006 	pmap_set_cache_attributes, (ppnum_t pn,
1007 	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1008 
1009 PMAP_SUPPORT_PROTOTYPES(
1010 	void,
1011 	pmap_update_compressor_page, (ppnum_t pn,
1012 	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1013 
1014 PMAP_SUPPORT_PROTOTYPES(
1015 	void,
1016 	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1017 
1018 #if MACH_ASSERT || XNU_MONITOR
1019 PMAP_SUPPORT_PROTOTYPES(
1020 	void,
1021 	pmap_set_process, (pmap_t pmap,
1022 	int pid,
1023 	char *procname), PMAP_SET_PROCESS_INDEX);
1024 #endif
1025 
1026 PMAP_SUPPORT_PROTOTYPES(
1027 	void,
1028 	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1029 
1030 PMAP_SUPPORT_PROTOTYPES(
1031 	kern_return_t,
1032 	pmap_unnest_options, (pmap_t grand,
1033 	addr64_t vaddr,
1034 	uint64_t size,
1035 	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1036 
1037 PMAP_SUPPORT_PROTOTYPES(
1038 	void,
1039 	phys_attribute_set, (ppnum_t pn,
1040 	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1041 
1042 PMAP_SUPPORT_PROTOTYPES(
1043 	void,
1044 	phys_attribute_clear, (ppnum_t pn,
1045 	unsigned int bits,
1046 	int options,
1047 	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1048 
1049 #if __ARM_RANGE_TLBI__
1050 PMAP_SUPPORT_PROTOTYPES(
1051 	vm_map_address_t,
1052 	phys_attribute_clear_range, (pmap_t pmap,
1053 	vm_map_address_t start,
1054 	vm_map_address_t end,
1055 	unsigned int bits,
1056 	unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
1057 #endif /* __ARM_RANGE_TLBI__ */
1058 
1059 
1060 PMAP_SUPPORT_PROTOTYPES(
1061 	void,
1062 	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1063 
1064 PMAP_SUPPORT_PROTOTYPES(
1065 	void,
1066 	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1067 
1068 PMAP_SUPPORT_PROTOTYPES(
1069 	void,
1070 	pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);
1071 
1072 PMAP_SUPPORT_PROTOTYPES(
1073 	void,
1074 	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1075 
1076 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
1077 PMAP_SUPPORT_PROTOTYPES(
1078 	void,
1079 	pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
1080 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
1081 
1082 PMAP_SUPPORT_PROTOTYPES(
1083 	void,
1084 	pmap_trim, (pmap_t grand,
1085 	pmap_t subord,
1086 	addr64_t vstart,
1087 	uint64_t size), PMAP_TRIM_INDEX);
1088 
1089 #if HAS_APPLE_PAC
1090 PMAP_SUPPORT_PROTOTYPES(
1091 	void *,
1092 	pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
1093 PMAP_SUPPORT_PROTOTYPES(
1094 	void *,
1095 	pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
1096 #endif /* HAS_APPLE_PAC */
1097 
1098 
1099 
1100 
1101 PMAP_SUPPORT_PROTOTYPES(
1102 	bool,
1103 	pmap_is_trust_cache_loaded, (const uuid_t uuid), PMAP_IS_TRUST_CACHE_LOADED_INDEX);
1104 
1105 PMAP_SUPPORT_PROTOTYPES(
1106 	uint32_t,
1107 	pmap_lookup_in_static_trust_cache, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX);
1108 
1109 PMAP_SUPPORT_PROTOTYPES(
1110 	bool,
1111 	pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);
1112 
1113 PMAP_SUPPORT_PROTOTYPES(
1114 	void,
1115 	pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
1116 	PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);
1117 
1118 PMAP_SUPPORT_PROTOTYPES(
1119 	bool,
1120 	pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
1121 	PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
1122 
1123 PMAP_SUPPORT_PROTOTYPES(
1124 	void,
1125 	pmap_set_local_signing_public_key, (const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE]),
1126 	PMAP_SET_LOCAL_SIGNING_PUBLIC_KEY_INDEX);
1127 
1128 PMAP_SUPPORT_PROTOTYPES(
1129 	void,
1130 	pmap_unrestrict_local_signing, (const uint8_t cdhash[CS_CDHASH_LEN]),
1131 	PMAP_UNRESTRICT_LOCAL_SIGNING_INDEX);
1132 
1133 PMAP_SUPPORT_PROTOTYPES(
1134 	void,
1135 	pmap_nop, (pmap_t pmap), PMAP_NOP_INDEX);
1136 
1137 void pmap_footprint_suspend(vm_map_t    map,
1138     boolean_t   suspend);
1139 PMAP_SUPPORT_PROTOTYPES(
1140 	void,
1141 	pmap_footprint_suspend, (vm_map_t map,
1142 	boolean_t suspend),
1143 	PMAP_FOOTPRINT_SUSPEND_INDEX);
1144 
1145 
1146 
1147 
1148 #if DEVELOPMENT || DEBUG
1149 PMAP_SUPPORT_PROTOTYPES(
1150 	kern_return_t,
1151 	pmap_test_text_corruption, (pmap_paddr_t),
1152 	PMAP_TEST_TEXT_CORRUPTION_INDEX);
1153 #endif /* DEVELOPMENT || DEBUG */
1154 
1155 #if     (__ARM_VMSA__ > 7)
1156 /*
1157  * The low global vector page is mapped at a fixed alias.
1158  * Since the page size is 16k for H8 and newer we map the globals to a 16k
1159  * aligned address. Readers of the globals (e.g. lldb, panic server) need
1160  * to check both addresses anyway for backward compatibility. So for now
1161  * we leave H6 and H7 where they were.
1162  */
1163 #if (ARM_PGSHIFT == 14)
1164 #define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1165 #else
1166 #define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1167 #endif
1168 
1169 #else
1170 #define LOWGLOBAL_ALIAS         (0xFFFF1000)
1171 #endif
1172 
1173 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1174 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1175 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1176 
1177 #if XNU_MONITOR
1178 
1179 #if __has_feature(ptrauth_calls)
1180 #define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
1181 #else
1182 #define __ptrauth_ppl_handler
1183 #endif
1184 
1185 /*
1186  * Table of function pointers used for PPL dispatch.
1187  */
1188 const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
1189 	[ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
1190 	[ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
1191 	[MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
1192 	[PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
1193 	[PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
1194 	[PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
1195 	[PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
1196 	[PMAP_CREATE_INDEX] = pmap_create_options_internal,
1197 	[PMAP_DESTROY_INDEX] = pmap_destroy_internal,
1198 	[PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
1199 	[PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
1200 	[PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
1201 	[PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
1202 	[PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
1203 	[PMAP_RO_ZONE_MEMCPY_INDEX] = pmap_ro_zone_memcpy_internal,
1204 	[PMAP_RO_ZONE_BZERO_INDEX] = pmap_ro_zone_bzero_internal,
1205 	[PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
1206 	[PMAP_NEST_INDEX] = pmap_nest_internal,
1207 	[PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
1208 	[PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
1209 	[PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
1210 	[PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
1211 	[PMAP_REFERENCE_INDEX] = pmap_reference_internal,
1212 	[PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
1213 	[PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
1214 	[PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
1215 	[PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
1216 	[PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
1217 	[PMAP_SWITCH_INDEX] = pmap_switch_internal,
1218 	[PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
1219 	[PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
1220 	[PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
1221 	[PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
1222 	[PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
1223 	[PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
1224 	[PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
1225 	[PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
1226 	[PMAP_IS_TRUST_CACHE_LOADED_INDEX] = pmap_is_trust_cache_loaded_internal,
1227 	[PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
1228 	[PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
1229 	[PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
1230 	[PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
1231 	[PMAP_SET_LOCAL_SIGNING_PUBLIC_KEY_INDEX] = pmap_set_local_signing_public_key_internal,
1232 	[PMAP_UNRESTRICT_LOCAL_SIGNING_INDEX] = pmap_unrestrict_local_signing_internal,
1233 	[PMAP_TRIM_INDEX] = pmap_trim_internal,
1234 	[PMAP_LEDGER_VERIFY_SIZE_INDEX] = pmap_ledger_verify_size_internal,
1235 	[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
1236 	[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
1237 #if HAS_APPLE_PAC
1238 	[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
1239 	[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
1240 #endif /* HAS_APPLE_PAC */
1241 #if __ARM_RANGE_TLBI__
1242 	[PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
1243 #endif /* __ARM_RANGE_TLBI__ */
1244 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
1245 	[PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
1246 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
1247 	[PMAP_NOP_INDEX] = pmap_nop_internal,
1248 
1249 #if DEVELOPMENT || DEBUG
1250 	[PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
1251 #endif /* DEVELOPMENT || DEBUG */
1252 };
1253 #endif
1254 
1255 #if XNU_MONITOR
1256 /**
1257  * A convenience function for setting protections on a single physical
1258  * aperture or static region mapping without invalidating the TLB.
1259  *
1260  * @note This function does not perform any TLB invalidations. That must be done
1261  *       separately to be able to safely use the updated mapping.
1262  *
1263  * @note This function understands the difference between the VM page size and
1264  *       the kernel page size and will update multiple PTEs if the sizes differ.
1265  *       In other words, enough PTEs will always get updated to change the
1266  *       permissions on a PAGE_SIZE amount of memory.
1267  *
1268  * @note The PVH lock for the physical page represented by this mapping must
1269  *       already be locked.
1270  *
1271  * @note This function assumes the caller has already verified that the PTE
1272  *       pointer does indeed point to a physical aperture or static region page
1273  *       table. Please validate your inputs before passing it along to this
1274  *       function.
1275  *
1276  * @param ptep Pointer to the physical aperture or static region page table to
1277  *             update with a new XPRR index.
1278  * @param expected_perm The XPRR index that is expected to already exist at the
1279  *                      current mapping. If the current index doesn't match this
1280  *                      then the system will panic.
1281  * @param new_perm The new XPRR index to update the mapping with.
1282  */
1283 MARK_AS_PMAP_TEXT static void
1284 pmap_set_pte_xprr_perm(
1285 	pt_entry_t * const ptep,
1286 	unsigned int expected_perm,
1287 	unsigned int new_perm)
1288 {
1289 	assert(ptep != NULL);
1290 
1291 	pt_entry_t spte = *ptep;
1292 	pvh_assert_locked(pa_index(pte_to_pa(spte)));
1293 
1294 	if (__improbable((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM))) {
1295 		panic_plain("%s: invalid XPRR index, ptep=%p, new_perm=%u, expected_perm=%u",
1296 		    __func__, ptep, new_perm, expected_perm);
1297 	}
1298 
1299 	/**
1300 	 * The PTE involved should be valid, should not have the hint bit set, and
1301 	 * should have the expected XPRR index.
1302 	 */
1303 	if (__improbable((spte & ARM_PTE_TYPE_MASK) == ARM_PTE_TYPE_FAULT)) {
1304 		panic_plain("%s: physical aperture or static region PTE is invalid, "
1305 		    "ptep=%p, spte=%#llx, new_perm=%u, expected_perm=%u",
1306 		    __func__, ptep, spte, new_perm, expected_perm);
1307 	}
1308 
1309 	if (__improbable(spte & ARM_PTE_HINT_MASK)) {
1310 		panic_plain("%s: physical aperture or static region PTE has hint bit "
1311 		    "set, ptep=%p, spte=0x%llx, new_perm=%u, expected_perm=%u",
1312 		    __func__, ptep, spte, new_perm, expected_perm);
1313 	}
1314 
1315 	if (__improbable(pte_to_xprr_perm(spte) != expected_perm)) {
1316 		panic("%s: perm=%llu does not match expected_perm, spte=0x%llx, "
1317 		    "ptep=%p, new_perm=%u, expected_perm=%u",
1318 		    __func__, pte_to_xprr_perm(spte), spte, ptep, new_perm, expected_perm);
1319 	}
1320 
1321 	pt_entry_t template = spte;
1322 	template &= ~ARM_PTE_XPRR_MASK;
1323 	template |= xprr_perm_to_pte(new_perm);
1324 
1325 	write_pte_strong(ptep, template);
1326 }
1327 
1328 /**
1329  * Update the protections on a single physical aperture mapping and invalidate
1330  * the TLB so the mapping can be used.
1331  *
1332  * @note The PVH lock for the physical page must already be locked.
1333  *
1334  * @param pai The physical address index of the page whose physical aperture
1335  *            mapping will be updated with new permissions.
1336  * @param expected_perm The XPRR index that is expected to already exist at the
1337  *                      current mapping. If the current index doesn't match this
1338  *                      then the system will panic.
1339  * @param new_perm The new XPRR index to update the mapping with.
1340  */
1341 MARK_AS_PMAP_TEXT void
1342 pmap_set_xprr_perm(
1343 	unsigned int pai,
1344 	unsigned int expected_perm,
1345 	unsigned int new_perm)
1346 {
1347 	pvh_assert_locked(pai);
1348 
1349 	const vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
1350 	pt_entry_t * const ptep = pmap_pte(kernel_pmap, kva);
1351 
1352 	pmap_set_pte_xprr_perm(ptep, expected_perm, new_perm);
1353 
1354 	native_pt_ops.flush_tlb_region_async(kva, PAGE_SIZE, kernel_pmap, true);
1355 	sync_tlb_flush();
1356 }
1357 
1358 /**
1359  * Update the protections on a range of physical aperture or static region
1360  * mappings and invalidate the TLB so the mappings can be used.
1361  *
1362  * @note Static region mappings can only be updated before machine_lockdown().
1363  *       Physical aperture mappings can be updated at any time.
1364  *
1365  * @param start The starting virtual address of the static region or physical
1366  *              aperture range whose permissions will be updated.
1367  * @param end The final (inclusive) virtual address of the static region or
1368  *            physical aperture range whose permissions will be updated.
1369  * @param expected_perm The XPRR index that is expected to already exist at the
1370  *                      current mappings. If the current indices don't match
1371  *                      this then the system will panic.
1372  * @param new_perm The new XPRR index to update the mappings with.
1373  */
1374 MARK_AS_PMAP_TEXT static void
1375 pmap_set_range_xprr_perm(
1376 	vm_address_t start,
1377 	vm_address_t end,
1378 	unsigned int expected_perm,
1379 	unsigned int new_perm)
1380 {
1381 #if (__ARM_VMSA__ == 7)
1382 #error This function is not supported on older ARM hardware.
1383 #endif /* (__ARM_VMSA__ == 7) */
1384 
1385 	/**
1386 	 * Validate our arguments; any invalid argument will be grounds for a panic.
1387 	 */
1388 	if (__improbable((start | end) & ARM_PGMASK)) {
1389 		panic_plain("%s: start or end not page aligned, "
1390 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1391 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1392 	}
1393 
1394 	if (__improbable(start > end)) {
1395 		panic("%s: start > end, start=%p, end=%p, new_perm=%u, expected_perm=%u",
1396 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1397 	}
1398 
1399 	const bool in_physmap = (start >= physmap_base) && (end < physmap_end);
1400 	const bool in_static = (start >= gVirtBase) && (end < static_memory_end);
1401 
1402 	if (__improbable(!(in_physmap || in_static))) {
1403 		panic_plain("%s: address not in static region or physical aperture, "
1404 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1405 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1406 	}
1407 
1408 	if (__improbable((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM))) {
1409 		panic_plain("%s: invalid XPRR index, "
1410 		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1411 		    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1412 	}
1413 
1414 	/*
1415 	 * Walk over the PTEs for the given range, and set the protections on those
1416 	 * PTEs. Each iteration of this loop will update all of the leaf PTEs within
1417 	 * one twig entry (whichever twig entry currently maps "va").
1418 	 */
1419 	vm_address_t va = start;
1420 	while (va < end) {
1421 		/**
1422 		 * Get the last VA that the twig entry for "va" maps. All of the leaf
1423 		 * PTEs from va to tte_va_end will have their permissions updated.
1424 		 */
1425 		vm_address_t tte_va_end =
1426 		    (va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr);
1427 
1428 		if (tte_va_end > end) {
1429 			tte_va_end = end;
1430 		}
1431 
1432 		tt_entry_t *ttep = pmap_tte(kernel_pmap, va);
1433 
1434 		if (ttep == NULL) {
1435 			panic_plain("%s: physical aperture or static region tte is NULL, "
1436 			    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
1437 			    __func__, (void *)start, (void *)end, new_perm, expected_perm);
1438 		}
1439 
1440 		tt_entry_t tte = *ttep;
1441 
1442 		if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1443 			panic_plain("%s: tte=0x%llx is not a table type entry, "
1444 			    "start=%p, end=%p, new_perm=%u, expected_perm=%u", __func__,
1445 			    tte, (void *)start, (void *)end, new_perm, expected_perm);
1446 		}
1447 
1448 		/* Walk over the given L3 page table page and update the PTEs. */
1449 		pt_entry_t * const ptep = (pt_entry_t *)ttetokv(tte);
1450 		pt_entry_t * const begin_ptep = &ptep[pte_index(native_pt_attr, va)];
1451 		const uint64_t num_ptes = (tte_va_end - va) >> pt_attr_leaf_shift(native_pt_attr);
1452 		pt_entry_t * const end_ptep = begin_ptep + num_ptes;
1453 
1454 		/**
1455 		 * The current PTE pointer is incremented by the page ratio (ratio of
1456 		 * VM page size to kernel hardware page size) because one call to
1457 		 * pmap_set_pte_xprr_perm() will update all PTE entries required to map
1458 		 * a PAGE_SIZE worth of hardware pages.
1459 		 */
1460 		for (pt_entry_t *cur_ptep = begin_ptep; cur_ptep < end_ptep;
1461 		    cur_ptep += PAGE_RATIO, va += PAGE_SIZE) {
1462 			unsigned int pai = pa_index(pte_to_pa(*cur_ptep));
1463 			pvh_lock(pai);
1464 			pmap_set_pte_xprr_perm(cur_ptep, expected_perm, new_perm);
1465 			pvh_unlock(pai);
1466 		}
1467 
1468 		va = tte_va_end;
1469 	}
1470 
1471 	PMAP_UPDATE_TLBS(kernel_pmap, start, end, false, true);
1472 }
1473 
1474 #endif /* XNU_MONITOR */
1475 
1476 static inline void
1477 PMAP_ZINFO_PALLOC(
1478 	pmap_t pmap, int bytes)
1479 {
1480 	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
1481 }
1482 
1483 static inline void
1484 PMAP_ZINFO_PFREE(
1485 	pmap_t pmap,
1486 	int bytes)
1487 {
1488 	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1489 }
1490 
1491 void
1492 pmap_tt_ledger_credit(
1493 	pmap_t          pmap,
1494 	vm_size_t       size)
1495 {
1496 	if (pmap != kernel_pmap) {
1497 		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
1498 		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
1499 	}
1500 }
1501 
1502 void
1503 pmap_tt_ledger_debit(
1504 	pmap_t          pmap,
1505 	vm_size_t       size)
1506 {
1507 	if (pmap != kernel_pmap) {
1508 		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
1509 		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
1510 	}
1511 }
1512 
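/*
 * Mark a hardware ASID as recently used for the pseudo-LRU allocator:
 * clearing its bit in asid_plru_bitmap records the use, and once every bit in
 * a 64-ASID word has been cleared the word is refilled and its generation
 * count bumped, so that alloc_asid() prefers the word that was exhausted
 * least recently.
 */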
1513 static inline void
1514 pmap_update_plru(uint16_t asid_index)
1515 {
1516 	if (__probable(pmap_asid_plru)) {
1517 		unsigned plru_index = asid_index >> 6;
1518 		if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
1519 			asid_plru_generation[plru_index] = ++asid_plru_gencount;
1520 			asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
1521 		}
1522 	}
1523 }
1524 
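/*
 * Allocate a virtual ASID (vasid) from asid_bitmap and split it into the
 * hardware ASID programmed into the translation registers and a software
 * "generation" stored in pmap->sw_asid:
 *
 *     hw_asid = vasid % asid_chunk_size
 *     sw_asid = vasid / asid_chunk_size
 *
 * For example, if MAX_HW_ASIDS were 255 (asid_chunk_size of 256 with the
 * PLRU allocator enabled), vasid 300 would yield hw_asid 44 and sw_asid 1.
 * A vasid that lands on the reserved slot (hw_asid == MAX_HW_ASIDS) is
 * remapped onto sw_asid UINT8_MAX and a hardware ASID taken from the top of
 * the range.
 */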
1525 static bool
1526 alloc_asid(pmap_t pmap)
1527 {
1528 	int vasid = -1;
1529 	uint16_t hw_asid;
1530 
1531 	pmap_simple_lock(&asid_lock);
1532 
1533 	if (__probable(pmap_asid_plru)) {
1534 		unsigned plru_index = 0;
1535 		uint64_t lowest_gen = asid_plru_generation[0];
1536 		uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
1537 		for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
1538 			if (asid_plru_generation[i] < lowest_gen) {
1539 				plru_index = i;
1540 				lowest_gen = asid_plru_generation[i];
1541 				lowest_gen_bitmap = asid_plru_bitmap[i];
1542 			}
1543 		}
1544 
1545 		for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
1546 			uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
1547 			if (temp_plru) {
1548 				vasid = (plru_index << 6) + lsb_first(temp_plru);
1549 #if DEVELOPMENT || DEBUG
1550 				++pmap_asid_hits;
1551 #endif
1552 				break;
1553 			}
1554 		}
1555 	}
1556 	if (__improbable(vasid < 0)) {
1557 		// bitmap_first() returns highest-order bits first, but a 0-based scheme works
1558 		// slightly better with the collision detection scheme used by pmap_switch_internal().
1559 		vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
1560 #if DEVELOPMENT || DEBUG
1561 		++pmap_asid_misses;
1562 #endif
1563 	}
1564 	if (__improbable(vasid < 0)) {
1565 		pmap_simple_unlock(&asid_lock);
1566 		return false;
1567 	}
1568 	assert((uint32_t)vasid < pmap_max_asids);
1569 	assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
1570 	bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
1571 	pmap_simple_unlock(&asid_lock);
1572 	hw_asid = (uint16_t)(vasid % asid_chunk_size);
1573 	pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
1574 	if (__improbable(hw_asid == MAX_HW_ASIDS)) {
1575 		/* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
1576 		 * reassign to a reserved VASID. */
1577 		assert(pmap->sw_asid < UINT8_MAX);
1578 		pmap->sw_asid = UINT8_MAX;
1579 		/* Allocate from the high end of the hardware ASID range to reduce the likelihood of
1580 		 * aliasing with vital system processes, which are likely to have lower ASIDs. */
1581 		hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
1582 		assert(hw_asid < MAX_HW_ASIDS);
1583 	}
1584 	pmap_update_plru(hw_asid);
1585 	hw_asid += 1;  // Account for ASID 0, which is reserved for the kernel
1586 #if __ARM_KERNEL_PROTECT__
1587 	hw_asid <<= 1;  // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
1588 #endif
1589 	pmap->hw_asid = hw_asid;
1590 	return true;
1591 }
1592 
1593 static void
1594 free_asid(pmap_t pmap)
1595 {
1596 	unsigned int vasid;
1597 	uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
1598 	if (__improbable(hw_asid == 0)) {
1599 		return;
1600 	}
1601 
1602 #if __ARM_KERNEL_PROTECT__
1603 	hw_asid >>= 1;
1604 #endif
1605 	hw_asid -= 1;
1606 
1607 	if (__improbable(pmap->sw_asid == UINT8_MAX)) {
1608 		vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
1609 	} else {
1610 		vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
1611 	}
1612 
1613 	if (__probable(pmap_asid_plru)) {
1614 		os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
1615 	}
1616 	pmap_simple_lock(&asid_lock);
1617 	assert(!bitmap_test(&asid_bitmap[0], vasid));
1618 	bitmap_set(&asid_bitmap[0], vasid);
1619 	pmap_simple_unlock(&asid_lock);
1620 }
1621 
1622 
1623 boolean_t
1624 pmap_valid_address(
1625 	pmap_paddr_t addr)
1626 {
1627 	return pa_valid(addr);
1628 }
1629 
1630 
1631 
1632 
1633 
1634 
1635 /*
1636  *      Map memory at initialization.  The physical addresses being
1637  *      mapped are not managed and are never unmapped.
1638  *
1639  *      For now, VM is already on; we only need to map the
1640  *      specified memory.
1641  */
1642 vm_map_address_t
1643 pmap_map(
1644 	vm_map_address_t virt,
1645 	vm_offset_t start,
1646 	vm_offset_t end,
1647 	vm_prot_t prot,
1648 	unsigned int flags)
1649 {
1650 	kern_return_t   kr;
1651 	vm_size_t       ps;
1652 
1653 	ps = PAGE_SIZE;
1654 	while (start < end) {
1655 		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
1656 		    prot, VM_PROT_NONE, flags, FALSE);
1657 
1658 		if (kr != KERN_SUCCESS) {
1659 			panic("%s: failed pmap_enter, "
1660 			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
1661 			    __FUNCTION__,
1662 			    (void *) virt, (void *) start, (void *) end, prot, flags);
1663 		}
1664 
1665 		virt += ps;
1666 		start += ps;
1667 	}
1668 	return virt;
1669 }
1670 
1671 vm_map_address_t
1672 pmap_map_bd_with_options(
1673 	vm_map_address_t virt,
1674 	vm_offset_t start,
1675 	vm_offset_t end,
1676 	vm_prot_t prot,
1677 	int32_t options)
1678 {
1679 	pt_entry_t      tmplate;
1680 	pt_entry_t     *ptep;
1681 	vm_map_address_t vaddr;
1682 	vm_offset_t     paddr;
1683 	pt_entry_t      mem_attr;
1684 
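	/*
	 * Select the memory attribute index for the mapping: write-combined or
	 * one of the posted-device variants when requested, otherwise uncached
	 * device memory.
	 */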
1685 	switch (options & PMAP_MAP_BD_MASK) {
1686 	case PMAP_MAP_BD_WCOMB:
1687 		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
1688 #if     (__ARM_VMSA__ > 7)
1689 		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
1690 #else
1691 		mem_attr |= ARM_PTE_SH;
1692 #endif
1693 		break;
1694 	case PMAP_MAP_BD_POSTED:
1695 		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
1696 		break;
1697 	case PMAP_MAP_BD_POSTED_REORDERED:
1698 		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
1699 		break;
1700 	case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
1701 		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
1702 		break;
1703 	default:
1704 		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
1705 		break;
1706 	}
1707 
1708 	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
1709 	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
1710 #if __ARM_KERNEL_PROTECT__
1711 	tmplate |= ARM_PTE_NG;
1712 #endif /* __ARM_KERNEL_PROTECT__ */
1713 
1714 	vaddr = virt;
1715 	paddr = start;
1716 	while (paddr < end) {
1717 		ptep = pmap_pte(kernel_pmap, vaddr);
1718 		if (ptep == PT_ENTRY_NULL) {
1719 			panic("%s: no PTE for vaddr=%p, "
1720 			    "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
1721 			    __FUNCTION__, (void*)vaddr,
1722 			    (void*)virt, (void*)start, (void*)end, prot, options);
1723 		}
1724 
1725 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1726 		write_pte_strong(ptep, tmplate);
1727 
1728 		pte_increment_pa(tmplate);
1729 		vaddr += PAGE_SIZE;
1730 		paddr += PAGE_SIZE;
1731 	}
1732 
1733 	if (end >= start) {
1734 		flush_mmu_tlb_region(virt, (unsigned)(end - start));
1735 	}
1736 
1737 	return vaddr;
1738 }
1739 
1740 /*
1741  *      Back-door routine for mapping kernel VM at initialization.
1742  *      Useful for mapping memory outside the range
1743  *      [vm_first_phys, vm_last_phys] (i.e., devices).
1744  *      Otherwise like pmap_map.
1745  */
1746 vm_map_address_t
1747 pmap_map_bd(
1748 	vm_map_address_t virt,
1749 	vm_offset_t start,
1750 	vm_offset_t end,
1751 	vm_prot_t prot)
1752 {
1753 	pt_entry_t      tmplate;
1754 	pt_entry_t              *ptep;
1755 	vm_map_address_t vaddr;
1756 	vm_offset_t             paddr;
1757 
1758 	/* not cacheable and not buffered */
1759 	tmplate = pa_to_pte(start)
1760 	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
1761 	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
1762 	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
1763 #if __ARM_KERNEL_PROTECT__
1764 	tmplate |= ARM_PTE_NG;
1765 #endif /* __ARM_KERNEL_PROTECT__ */
1766 
1767 	vaddr = virt;
1768 	paddr = start;
1769 	while (paddr < end) {
1770 		ptep = pmap_pte(kernel_pmap, vaddr);
1771 		if (ptep == PT_ENTRY_NULL) {
1772 			panic("pmap_map_bd");
1773 		}
1774 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1775 		write_pte_strong(ptep, tmplate);
1776 
1777 		pte_increment_pa(tmplate);
1778 		vaddr += PAGE_SIZE;
1779 		paddr += PAGE_SIZE;
1780 	}
1781 
1782 	if (end >= start) {
1783 		flush_mmu_tlb_region(virt, (unsigned)(end - start));
1784 	}
1785 
1786 	return vaddr;
1787 }
1788 
1789 /*
1790  *      Back-door routine for mapping kernel VM at initialization.
1791  *      Useful for mapping memory specific physical addresses in early
1792  *      boot (i.e., before kernel_map is initialized).
1793  *
1794  *      Maps are in the VM_HIGH_KERNEL_WINDOW area.
1795  */
1796 
1797 vm_map_address_t
1798 pmap_map_high_window_bd(
1799 	vm_offset_t pa_start,
1800 	vm_size_t len,
1801 	vm_prot_t prot)
1802 {
1803 	pt_entry_t              *ptep, pte;
1804 #if (__ARM_VMSA__ == 7)
1805 	vm_map_address_t        va_start = VM_HIGH_KERNEL_WINDOW;
1806 	vm_map_address_t        va_max = VM_MAX_KERNEL_ADDRESS;
1807 #else
1808 	vm_map_address_t        va_start = VREGION1_START;
1809 	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
1810 #endif
1811 	vm_map_address_t        va_end;
1812 	vm_map_address_t        va;
1813 	vm_size_t               offset;
1814 
1815 	offset = pa_start & PAGE_MASK;
1816 	pa_start -= offset;
1817 	len += offset;
1818 
1819 	if (len > (va_max - va_start)) {
1820 		panic("%s: area too large, "
1821 		    "pa_start=%p, len=%p, prot=0x%x",
1822 		    __FUNCTION__,
1823 		    (void*)pa_start, (void*)len, prot);
1824 	}
1825 
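	/*
	 * Linear scan of the high window for a run of enough consecutive unmapped
	 * pages to hold the (page-aligned) request; if a mapped page is found
	 * partway through a candidate run, restart the scan just past it.
	 */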
1826 scan:
1827 	for (; va_start < va_max; va_start += PAGE_SIZE) {
1828 		ptep = pmap_pte(kernel_pmap, va_start);
1829 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1830 		if (*ptep == ARM_PTE_TYPE_FAULT) {
1831 			break;
1832 		}
1833 	}
1834 	if (va_start > va_max) {
1835 		panic("%s: insufficient pages, "
1836 		    "pa_start=%p, len=%p, prot=0x%x",
1837 		    __FUNCTION__,
1838 		    (void*)pa_start, (void*)len, prot);
1839 	}
1840 
1841 	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
1842 		ptep = pmap_pte(kernel_pmap, va_end);
1843 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
1844 		if (*ptep != ARM_PTE_TYPE_FAULT) {
1845 			va_start = va_end + PAGE_SIZE;
1846 			goto scan;
1847 		}
1848 	}
1849 
1850 	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
1851 		ptep = pmap_pte(kernel_pmap, va);
1852 		pte = pa_to_pte(pa_start)
1853 		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
1854 		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
1855 		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
1856 #if     (__ARM_VMSA__ > 7)
1857 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
1858 #else
1859 		pte |= ARM_PTE_SH;
1860 #endif
1861 #if __ARM_KERNEL_PROTECT__
1862 		pte |= ARM_PTE_NG;
1863 #endif /* __ARM_KERNEL_PROTECT__ */
1864 		write_pte_strong(ptep, pte);
1865 	}
1866 	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false, true);
1867 #if KASAN
1868 	kasan_notify_address(va_start, len);
1869 #endif
1870 	return va_start;
1871 }
1872 
1873 static uint32_t
1874 pmap_compute_max_asids(void)
1875 {
1876 	DTEntry entry;
1877 	void const *prop = NULL;
1878 	uint32_t max_asids;
1879 	int err;
1880 	unsigned int prop_size;
1881 
1882 	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
1883 	assert(err == kSuccess);
1884 
1885 	if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
1886 		/* TODO: consider allowing maxproc limits to be scaled earlier so that
1887 		 * we can choose a more flexible default value here. */
1888 		return MAX_ASIDS;
1889 	}
1890 
1891 	if (prop_size != sizeof(max_asids)) {
1892 		panic("pmap-max-asids property is not a 32-bit integer");
1893 	}
1894 
1895 	max_asids = *((uint32_t const *)prop);
1896 	/* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
1897 	max_asids = (max_asids + 63) & ~63UL;
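	/* For example, a device tree value of 4000 would round up to 4032 here. */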
1898 
1899 	if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
1900 		/* currently capped by size of pmap->sw_asid */
1901 		panic("pmap-max-asids too large");
1902 	}
1903 	if (max_asids == 0) {
1904 		panic("pmap-max-asids cannot be zero");
1905 	}
1906 	return max_asids;
1907 }
1908 
1909 #if __arm64__
1910 /*
1911  * pmap_get_arm64_prot
1912  *
1913  * Return the effective ARMv8 VMSA block protections for a pmap entry,
1914  * including table-level AP/PXN/XN overrides.
1915  *
1916  */
1917 
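/*
 * In the VMSA translation scheme, the APTable/XNTable/PXNTable bits in a
 * table descriptor can only further restrict the permissions granted by the
 * final block/page descriptor, so ORing them together across levels (as done
 * below) yields the strictest override, which is then folded into the leaf
 * entry's AP/XN/PXN bits.
 */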
1918 uint64_t
1919 pmap_get_arm64_prot(
1920 	pmap_t pmap,
1921 	vm_offset_t addr)
1922 {
1923 	tt_entry_t tte = 0;
1924 	unsigned int level = 0;
1925 	uint64_t tte_type = 0;
1926 	uint64_t effective_prot_bits = 0;
1927 	uint64_t aggregate_tte = 0;
1928 	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
1929 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
1930 
1931 	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
1932 		tte = *pmap_ttne(pmap, level, addr);
1933 
1934 		if (!(tte & ARM_TTE_VALID)) {
1935 			return 0;
1936 		}
1937 
1938 		tte_type = tte & ARM_TTE_TYPE_MASK;
1939 
1940 		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
1941 		    (level == pt_attr->pta_max_level)) {
1942 			/* Block or page mapping; both have the same protection bit layout. */
1943 			break;
1944 		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
1945 			/* All of the table bits we care about are overrides, so just OR them together. */
1946 			aggregate_tte |= tte;
1947 		}
1948 	}
1949 
1950 	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
1951 	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
1952 	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
1953 
1954 	/* Start with the PTE bits. */
1955 	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
1956 
1957 	/* Table AP bits mask out block/page AP bits */
1958 	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
1959 
1960 	/* XN/PXN bits can be OR'd in. */
1961 	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
1962 	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
1963 
1964 	return effective_prot_bits;
1965 }
1966 #endif /* __arm64__ */
1967 
1968 static void
1969 pmap_set_srd_fusing(void)
1970 {
1971 	DTEntry entry;
1972 	uint32_t const *prop = NULL;
1973 	int err;
1974 	unsigned int prop_size = 0;
1975 
1976 	err = SecureDTLookupEntry(NULL, "/chosen", &entry);
1977 	if (err != kSuccess) {
1978 		panic("PMAP: no chosen DT node");
1979 	}
1980 
1981 	if (kSuccess == SecureDTGetProperty(entry, "research-enabled", (const void**)&prop, &prop_size)) {
1982 		if (prop_size == sizeof(uint32_t)) {
1983 			srd_fused = *prop;
1984 		}
1985 	}
1986 
1987 #if DEVELOPMENT || DEBUG
1988 	PE_parse_boot_argn("srd_fusing", &srd_fused, sizeof(srd_fused));
1989 #endif
1990 }
1991 
1992 /*
1993  *	Bootstrap the system enough to run with virtual memory.
1994  *
1995  *	The early VM initialization code has already allocated
1996  *	the first CPU's translation table and made entries for
1997  *	all the one-to-one mappings to be found there.
1998  *
1999  *	We must set up the kernel pmap structures, the
2000  *	physical-to-virtual translation lookup tables for the
2001  *	physical memory to be managed (between avail_start and
2002  *	avail_end).
2003  *
2004  *	Map the kernel's code and data, and allocate the system page table.
2005  *	Page_size must already be set.
2006  *
2007  *	Parameters:
2008  *	first_avail	first available physical page -
2009  *			   after kernel page tables
2010  *	avail_start	PA of first managed physical page
2011  *	avail_end	PA of last managed physical page
2012  */
2013 
2014 void
2015 pmap_bootstrap(
2016 	vm_offset_t vstart)
2017 {
2018 	vm_map_offset_t maxoffset;
2019 
2020 	lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
2021 
2022 	pmap_set_srd_fusing();
2023 
2024 #if XNU_MONITOR
2025 
2026 #if DEVELOPMENT || DEBUG
2027 	PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
2028 #endif
2029 
2030 #if CONFIG_CSR_FROM_DT
2031 	if (csr_unsafe_kernel_text) {
2032 		pmap_ppl_disable = true;
2033 	}
2034 #endif /* CONFIG_CSR_FROM_DT */
2035 
2036 #endif /* XNU_MONITOR */
2037 
2038 #if DEVELOPMENT || DEBUG
2039 	if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
2040 		kprintf("Kernel traces for pmap operations enabled\n");
2041 	}
2042 #endif
2043 
2044 	/*
2045 	 *	Initialize the kernel pmap.
2046 	 */
2047 	pmap_stamp = 1;
2048 #if ARM_PARAMETERIZED_PMAP
2049 	kernel_pmap->pmap_pt_attr = native_pt_attr;
2050 #endif /* ARM_PARAMETERIZED_PMAP */
2051 #if HAS_APPLE_PAC
2052 	kernel_pmap->disable_jop = 0;
2053 #endif /* HAS_APPLE_PAC */
2054 	kernel_pmap->tte = cpu_tte;
2055 	kernel_pmap->ttep = cpu_ttep;
2056 #if (__ARM_VMSA__ > 7)
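	/*
	 * The lowest kernel-managed VA is the base of the TTBR1 region, i.e.
	 * 2^64 - 2^(64 - T1SZ_BOOT).
	 */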
2057 	kernel_pmap->min = UINT64_MAX - (1ULL << (64 - T1SZ_BOOT)) + 1;
2058 #else
2059 	kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
2060 #endif
2061 	kernel_pmap->max = UINTPTR_MAX;
2062 	os_atomic_init(&kernel_pmap->ref_count, 1);
2063 #if XNU_MONITOR
2064 	os_atomic_init(&kernel_pmap->nested_count, 0);
2065 #endif
2066 	kernel_pmap->gc_status = 0;
2067 	kernel_pmap->nx_enabled = TRUE;
2068 #ifdef  __arm64__
2069 	kernel_pmap->is_64bit = TRUE;
2070 #else
2071 	kernel_pmap->is_64bit = FALSE;
2072 #endif
2073 	kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
2074 
2075 #if ARM_PARAMETERIZED_PMAP
2076 	kernel_pmap->pmap_pt_attr = native_pt_attr;
2077 #endif /* ARM_PARAMETERIZED_PMAP */
2078 
2079 	kernel_pmap->nested_region_addr = 0x0ULL;
2080 	kernel_pmap->nested_region_size = 0x0ULL;
2081 	kernel_pmap->nested_region_asid_bitmap = NULL;
2082 	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
2083 	kernel_pmap->type = PMAP_TYPE_KERNEL;
2084 
2085 #if (__ARM_VMSA__ == 7)
2086 	kernel_pmap->tte_index_max = 4 * (ARM_PGBYTES / sizeof(tt_entry_t));
2087 #endif
2088 	kernel_pmap->hw_asid = 0;
2089 	kernel_pmap->sw_asid = 0;
2090 
2091 	pmap_lock_init(kernel_pmap);
2092 
2093 	pmap_max_asids = pmap_compute_max_asids();
2094 	pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
2095 	PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
2096 	/* Align the range of available hardware ASIDs to a multiple of 64 to enable the
2097 	 * masking used by the PLRU scheme.  This means we must handle the case in which
2098 	 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
2099 	_Static_assert(sizeof(asid_plru_bitmap[0]) == sizeof(uint64_t), "bitmap_t is not a 64-bit integer");
2100 	_Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
2101 	asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);
2102 
2103 	const vm_size_t asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);
2104 
2105 	/**
2106 	 * Bootstrap the core pmap data structures (e.g., pv_head_table,
2107 	 * pp_attr_table, etc). This function will use `avail_start` to allocate
2108 	 * space for these data structures.
2109 	 */
2110 	pmap_data_bootstrap();
2111 
2112 	/**
2113 	 * Don't make any assumptions about the alignment of avail_start before this
2114 	 * point (i.e., pmap_data_bootstrap() performs allocations).
2115 	 */
2116 	avail_start = PMAP_ALIGN(avail_start, __alignof(bitmap_t));
2117 
2118 	const pmap_paddr_t pmap_struct_start = avail_start;
2119 
2120 	asid_bitmap = (bitmap_t*)phystokv(avail_start);
2121 	avail_start = round_page(avail_start + asid_table_size);
2122 
2123 	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
2124 
2125 	vm_first_phys = gPhysBase;
2126 	vm_last_phys = trunc_page(avail_end);
2127 
2128 	queue_init(&map_pmap_list);
2129 	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
2130 	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
2131 	free_page_size_tt_count = 0;
2132 	free_page_size_tt_max = 0;
2133 	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
2134 	free_two_page_size_tt_count = 0;
2135 	free_two_page_size_tt_max = 0;
2136 	free_tt_list = TT_FREE_ENTRY_NULL;
2137 	free_tt_count = 0;
2138 	free_tt_max = 0;
2139 
2140 	virtual_space_start = vstart;
2141 	virtual_space_end = VM_MAX_KERNEL_ADDRESS;
2142 
2143 	bitmap_full(&asid_bitmap[0], pmap_max_asids);
2144 	bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
2145 	// Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
2146 	asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);
2147 
2148 
2149 
2150 	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
2151 		maxoffset = trunc_page(maxoffset);
2152 		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
2153 		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
2154 			arm_pmap_max_offset_default = maxoffset;
2155 		}
2156 	}
2157 #if defined(__arm64__)
2158 	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
2159 		maxoffset = trunc_page(maxoffset);
2160 		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
2161 		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
2162 			arm64_pmap_max_offset_default = maxoffset;
2163 		}
2164 	}
2165 #endif
2166 
2167 	PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));
2168 
2169 
2170 #if MACH_ASSERT
2171 	PE_parse_boot_argn("vm_footprint_suspend_allowed",
2172 	    &vm_footprint_suspend_allowed,
2173 	    sizeof(vm_footprint_suspend_allowed));
2174 #endif /* MACH_ASSERT */
2175 
2176 #if KASAN
2177 	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
2178 	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
2179 #endif /* KASAN */
2180 
2181 	/**
2182 	 * Ensure that avail_start is always left on a page boundary. The calling
2183 	 * code might not perform any alignment before allocating page tables so
2184 	 * this is important.
2185 	 */
2186 	avail_start = round_page(avail_start);
2187 }
2188 
2189 #if XNU_MONITOR
2190 
2191 static inline void
2192 pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
2193 {
2194 	pmap_paddr_t cur_pa;
2195 	for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
2196 		assert(pa_valid(cur_pa));
2197 		ppattr_pa_set_monitor(cur_pa);
2198 	}
2199 }
2200 
2201 void
2202 pa_set_range_xprr_perm(pmap_paddr_t start_pa,
2203     pmap_paddr_t end_pa,
2204     unsigned int expected_perm,
2205     unsigned int new_perm)
2206 {
2207 	vm_offset_t start_va = phystokv(start_pa);
2208 	vm_offset_t end_va = start_va + (end_pa - start_pa);
2209 
2210 	pa_set_range_monitor(start_pa, end_pa);
2211 	pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
2212 }
2213 
2214 static void
2215 pmap_lockdown_kc(void)
2216 {
2217 	extern vm_offset_t vm_kernelcache_base;
2218 	extern vm_offset_t vm_kernelcache_top;
2219 	pmap_paddr_t start_pa = kvtophys_nofail(vm_kernelcache_base);
2220 	pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
2221 	pmap_paddr_t cur_pa = start_pa;
2222 	vm_offset_t cur_va = vm_kernelcache_base;
2223 	while (cur_pa < end_pa) {
2224 		vm_size_t range_size = end_pa - cur_pa;
2225 		vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
2226 		if (ptov_va != cur_va) {
2227 			/*
2228 			 * If the physical address maps back to a virtual address that is non-linear
2229 			 * w.r.t. the kernelcache, that means it corresponds to memory that will be
2230 			 * reclaimed by the OS and should therefore not be locked down.
2231 			 */
2232 			cur_pa += range_size;
2233 			cur_va += range_size;
2234 			continue;
2235 		}
2236 		unsigned int pai = pa_index(cur_pa);
2237 		pv_entry_t **pv_h  = pai_to_pvh(pai);
2238 
2239 		vm_offset_t pvh_flags = pvh_get_flags(pv_h);
2240 
2241 		if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN_MASK)) {
2242 			panic("pai %d already locked down", pai);
2243 		}
2244 		pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN_KC);
2245 		cur_pa += ARM_PGBYTES;
2246 		cur_va += ARM_PGBYTES;
2247 	}
2248 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
2249 	extern uint64_t ctrr_ro_test;
2250 	extern uint64_t ctrr_nx_test;
2251 	pmap_paddr_t exclude_pages[] = {kvtophys_nofail((vm_offset_t)&ctrr_ro_test), kvtophys_nofail((vm_offset_t)&ctrr_nx_test)};
2252 	for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
2253 		pv_entry_t **pv_h  = pai_to_pvh(pa_index(exclude_pages[i]));
2254 		pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN_KC);
2255 	}
2256 #endif
2257 }
2258 
2259 void
2260 pmap_static_allocations_done(void)
2261 {
2262 	pmap_paddr_t monitor_start_pa;
2263 	pmap_paddr_t monitor_end_pa;
2264 
2265 	/*
2266 	 * Protect the bootstrap (V=P and V->P) page tables.
2267 	 *
2268 	 * These bootstrap allocations will be used primarily for page tables.
2269 	 * If we wish to secure the page tables, we need to start by marking
2270 	 * these bootstrap allocations as pages that we want to protect.
2271 	 */
2272 	monitor_start_pa = kvtophys_nofail((vm_offset_t)&bootstrap_pagetables);
2273 	monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;
2274 
2275 	/* The bootstrap page tables are mapped RW at bootstrap. */
2276 	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);
2277 
2278 	/*
2279 	 * We use avail_start as a pointer to the first address that has not
2280 	 * been reserved for bootstrap, so we know which pages to give to the
2281 	 * virtual memory layer.
2282 	 */
2283 	monitor_start_pa = BootArgs->topOfKernelData;
2284 	monitor_end_pa = avail_start;
2285 
2286 	/* The other bootstrap allocations are mapped RW at bootstrap. */
2287 	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
2288 
2289 	/*
2290 	 * The RO page tables are mapped RW in arm_vm_init() and later restricted
2291 	 * to RO in arm_vm_prot_finalize(), which is called after this function.
2292 	 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
2293 	 * they can't be allocated for other uses.  We don't need a special xPRR
2294 	 * protection index, as there is no PPL_RO index, and these pages are ultimately
2295 	 * protected by KTRR/CTRR.  Furthermore, use of PPL_RW for these pages would
2296 	 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
2297 	 * lookup table index to USER_XO before APRR is applied, leading the hardware
2298 	 * to believe we are dealing with a user XO page upon performing a translation.
2299 	 */
2300 	monitor_start_pa = kvtophys_nofail((vm_offset_t)&ropagetable_begin);
2301 	monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
2302 	pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
2303 
2304 	monitor_start_pa = kvtophys_nofail(segPPLDATAB);
2305 	monitor_end_pa = monitor_start_pa + segSizePPLDATA;
2306 
2307 	/* PPL data is RW for the PPL, RO for the kernel. */
2308 	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
2309 
2310 	monitor_start_pa = kvtophys_nofail(segPPLTEXTB);
2311 	monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
2312 
2313 	/* PPL text is RX for the PPL, RO for the kernel. */
2314 	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
2315 
2316 
2317 	/*
2318 	 * In order to support DTrace, the save areas for the PPL must be
2319 	 * writable.  This is due to the fact that DTrace will try to update
2320 	 * register state.
2321 	 */
2322 	if (pmap_ppl_disable) {
2323 		vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
2324 		vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
2325 
2326 		pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
2327 	}
2328 
2329 
2330 	if (segSizePPLDATACONST > 0) {
2331 		monitor_start_pa = kvtophys_nofail(segPPLDATACONSTB);
2332 		monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
2333 
2334 		pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
2335 	}
2336 
2337 	/*
2338 	 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
2339 	 * precaution.  The real RW mappings are at a different location with guard pages.
2340 	 */
2341 	pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
2342 
2343 	/* Prevent remapping of the kernelcache */
2344 	pmap_lockdown_kc();
2345 }
2346 
2347 void
2348 pmap_lockdown_ppl(void)
2349 {
2350 	/* Mark the PPL as being locked down. */
2351 
2352 #error "XPRR configuration error"
2353 }
2354 #endif /* XNU_MONITOR */
2355 
2356 void
2357 pmap_virtual_space(
2358 	vm_offset_t *startp,
2359 	vm_offset_t *endp
2360 	)
2361 {
2362 	*startp = virtual_space_start;
2363 	*endp = virtual_space_end;
2364 }
2365 
2366 
2367 boolean_t
2368 pmap_virtual_region(
2369 	unsigned int region_select,
2370 	vm_map_offset_t *startp,
2371 	vm_map_size_t *size
2372 	)
2373 {
2374 	boolean_t       ret = FALSE;
2375 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2376 	if (region_select == 0) {
2377 		/*
2378 		 * In this config, the bootstrap mappings should occupy their own L2
2379 		 * TTs, as they should be immutable after boot.  Having the associated
2380 		 * TTEs and PTEs in their own pages allows us to lock down those pages,
2381 		 * while allowing the rest of the kernel address range to be remapped.
2382 		 */
2383 #if     (__ARM_VMSA__ > 7)
2384 		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
2385 #else
2386 #error Unsupported configuration
2387 #endif
2388 #if defined(ARM_LARGE_MEMORY)
2389 		*size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
2390 #else
2391 		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
2392 #endif
2393 		ret = TRUE;
2394 	}
2395 #else /* !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)) */
2396 #if defined(ARM_LARGE_MEMORY)
2397 	/* For large memory systems with no KTRR/CTRR such as virtual machines */
2398 #if     (__ARM_VMSA__ > 7)
2399 	*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
2400 #else
2401 #error Unsupported configuration
2402 #endif
2403 	if (region_select == 0) {
2404 		*size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
2405 		ret = TRUE;
2406 	}
2407 #else /* !defined(ARM_LARGE_MEMORY) */
2408 #if     (__ARM_VMSA__ > 7)
2409 	unsigned long low_global_vr_mask = 0;
2410 	vm_map_size_t low_global_vr_size = 0;
2411 #endif
2412 
2413 	if (region_select == 0) {
2414 #if     (__ARM_VMSA__ == 7)
2415 		*startp = gVirtBase & 0xFFC00000;
2416 		*size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
2417 #else
2418 		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
2419 		if (!TEST_PAGE_SIZE_4K) {
2420 			*startp = gVirtBase & 0xFFFFFFFFFE000000;
2421 			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
2422 		} else {
2423 			*startp = gVirtBase & 0xFFFFFFFFFF800000;
2424 			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
2425 		}
2426 #endif
2427 		ret = TRUE;
2428 	}
2429 	if (region_select == 1) {
2430 		*startp = VREGION1_START;
2431 		*size = VREGION1_SIZE;
2432 		ret = TRUE;
2433 	}
2434 #if     (__ARM_VMSA__ > 7)
2435 	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
2436 	if (!TEST_PAGE_SIZE_4K) {
2437 		low_global_vr_mask = 0xFFFFFFFFFE000000;
2438 		low_global_vr_size = 0x2000000;
2439 	} else {
2440 		low_global_vr_mask = 0xFFFFFFFFFF800000;
2441 		low_global_vr_size = 0x800000;
2442 	}
2443 
2444 	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
2445 		*startp = LOW_GLOBAL_BASE_ADDRESS;
2446 		*size = low_global_vr_size;
2447 		ret = TRUE;
2448 	}
2449 
2450 	if (region_select == 3) {
2451 		/* In this config, we allow the bootstrap mappings to occupy the same
2452 		 * page table pages as the heap.
2453 		 */
2454 		*startp = VM_MIN_KERNEL_ADDRESS;
2455 		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
2456 		ret = TRUE;
2457 	}
2458 #endif
2459 #endif /* defined(ARM_LARGE_MEMORY) */
2460 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
2461 	return ret;
2462 }
2463 
2464 /*
2465  * Routines to track and allocate physical pages during early boot.
2466  * On most systems, that memory runs from first_avail through to avail_end
2467  * with no gaps.
2468  *
2469  * However if the system supports ECC and bad_ram_pages_count > 0, we
2470  * need to be careful and skip those pages.
2471  */
2472 static unsigned int avail_page_count = 0;
2473 static bool need_ram_ranges_init = true;
2474 
2475 #if defined(__arm64__)
2476 pmap_paddr_t *bad_ram_pages = NULL;
2477 unsigned int bad_ram_pages_count = 0;
2478 
2479 /*
2480  * We use this sub-range of bad_ram_pages for pmap_next_page()
2481  */
2482 static pmap_paddr_t *skip_pages;
2483 static unsigned int skip_pages_count = 0;
2484 
2485 #define MAX_BAD_RAM_PAGE_COUNT 64
2486 static pmap_paddr_t bad_ram_pages_arr[MAX_BAD_RAM_PAGE_COUNT];
2487 
2488 /*
2489  * XXX - temporary code to get the bad pages array from boot-args.
2490  * expects a comma separated list of offsets from the start
2491  * of physical memory to be considered bad.
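 * (for example, a boot-arg of bad_ram_pages=0x4000,0x200000 would mark the
 * pages at those offsets from the DRAM base as bad)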
2492  *
2493  * HERE JOE -- will eventually be replaced by data provided by iboot
2494  */
2495 static void
2496 parse_bad_ram_pages_boot_arg(void)
2497 {
2498 	char buf[256] = {0};
2499 	char *s = buf;
2500 	char *end;
2501 	int count = 0;
2502 	pmap_paddr_t num;
2503 	extern uint64_t strtouq(const char *, char **, int);
2504 
2505 	if (!PE_parse_boot_arg_str("bad_ram_pages", buf, sizeof(buf))) {
2506 		goto done;
2507 	}
2508 
2509 	while (*s && count < MAX_BAD_RAM_PAGE_COUNT) {
2510 		num = (pmap_paddr_t)strtouq(s, &end, 0);
2511 		if (num == 0) {
2512 			break;
2513 		}
2514 		num &= ~PAGE_MASK;
2515 
2516 		bad_ram_pages_arr[count++] = gDramBase + num;
2517 
2518 		if (*end != ',') {
2519 			break;
2520 		}
2521 
2522 		s = end + 1;
2523 	}
2524 
2525 done:
2526 	bad_ram_pages = bad_ram_pages_arr;
2527 	bad_ram_pages_count = count;
2528 }
2529 
2530 /*
2531  * Comparison routine for qsort of array of physical addresses.
2532  */
2533 static int
2534 pmap_paddr_cmp(void *a, void *b)
2535 {
2536 	pmap_paddr_t *x = a;
2537 	pmap_paddr_t *y = b;
2538 	if (*x < *y) {
2539 		return -1;
2540 	}
2541 	return *x > *y;
2542 }
2543 #endif /* defined(__arm64__) */
2544 
2545 /*
2546  * Look up ppn in the sorted bad_ram_pages array.
2547  */
2548 bool
2549 pmap_is_bad_ram(__unused ppnum_t ppn)
2550 {
2551 #if defined(__arm64__)
2552 	pmap_paddr_t pa = ptoa(ppn);
2553 	int low = 0;
2554 	int high = bad_ram_pages_count - 1;
2555 	int mid;
2556 
2557 	while (low <= high) {
2558 		mid = (low + high) / 2;
2559 		if (bad_ram_pages[mid] < pa) {
2560 			low = mid + 1;
2561 		} else if (bad_ram_pages[mid] > pa) {
2562 			high = mid - 1;
2563 		} else {
2564 			return true;
2565 		}
2566 	}
2567 #endif /* defined(__arm64__) */
2568 	return false;
2569 }
2570 
2571 /*
2572  * Initialize the count of available pages. If we have bad_ram_pages, then sort the list of them.
2573  * No lock needed here, as this code is called while kernel boot up is single threaded.
2574  */
2575 static void
2576 initialize_ram_ranges(void)
2577 {
2578 	pmap_paddr_t first = first_avail;
2579 	pmap_paddr_t end = avail_end;
2580 
2581 	assert(first <= end);
2582 	assert(first == (first & ~PAGE_MASK));
2583 	assert(end == (end & ~PAGE_MASK));
2584 	avail_page_count = atop(end - first);
2585 
2586 #if defined(__arm64__)
2587 	/*
2588 	 * XXX Temporary code for testing, until there is iboot support
2589 	 *
2590 	 * Parse a list of known bad pages from a boot-args.
2591 	 */
2592 	parse_bad_ram_pages_boot_arg();
2593 
2594 	/*
2595 	 * Sort and filter the bad pages list and adjust avail_page_count.
2596 	 */
2597 	if (bad_ram_pages_count != 0) {
2598 		qsort(bad_ram_pages, bad_ram_pages_count, sizeof(*bad_ram_pages), (cmpfunc_t)pmap_paddr_cmp);
2599 		skip_pages = bad_ram_pages;
2600 		skip_pages_count = bad_ram_pages_count;
2601 
2602 		/* ignore any pages before first */
2603 		while (skip_pages_count > 0 && skip_pages[0] < first) {
2604 			--skip_pages_count;
2605 			++skip_pages;
2606 		}
2607 
2608 		/* ignore any pages at or after end */
2609 		while (skip_pages_count > 0 && skip_pages[skip_pages_count - 1] >= end) {
2610 			--skip_pages_count;
2611 		}
2612 
2613 		avail_page_count -= skip_pages_count;
2614 	}
2615 #endif /* defined(__arm64__) */
2616 	need_ram_ranges_init = false;
2617 }
2618 
2619 unsigned int
2620 pmap_free_pages(
2621 	void)
2622 {
2623 	if (need_ram_ranges_init) {
2624 		initialize_ram_ranges();
2625 	}
2626 	return avail_page_count;
2627 }
2628 
2629 unsigned int
2630 pmap_free_pages_span(
2631 	void)
2632 {
2633 	if (need_ram_ranges_init) {
2634 		initialize_ram_ranges();
2635 	}
2636 	return (unsigned int)atop(avail_end - first_avail);
2637 }
2638 
2639 
2640 boolean_t
2641 pmap_next_page_hi(
2642 	ppnum_t            * pnum,
2643 	__unused boolean_t might_free)
2644 {
2645 	return pmap_next_page(pnum);
2646 }
2647 
2648 
2649 boolean_t
2650 pmap_next_page(
2651 	ppnum_t *pnum)
2652 {
2653 	if (need_ram_ranges_init) {
2654 		initialize_ram_ranges();
2655 	}
2656 
2657 #if defined(__arm64__)
2658 	/*
2659 	 * Skip over any known bad pages.
2660 	 */
2661 	while (skip_pages_count > 0 && first_avail == skip_pages[0]) {
2662 		first_avail += PAGE_SIZE;
2663 		++skip_pages;
2664 		--skip_pages_count;
2665 	}
2666 #endif /* defined(__arm64__) */
2667 
2668 	if (first_avail != avail_end) {
2669 		*pnum = (ppnum_t)atop(first_avail);
2670 		first_avail += PAGE_SIZE;
2671 		assert(avail_page_count > 0);
2672 		--avail_page_count;
2673 		return TRUE;
2674 	}
2675 	assert(avail_page_count == 0);
2676 	return FALSE;
2677 }
2678 
2679 void
2680 pmap_retire_page(
2681 	__unused ppnum_t pnum)
2682 {
2683 	/* XXX Justin TBD - mark the page as unusable in pmap data structures */
2684 }
2685 
2686 
2687 /*
2688  *	Initialize the pmap module.
2689  *	Called by vm_init, to initialize any structures that the pmap
2690  *	system needs to map virtual memory.
2691  */
2692 void
2693 pmap_init(
2694 	void)
2695 {
2696 	/*
2697 	 *	Protect page zero in the kernel map.
2698 	 *	(can be overruled by permanent translation
2699 	 *	table entries at page zero - see arm_vm_init).
2700 	 */
2701 	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
2702 
2703 	pmap_initialized = TRUE;
2704 
2705 	/*
2706 	 *	Create the zone of physical maps
2707 	 *	and the physical-to-virtual entries.
2708 	 */
2709 	pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
2710 	    ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);
2711 
2712 
2713 	/*
2714 	 *	Initialize the pmap object (for tracking the vm_page_t
2715 	 *	structures for pages we allocate to be page tables in
2716 	 *	pmap_expand()).
2717 	 */
2718 	_vm_object_allocate(mem_size, pmap_object);
2719 	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2720 
2721 	/*
2722 	 * The values of [hard_]maxproc may have been scaled, make sure
2723 	 * they are still less than the value of pmap_max_asids.
2724 	 */
2725 	if ((uint32_t)maxproc > pmap_max_asids) {
2726 		maxproc = pmap_max_asids;
2727 	}
2728 	if ((uint32_t)hard_maxproc > pmap_max_asids) {
2729 		hard_maxproc = pmap_max_asids;
2730 	}
2731 }
2732 
2733 /**
2734  * Verify that a given physical page contains no mappings (outside of the
2735  * default physical aperture mapping).
2736  *
2737  * @param ppnum Physical page number to check there are no mappings to.
2738  *
2739  * @return True if there are no mappings, false otherwise or if the page is not
2740  *         kernel-managed.
2741  */
2742 bool
2743 pmap_verify_free(ppnum_t ppnum)
2744 {
2745 	const pmap_paddr_t pa = ptoa(ppnum);
2746 
2747 	assert(pa != vm_page_fictitious_addr);
2748 
2749 	/* Only mappings to kernel-managed physical memory are tracked. */
2750 	if (!pa_valid(pa)) {
2751 		return false;
2752 	}
2753 
2754 	const unsigned int pai = pa_index(pa);
2755 	pv_entry_t **pvh = pai_to_pvh(pai);
2756 
2757 	return pvh_test_type(pvh, PVH_TYPE_NULL);
2758 }
2759 
2760 #if MACH_ASSERT
2761 /**
2762  * Verify that a given physical page contains no mappings (outside of the
2763  * default physical aperture mapping) and if it does, then panic.
2764  *
2765  * @note It's recommended to use pmap_verify_free() directly when operating in
2766  *       the PPL since the PVH lock isn't getting grabbed here (due to this code
2767  *       normally being called from outside of the PPL, and the pv_head_table
2768  *       can't be modified outside of the PPL).
2769  *
2770  * @param ppnum Physical page number to check there are no mappings to.
2771  */
2772 void
2773 pmap_assert_free(ppnum_t ppnum)
2774 {
2775 	const pmap_paddr_t pa = ptoa(ppnum);
2776 
2777 	/* Only mappings to kernel-managed physical memory are tracked. */
2778 	if (__probable(!pa_valid(pa) || pmap_verify_free(ppnum))) {
2779 		return;
2780 	}
2781 
2782 	const unsigned int pai = pa_index(pa);
2783 	pv_entry_t **pvh = pai_to_pvh(pai);
2784 
2785 	/**
2786 	 * This function is always called from outside of the PPL. Because of this,
2787 	 * the PVH entry can't be locked. This function is generally only called
2788 	 * before the VM reclaims a physical page and shouldn't be creating new
2789 	 * mappings. Even if a new mapping is created while parsing the hierarchy,
2790 	 * the worst case is that the system will panic in another way, and we were
2791 	 * already about to panic anyway.
2792 	 */
2793 
2794 	/**
2795 	 * Since pmap_verify_free() returned false, that means there is at least one
2796 	 * mapping left. Let's get some extra info on the first mapping we find to
2797 	 * dump in the panic string (the common case is that there is one spare
2798 	 * mapping that was never unmapped).
2799 	 */
2800 	pt_entry_t *first_ptep = PT_ENTRY_NULL;
2801 
2802 	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
2803 		first_ptep = pvh_ptep(pvh);
2804 	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
2805 		pv_entry_t *pvep = pvh_pve_list(pvh);
2806 
2807 		/* Each PVE can contain multiple PTEs. Let's find the first one. */
2808 		for (int pve_ptep_idx = 0; pve_ptep_idx < PTE_PER_PVE; pve_ptep_idx++) {
2809 			first_ptep = pve_get_ptep(pvep, pve_ptep_idx);
2810 			if (first_ptep != PT_ENTRY_NULL) {
2811 				break;
2812 			}
2813 		}
2814 
2815 		/* The PVE should have at least one valid PTE. */
2816 		assert(first_ptep != PT_ENTRY_NULL);
2817 	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
2818 		panic("%s: Physical page is being used as a page table at PVH %p (pai: %d)",
2819 		    __func__, pvh, pai);
2820 	} else {
2821 		/**
2822 		 * The mapping disappeared between here and the pmap_verify_free() call.
2823 		 * The only way that can happen is if the VM was racing this call with
2824 		 * a call that unmaps PTEs. Operations on this page should not be
2825 		 * occurring at the same time as this check, and unfortunately we can't
2826 		 * lock the PVH entry to prevent it, so just panic instead.
2827 		 */
2828 		panic("%s: Mapping was detected but is now gone. Is the VM racing this "
2829 		    "call with an operation that unmaps PTEs? PVH %p (pai: %d)",
2830 		    __func__, pvh, pai);
2831 	}
2832 
2833 	/* Panic with a unique string identifying the first bad mapping and owner. */
2834 	{
2835 		/* First PTE is mapped by the main CPUs. */
2836 		pmap_t pmap = ptep_get_pmap(first_ptep);
2837 		const char *type = (pmap == kernel_pmap) ? "Kernel" : "User";
2838 
2839 		panic("%s: Found at least one mapping to %#llx. First PTEP (%p) is a "
2840 		    "%s CPU mapping (pmap: %p)",
2841 		    __func__, (uint64_t)pa, first_ptep, type, pmap);
2842 	}
2843 }
2844 #endif
2845 
2846 
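/*
 * Return the size in bytes of the root translation table for this pmap:
 * the number of entries at the root level (derived from the pmap's page
 * table attribute structure on arm64, or a fixed PMAP_ROOT_ALLOC_SIZE
 * otherwise) times sizeof(tt_entry_t).
 */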
2847 static vm_size_t
2848 pmap_root_alloc_size(pmap_t pmap)
2849 {
2850 #if (__ARM_VMSA__ > 7)
2851 #pragma unused(pmap)
2852 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2853 	unsigned int root_level = pt_attr_root_level(pt_attr);
2854 	return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
2855 #else
2856 	(void)pmap;
2857 	return PMAP_ROOT_ALLOC_SIZE;
2858 #endif
2859 }
2860 
2861 /*
2862  *	Create and return a physical map.
2863  *
2864  *	If the size specified for the map
2865  *	is zero, the map is an actual physical
2866  *	map, and may be referenced by the
2867  *	hardware.
2868  *
2869  *	If the size specified is non-zero,
2870  *	the map will be used in software only, and
2871  *	is bounded by that size.
2872  */
2873 MARK_AS_PMAP_TEXT pmap_t
2874 pmap_create_options_internal(
2875 	ledger_t ledger,
2876 	vm_map_size_t size,
2877 	unsigned int flags,
2878 	kern_return_t *kr)
2879 {
2880 	unsigned        i;
2881 	unsigned        tte_index_max;
2882 	pmap_t          p;
2883 	bool is_64bit = flags & PMAP_CREATE_64BIT;
2884 #if defined(HAS_APPLE_PAC)
2885 	bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
2886 #endif /* defined(HAS_APPLE_PAC) */
2887 	kern_return_t   local_kr = KERN_SUCCESS;
2888 
2889 	/*
2890 	 *	A software use-only map doesn't even need a pmap.
2891 	 */
2892 	if (size != 0) {
2893 		return PMAP_NULL;
2894 	}
2895 
2896 	if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
2897 		return PMAP_NULL;
2898 	}
2899 
2900 #if XNU_MONITOR
2901 	if ((local_kr = pmap_alloc_pmap(&p)) != KERN_SUCCESS) {
2902 		goto pmap_create_fail;
2903 	}
2904 
2905 	assert(p != PMAP_NULL);
2906 
2907 	if (ledger) {
2908 		pmap_ledger_validate(ledger);
2909 		pmap_ledger_retain(ledger);
2910 	}
2911 #else
2912 	/*
2913 	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
2914 	 *	the translation table of the right size for the pmap.
2915 	 */
2916 	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
2917 		local_kr = KERN_RESOURCE_SHORTAGE;
2918 		goto pmap_create_fail;
2919 	}
2920 #endif
2921 
2922 	p->ledger = ledger;
2923 
2924 
2925 	p->pmap_vm_map_cs_enforced = false;
2926 
2927 	p->min = 0;
2928 	if (flags & PMAP_CREATE_64BIT) {
2929 	} else {
2930 	}
2931 
2932 #if defined(HAS_APPLE_PAC)
2933 	p->disable_jop = disable_jop;
2934 #endif /* defined(HAS_APPLE_PAC) */
2935 
2936 	p->nested_region_true_start = 0;
2937 	p->nested_region_true_end = ~0;
2938 
2939 	p->gc_status = 0;
2940 	p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
2941 	p->nx_enabled = true;
2942 	p->is_64bit = is_64bit;
2943 	p->nested_pmap = PMAP_NULL;
2944 	p->type = PMAP_TYPE_USER;
2945 
2946 #if ARM_PARAMETERIZED_PMAP
2947 	/* Default to the native pt_attr */
2948 	p->pmap_pt_attr = native_pt_attr;
2949 #endif /* ARM_PARAMETERIZED_PMAP */
2950 #if __ARM_MIXED_PAGE_SIZE__
2951 	if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
2952 		p->pmap_pt_attr = &pmap_pt_attr_4k;
2953 	}
2954 #endif /* __ARM_MIXED_PAGE_SIZE__ */
2955 	p->max = pmap_user_va_size(p);
2956 
2957 	if (!pmap_get_pt_ops(p)->alloc_id(p)) {
2958 		local_kr = KERN_NO_SPACE;
2959 		goto id_alloc_fail;
2960 	}
2961 
2962 	pmap_lock_init(p);
2963 
2964 	p->tt_entry_free = (tt_entry_t *)0;
2965 	tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));
2966 
2967 #if     (__ARM_VMSA__ == 7)
2968 	p->tte_index_max = tte_index_max;
2969 #endif
2970 
2971 #if XNU_MONITOR
2972 	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
2973 #else
2974 	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
2975 #endif
2976 	if (!(p->tte)) {
2977 		local_kr = KERN_RESOURCE_SHORTAGE;
2978 		goto tt1_alloc_fail;
2979 	}
2980 
2981 	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
2982 	PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
2983 
2984 	/* nullify the translation table */
2985 	for (i = 0; i < tte_index_max; i++) {
2986 		p->tte[i] = ARM_TTE_TYPE_FAULT;
2987 	}
2988 
2989 	FLUSH_PTE();
2990 
2991 	/*
2992 	 *  initialize the rest of the structure
2993 	 */
2994 	p->nested_region_addr = 0x0ULL;
2995 	p->nested_region_size = 0x0ULL;
2996 	p->nested_region_asid_bitmap = NULL;
2997 	p->nested_region_asid_bitmap_size = 0x0UL;
2998 
2999 	p->nested_has_no_bounds_ref = false;
3000 	p->nested_no_bounds_refcnt = 0;
3001 	p->nested_bounds_set = false;
3002 
3003 
3004 #if MACH_ASSERT
3005 	p->pmap_stats_assert = TRUE;
3006 	p->pmap_pid = 0;
3007 	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
3008 #endif /* MACH_ASSERT */
3009 #if DEVELOPMENT || DEBUG
3010 	p->footprint_was_suspended = FALSE;
3011 #endif /* DEVELOPMENT || DEBUG */
3012 
3013 #if XNU_MONITOR
3014 	os_atomic_init(&p->nested_count, 0);
3015 	assert(os_atomic_load(&p->ref_count, relaxed) == 0);
3016 	/* Ensure prior updates to the new pmap are visible before the non-zero ref_count is visible */
3017 	os_atomic_thread_fence(release);
3018 #endif
3019 	os_atomic_init(&p->ref_count, 1);
3020 	pmap_simple_lock(&pmaps_lock);
3021 	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3022 	pmap_simple_unlock(&pmaps_lock);
3023 
3024 	return p;
3025 
3026 tt1_alloc_fail:
3027 	pmap_get_pt_ops(p)->free_id(p);
3028 id_alloc_fail:
3029 #if XNU_MONITOR
3030 	pmap_free_pmap(p);
3031 
3032 	if (ledger) {
3033 		pmap_ledger_release(ledger);
3034 	}
3035 #else
3036 	zfree(pmap_zone, p);
3037 #endif
3038 pmap_create_fail:
3039 #if XNU_MONITOR
3040 	pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
3041 #endif
3042 	*kr = local_kr;
3043 #if XNU_MONITOR
3044 	pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
3045 #endif
3046 	return PMAP_NULL;
3047 }
3048 
3049 pmap_t
3050 pmap_create_options(
3051 	ledger_t ledger,
3052 	vm_map_size_t size,
3053 	unsigned int flags)
3054 {
3055 	pmap_t pmap;
3056 	kern_return_t kr = KERN_SUCCESS;
3057 
3058 	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
3059 
3060 	ledger_reference(ledger);
3061 
3062 #if XNU_MONITOR
3063 	for (;;) {
3064 		pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
3065 		if (kr != KERN_RESOURCE_SHORTAGE) {
3066 			break;
3067 		}
3068 		assert(pmap == PMAP_NULL);
3069 		pmap_alloc_page_for_ppl(0);
3070 		kr = KERN_SUCCESS;
3071 	}
3072 #else
3073 	pmap = pmap_create_options_internal(ledger, size, flags, &kr);
3074 #endif
3075 
3076 	if (pmap == PMAP_NULL) {
3077 		ledger_dereference(ledger);
3078 	}
3079 
3080 	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
3081 
3082 	return pmap;
3083 }
3084 
3085 #if XNU_MONITOR
3086 /*
3087  * This symbol remains in place when the PPL is enabled so that the dispatch
3088  * table does not change from development to release configurations.
3089  */
3090 #endif
3091 #if MACH_ASSERT || XNU_MONITOR
3092 MARK_AS_PMAP_TEXT void
3093 pmap_set_process_internal(
3094 	__unused pmap_t pmap,
3095 	__unused int pid,
3096 	__unused char *procname)
3097 {
3098 #if MACH_ASSERT
3099 	if (pmap == NULL) {
3100 		return;
3101 	}
3102 
3103 	validate_pmap_mutable(pmap);
3104 
3105 	pmap->pmap_pid = pid;
3106 	strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
3107 	if (pmap_ledgers_panic_leeway) {
3108 		/*
3109 		 * XXX FBDP
3110 		 * Some processes somehow trigger some issues that make
3111 		 * the pmap stats and ledgers go off track, causing
3112 		 * some assertion failures and ledger panics.
3113 		 * Turn off the sanity checks if we allow some ledger leeway
3114 		 * because of that.  We'll still do a final check in
3115 		 * pmap_check_ledgers() for discrepancies larger than the
3116 		 * allowed leeway after the address space has been fully
3117 		 * cleaned up.
3118 		 */
3119 		pmap->pmap_stats_assert = FALSE;
3120 		ledger_disable_panic_on_negative(pmap->ledger,
3121 		    task_ledgers.phys_footprint);
3122 		ledger_disable_panic_on_negative(pmap->ledger,
3123 		    task_ledgers.internal);
3124 		ledger_disable_panic_on_negative(pmap->ledger,
3125 		    task_ledgers.internal_compressed);
3126 		ledger_disable_panic_on_negative(pmap->ledger,
3127 		    task_ledgers.iokit_mapped);
3128 		ledger_disable_panic_on_negative(pmap->ledger,
3129 		    task_ledgers.alternate_accounting);
3130 		ledger_disable_panic_on_negative(pmap->ledger,
3131 		    task_ledgers.alternate_accounting_compressed);
3132 	}
3133 #endif /* MACH_ASSERT */
3134 }
3135 #endif /* MACH_ASSERT || XNU_MONITOR */
3136 
3137 #if MACH_ASSERT
3138 void
3139 pmap_set_process(
3140 	pmap_t pmap,
3141 	int pid,
3142 	char *procname)
3143 {
3144 #if XNU_MONITOR
3145 	pmap_set_process_ppl(pmap, pid, procname);
3146 #else
3147 	pmap_set_process_internal(pmap, pid, procname);
3148 #endif
3149 }
3150 #endif /* MACH_ASSERT */
3151 
3152 #if (__ARM_VMSA__ > 7)
3153 /*
3154  * pmap_deallocate_all_leaf_tts:
3155  *
3156  * Recursive function for deallocating all leaf TTEs.  Walks the given TT,
3157  * removing and deallocating all TTEs.
3158  */
3159 MARK_AS_PMAP_TEXT static void
3160 pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
3161 {
3162 	tt_entry_t tte = ARM_TTE_EMPTY;
3163 	tt_entry_t * ttep = NULL;
3164 	tt_entry_t * last_ttep = NULL;
3165 
3166 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3167 
3168 	assert(level < pt_attr_leaf_level(pt_attr));
3169 
3170 	last_ttep = &first_ttep[ttn_index(pt_attr, ~0, level)];
3171 
3172 	for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
3173 		tte = *ttep;
3174 
3175 		if (!(tte & ARM_TTE_VALID)) {
3176 			continue;
3177 		}
3178 
3179 		if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
3180 			panic("%s: found block mapping, ttep=%p, tte=%p, "
3181 			    "pmap=%p, first_ttep=%p, level=%u",
3182 			    __FUNCTION__, ttep, (void *)tte,
3183 			    pmap, first_ttep, level);
3184 		}
3185 
3186 		/* Must be valid, type table */
3187 		if (level < pt_attr_twig_level(pt_attr)) {
3188 			/* If we haven't reached the twig level, recurse to the next level. */
3189 			pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
3190 		}
3191 
3192 		/* Remove the TTE. */
3193 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3194 		pmap_tte_deallocate(pmap, 0, 0, false, ttep, level);
3195 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3196 	}
3197 }
3198 #endif /* (__ARM_VMSA__ > 7) */
3199 
3200 /*
3201  * We maintain stats and ledgers so that a task's physical footprint is:
3202  * phys_footprint = ((internal - alternate_accounting)
3203  *                   + (internal_compressed - alternate_accounting_compressed)
3204  *                   + iokit_mapped
3205  *                   + purgeable_nonvolatile
3206  *                   + purgeable_nonvolatile_compressed
3207  *                   + page_table)
3208  * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3209  */
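/*
 * Illustrative example with hypothetical page counts: internal = 10,
 * alternate_accounting = 2, internal_compressed = 4,
 * alternate_accounting_compressed = 1, iokit_mapped = 3,
 * purgeable_nonvolatile = 0, purgeable_nonvolatile_compressed = 0, page_table = 2:
 * phys_footprint = (10 - 2) + (4 - 1) + 3 + 0 + 0 + 2 = 16 pages.
 */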
3210 
3211 /*
3212  *	Retire the given physical map from service.
3213  *	Should only be called if the map contains
3214  *	no valid mappings.
3215  */
3216 MARK_AS_PMAP_TEXT void
3217 pmap_destroy_internal(
3218 	pmap_t pmap)
3219 {
3220 	if (pmap == PMAP_NULL) {
3221 		return;
3222 	}
3223 
3224 	validate_pmap(pmap);
3225 
3226 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3227 
3228 	int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
3229 	if (ref_count > 0) {
3230 		return;
3231 	} else if (__improbable(ref_count < 0)) {
3232 		panic("pmap %p: refcount underflow", pmap);
3233 	} else if (__improbable(pmap == kernel_pmap)) {
3234 		panic("pmap %p: attempt to destroy kernel pmap", pmap);
3235 	} else if (__improbable(pmap->type == PMAP_TYPE_COMMPAGE)) {
3236 		panic("pmap %p: attempt to destroy commpage pmap", pmap);
3237 	}
3238 
3239 #if XNU_MONITOR
3240 	/*
3241 	 * Issue a store-load barrier to ensure the checks of nested_count and the per-CPU
3242 	 * pmaps below will not be speculated ahead of the decrement of ref_count above.
3243 	 * That ensures that if the pmap is currently in use elsewhere, this path will
3244 	 * either observe it in use and panic, or PMAP_VALIDATE_MUTABLE will observe a
3245 	 * ref_count of 0 and panic.
3246 	 */
3247 	os_atomic_thread_fence(seq_cst);
3248 	if (__improbable(os_atomic_load(&pmap->nested_count, relaxed) != 0)) {
3249 		panic("pmap %p: attempt to destroy while nested", pmap);
3250 	}
3251 	const int max_cpu = ml_get_max_cpu_number();
3252 	for (unsigned int i = 0; i <= max_cpu; ++i) {
3253 		const pmap_cpu_data_t *cpu_data = pmap_get_remote_cpu_data(i);
3254 		if (cpu_data == NULL) {
3255 			continue;
3256 		}
3257 		if (__improbable(os_atomic_load(&cpu_data->inflight_pmap, relaxed) == pmap)) {
3258 			panic("pmap %p: attempting to destroy while in-flight on cpu %llu", pmap, (uint64_t)i);
3259 		} else if (__improbable(os_atomic_load(&cpu_data->active_pmap, relaxed) == pmap)) {
3260 			panic("pmap %p: attempting to destroy while active on cpu %llu", pmap, (uint64_t)i);
3261 		}
3262 	}
3263 #endif
3264 #if (__ARM_VMSA__ > 7)
3265 	pmap_unmap_sharedpage(pmap);
3266 #endif /* (__ARM_VMSA__ > 7) */
3267 
3268 	pmap_simple_lock(&pmaps_lock);
3269 #if !XNU_MONITOR
3270 	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3271 		pmap->gc_status |= PMAP_GC_WAIT;
3272 		assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
3273 		pmap_simple_unlock(&pmaps_lock);
3274 		(void) thread_block(THREAD_CONTINUE_NULL);
3275 		pmap_simple_lock(&pmaps_lock);
3276 	}
3277 #endif /* !XNU_MONITOR */
3278 	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3279 	pmap_simple_unlock(&pmaps_lock);
3280 
3281 	pmap_trim_self(pmap);
3282 
3283 	/*
3284 	 *	Free the memory maps, then the
3285 	 *	pmap structure.
3286 	 */
3287 #if (__ARM_VMSA__ == 7)
3288 	unsigned int i = 0;
3289 	pt_entry_t     *ttep;
3290 
3291 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3292 	for (i = 0; i < pmap->tte_index_max; i++) {
3293 		ttep = &pmap->tte[i];
3294 		if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3295 			pmap_tte_deallocate(pmap, 0, 0, false, ttep, PMAP_TT_L1_LEVEL);
3296 		}
3297 	}
3298 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3299 #else /* (__ARM_VMSA__ == 7) */
3300 	pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));
3301 #endif /* (__ARM_VMSA__ == 7) */
3302 
3303 
3304 
3305 	if (pmap->tte) {
3306 #if (__ARM_VMSA__ == 7)
3307 		pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
3308 		pmap->tte_index_max = 0;
3309 #else /* (__ARM_VMSA__ == 7) */
3310 		pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
3311 #endif /* (__ARM_VMSA__ == 7) */
3312 		pmap->tte = (tt_entry_t *) NULL;
3313 		pmap->ttep = 0;
3314 	}
3315 
3316 	assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3317 
3318 	if (__improbable(pmap->type == PMAP_TYPE_NESTED)) {
3319 		pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap, false);
3320 		sync_tlb_flush();
3321 	} else {
3322 		pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
3323 		sync_tlb_flush();
3324 		/* return its asid to the pool */
3325 		pmap_get_pt_ops(pmap)->free_id(pmap);
3326 		if (pmap->nested_pmap != NULL) {
3327 #if XNU_MONITOR
3328 			os_atomic_dec(&pmap->nested_pmap->nested_count, relaxed);
3329 #endif
3330 			/* release the reference we hold on the nested pmap */
3331 			pmap_destroy_internal(pmap->nested_pmap);
3332 		}
3333 	}
3334 
3335 	pmap_check_ledgers(pmap);
3336 
3337 	if (pmap->nested_region_asid_bitmap) {
3338 #if XNU_MONITOR
3339 		pmap_pages_free(kvtophys_nofail((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
3340 #else
3341 		kfree_data(pmap->nested_region_asid_bitmap,
3342 		    pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
3343 #endif
3344 	}
3345 
3346 #if XNU_MONITOR
3347 	if (pmap->ledger) {
3348 		pmap_ledger_release(pmap->ledger);
3349 	}
3350 
3351 	pmap_lock_destroy(pmap);
3352 	pmap_free_pmap(pmap);
3353 #else
3354 	pmap_lock_destroy(pmap);
3355 	zfree(pmap_zone, pmap);
3356 #endif
3357 }
3358 
3359 void
3360 pmap_destroy(
3361 	pmap_t pmap)
3362 {
3363 	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
3364 
3365 	ledger_t ledger = pmap->ledger;
3366 
3367 #if XNU_MONITOR
3368 	pmap_destroy_ppl(pmap);
3369 
3370 	pmap_ledger_check_balance(pmap);
3371 #else
3372 	pmap_destroy_internal(pmap);
3373 #endif
3374 
3375 	ledger_dereference(ledger);
3376 
3377 	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
3378 }
3379 
3380 
3381 /*
3382  *	Add a reference to the specified pmap.
3383  */
3384 MARK_AS_PMAP_TEXT void
3385 pmap_reference_internal(
3386 	pmap_t pmap)
3387 {
3388 	if (pmap != PMAP_NULL) {
3389 		validate_pmap_mutable(pmap);
3390 		os_atomic_inc(&pmap->ref_count, relaxed);
3391 	}
3392 }
3393 
3394 void
3395 pmap_reference(
3396 	pmap_t pmap)
3397 {
3398 #if XNU_MONITOR
3399 	pmap_reference_ppl(pmap);
3400 #else
3401 	pmap_reference_internal(pmap);
3402 #endif
3403 }
3404 
3405 static tt_entry_t *
3406 pmap_tt1_allocate(
3407 	pmap_t          pmap,
3408 	vm_size_t       size,
3409 	unsigned        option)
3410 {
3411 	tt_entry_t      *tt1 = NULL;
3412 	tt_free_entry_t *tt1_free;
3413 	pmap_paddr_t    pa;
3414 	vm_address_t    va;
3415 	vm_address_t    va_end;
3416 	kern_return_t   ret;
3417 
3418 	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
3419 		size = PAGE_SIZE;
3420 	}
3421 
3422 	pmap_simple_lock(&tt1_lock);
3423 	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
3424 		free_page_size_tt_count--;
3425 		tt1 = (tt_entry_t *)free_page_size_tt_list;
3426 		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3427 	} else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
3428 		free_two_page_size_tt_count--;
3429 		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
3430 		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
3431 	} else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
3432 		free_tt_count--;
3433 		tt1 = (tt_entry_t *)free_tt_list;
3434 		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
3435 	}
3436 
3437 	pmap_simple_unlock(&tt1_lock);
3438 
3439 	if (tt1 != NULL) {
3440 		pmap_tt_ledger_credit(pmap, size);
3441 		return (tt_entry_t *)tt1;
3442 	}
3443 
3444 	ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
3445 
3446 	if (ret == KERN_RESOURCE_SHORTAGE) {
3447 		return (tt_entry_t *)0;
3448 	}
3449 
3450 #if XNU_MONITOR
3451 	assert(pa);
3452 #endif
3453 
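	/*
	 * When the root table is smaller than a kernel page (e.g. a 4K root allocation
	 * on a 16K kernel page size), carve the remainder of the freshly allocated page
	 * into root-table-sized chunks and push them onto the global tt1 free list so
	 * later allocations can be satisfied without going back to the VM.
	 */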
3454 	if (size < PAGE_SIZE) {
3455 		va = phystokv(pa) + size;
3456 		tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
3457 		tt_free_entry_t *next_free = NULL;
3458 		for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
3459 			tt1_free = (tt_free_entry_t *)va;
3460 			tt1_free->next = next_free;
3461 			next_free = tt1_free;
3462 		}
3463 		pmap_simple_lock(&tt1_lock);
3464 		local_free_list->next = free_tt_list;
3465 		free_tt_list = next_free;
3466 		free_tt_count += ((PAGE_SIZE / size) - 1);
3467 		if (free_tt_count > free_tt_max) {
3468 			free_tt_max = free_tt_count;
3469 		}
3470 		pmap_simple_unlock(&tt1_lock);
3471 	}
3472 
3473 	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
3474 	 * Depending on the device, this can vary between 512b and 16K. */
3475 	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3476 	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
3477 	pmap_tt_ledger_credit(pmap, size);
3478 
3479 	return (tt_entry_t *) phystokv(pa);
3480 }
3481 
3482 static void
3483 pmap_tt1_deallocate(
3484 	pmap_t pmap,
3485 	tt_entry_t *tt,
3486 	vm_size_t size,
3487 	unsigned option)
3488 {
3489 	tt_free_entry_t *tt_entry;
3490 
3491 	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
3492 		size = PAGE_SIZE;
3493 	}
3494 
3495 	tt_entry = (tt_free_entry_t *)tt;
3496 	assert(not_in_kdp);
3497 	pmap_simple_lock(&tt1_lock);
3498 
3499 	if (size < PAGE_SIZE) {
3500 		free_tt_count++;
3501 		if (free_tt_count > free_tt_max) {
3502 			free_tt_max = free_tt_count;
3503 		}
3504 		tt_entry->next = free_tt_list;
3505 		free_tt_list = tt_entry;
3506 	}
3507 
3508 	if (size == PAGE_SIZE) {
3509 		free_page_size_tt_count++;
3510 		if (free_page_size_tt_count > free_page_size_tt_max) {
3511 			free_page_size_tt_max = free_page_size_tt_count;
3512 		}
3513 		tt_entry->next = free_page_size_tt_list;
3514 		free_page_size_tt_list = tt_entry;
3515 	}
3516 
3517 	if (size == 2 * PAGE_SIZE) {
3518 		free_two_page_size_tt_count++;
3519 		if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
3520 			free_two_page_size_tt_max = free_two_page_size_tt_count;
3521 		}
3522 		tt_entry->next = free_two_page_size_tt_list;
3523 		free_two_page_size_tt_list = tt_entry;
3524 	}
3525 
3526 	if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
3527 		pmap_simple_unlock(&tt1_lock);
3528 		pmap_tt_ledger_debit(pmap, size);
3529 		return;
3530 	}
3531 
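	/*
	 * Unless the caller passed PMAP_TT_DEALLOCATE_NOBLOCK (handled above), trim the
	 * cached free lists back down to their caps, returning any excess page-sized and
	 * two-page-sized tables to the VM.  The tt1 lock is dropped around each
	 * pmap_pages_free() call.
	 */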
3532 	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
3533 		free_page_size_tt_count--;
3534 		tt = (tt_entry_t *)free_page_size_tt_list;
3535 		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3536 
3537 		pmap_simple_unlock(&tt1_lock);
3538 
3539 		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
3540 
3541 		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3542 
3543 		pmap_simple_lock(&tt1_lock);
3544 	}
3545 
3546 	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
3547 		free_two_page_size_tt_count--;
3548 		tt = (tt_entry_t *)free_two_page_size_tt_list;
3549 		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
3550 
3551 		pmap_simple_unlock(&tt1_lock);
3552 
3553 		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
3554 
3555 		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
3556 
3557 		pmap_simple_lock(&tt1_lock);
3558 	}
3559 	pmap_simple_unlock(&tt1_lock);
3560 	pmap_tt_ledger_debit(pmap, size);
3561 }
3562 
3563 MARK_AS_PMAP_TEXT static kern_return_t
3564 pmap_tt_allocate(
3565 	pmap_t pmap,
3566 	tt_entry_t **ttp,
3567 	unsigned int level,
3568 	unsigned int options)
3569 {
3570 	pmap_paddr_t pa;
3571 	*ttp = NULL;
3572 
3573 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3574 	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
3575 		tt_free_entry_t *tt_free_cur, *tt_free_next;
3576 
3577 		tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
3578 		tt_free_next = tt_free_cur->next;
3579 		tt_free_cur->next = NULL;
3580 		*ttp = (tt_entry_t *)tt_free_cur;
3581 		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
3582 	}
3583 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3584 
3585 	if (*ttp == NULL) {
3586 		pt_desc_t       *ptdp;
3587 
3588 		/*
3589 		 *  Allocate a VM page for the level x page table entries.
3590 		 */
3591 		while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
3592 			if (options & PMAP_OPTIONS_NOWAIT) {
3593 				return KERN_RESOURCE_SHORTAGE;
3594 			}
3595 			VM_PAGE_WAIT();
3596 		}
3597 
3598 		while ((ptdp = ptd_alloc(pmap)) == NULL) {
3599 			if (options & PMAP_OPTIONS_NOWAIT) {
3600 				pmap_pages_free(pa, PAGE_SIZE);
3601 				return KERN_RESOURCE_SHORTAGE;
3602 			}
3603 			VM_PAGE_WAIT();
3604 		}
3605 
3606 		if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
3607 			OSAddAtomic64(1, &alloc_ttepages_count);
3608 			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
3609 		} else {
3610 			OSAddAtomic64(1, &alloc_ptepages_count);
3611 			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
3612 		}
3613 
3614 		pmap_tt_ledger_credit(pmap, PAGE_SIZE);
3615 
3616 		PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
3617 
3618 		pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
3619 
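		/*
		 * If the pmap's page size is smaller than the kernel page size (e.g. a 4K
		 * user pmap on a 16K kernel), only the first pmap_page_size bytes are handed
		 * back to the caller; the remaining chunks of the page are linked onto
		 * pmap->tt_entry_free so later table allocations for this pmap can reuse them.
		 */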
3620 		uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
3621 		if (PAGE_SIZE > pmap_page_size) {
3622 			vm_address_t    va;
3623 			vm_address_t    va_end;
3624 
3625 			pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3626 
3627 			for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
3628 				((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3629 				pmap->tt_entry_free = (tt_entry_t *)va;
3630 			}
3631 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3632 		}
3633 
3634 		*ttp = (tt_entry_t *)phystokv(pa);
3635 	}
3636 
3637 #if XNU_MONITOR
3638 	assert(*ttp);
3639 #endif
3640 
3641 	return KERN_SUCCESS;
3642 }
3643 
3644 
3645 static void
3646 pmap_tt_deallocate(
3647 	pmap_t pmap,
3648 	tt_entry_t *ttp,
3649 	unsigned int level)
3650 {
3651 	pt_desc_t *ptdp;
3652 	ptd_info_t *ptd_info;
3653 	unsigned pt_acc_cnt;
3654 	unsigned i;
3655 	vm_offset_t     free_page = 0;
3656 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3657 	unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);
3658 
3659 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3660 
3661 	ptdp = ptep_get_ptd(ttp);
3662 	ptd_info = ptd_get_info(ptdp, ttp);
3663 
3664 	ptdp->va[ptd_get_index(ptdp, ttp)] = (vm_offset_t)-1;
3665 
3666 	if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
3667 		ptd_info->refcnt = 0;
3668 	}
3669 
3670 	if (ptd_info->refcnt != 0) {
3671 		panic("pmap_tt_deallocate(): ptdp %p, count %d", ptdp, ptd_info->refcnt);
3672 	}
3673 
3674 	ptd_info->refcnt = 0;
3675 
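	/*
	 * Sum the refcounts of every sub-page table sharing this kernel page.  If they
	 * are all zero, check whether every other chunk of the page is already sitting
	 * on pmap->tt_entry_free; when the whole page is free, unlink those chunks and
	 * hand the page back to the VM below.  Otherwise just push this table onto the
	 * free list.
	 */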
3676 	for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
3677 		pt_acc_cnt += ptdp->ptd_info[i].refcnt;
3678 	}
3679 
3680 	if (pt_acc_cnt == 0) {
3681 		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
3682 		unsigned pt_free_entry_cnt = 1;
3683 
3684 		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
3685 			tt_free_entry_t *tt_free_list_next;
3686 
3687 			tt_free_list_next = tt_free_list->next;
3688 			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
3689 				pt_free_entry_cnt++;
3690 			}
3691 			tt_free_list = tt_free_list_next;
3692 		}
3693 		if (pt_free_entry_cnt == max_pt_index) {
3694 			tt_free_entry_t *tt_free_list_cur;
3695 
3696 			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
3697 			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
3698 			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
3699 
3700 			while (tt_free_list_cur) {
3701 				tt_free_entry_t *tt_free_list_next;
3702 
3703 				tt_free_list_next = tt_free_list_cur->next;
3704 				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
3705 					tt_free_list->next = tt_free_list_next->next;
3706 				} else {
3707 					tt_free_list = tt_free_list_next;
3708 				}
3709 				tt_free_list_cur = tt_free_list_next;
3710 			}
3711 		} else {
3712 			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3713 			pmap->tt_entry_free = ttp;
3714 		}
3715 	} else {
3716 		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
3717 		pmap->tt_entry_free = ttp;
3718 	}
3719 
3720 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3721 
3722 	if (free_page != 0) {
3723 		ptd_deallocate(ptep_get_ptd((pt_entry_t*)free_page));
3724 		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
3725 		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
3726 		if (level < pt_attr_leaf_level(pt_attr)) {
3727 			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
3728 		} else {
3729 			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
3730 		}
3731 		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
3732 		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
3733 	}
3734 }
3735 
3736 /**
3737  * Safely clear out a translation table entry.
3738  *
3739  * @note If the TTE to clear out points to a leaf table, then that leaf table
3740  *       must have a refcnt of zero before the TTE can be removed.
3741  *
3742  * @param pmap The pmap containing the page table whose TTE is being removed.
3743  * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
3744  * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
3745  * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
3746  * @param ttep Pointer to the TTE that should be cleared out.
3747  * @param level The level of the page table that contains the TTE to be removed.
3748  */
3749 static void
3750 pmap_tte_remove(
3751 	pmap_t pmap,
3752 	vm_offset_t va_start,
3753 	vm_offset_t va_end,
3754 	bool need_strong_sync,
3755 	tt_entry_t *ttep,
3756 	unsigned int level)
3757 {
3758 	const tt_entry_t tte = *ttep;
3759 
3760 	if (__improbable(tte == ARM_TTE_EMPTY)) {
3761 		panic("%s: L%d TTE is already empty. Potential double unmap or memory "
3762 		    "stomper? pmap=%p ttep=%p", __func__, level, pmap, ttep);
3763 	}
3764 
3765 	/**
3766 	 * Remember, the passed in "level" parameter refers to the level above the
3767 	 * table that's getting removed (e.g., removing an L2 TTE will unmap an L3
3768 	 * page table).
3769 	 */
3770 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3771 	const bool remove_leaf_table = (level == pt_attr_twig_level(pt_attr));
3772 
3773 	/**
3774 	 * Get the refcnt of the table to be deleted. We only track refcnts on leaf
3775 	 * page tables, so hardcode the refcnt to the appropriate sentinel value on
3776 	 * non-leaf page tables.
3777 	 */
3778 	unsigned short refcnt = (remove_leaf_table) ?
3779 	    ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt : PT_DESC_REFCOUNT;
3780 
3781 	/**
3782 	 * If we've detected a non-zero refcount on a leaf pagetable, that's not automatically
3783 	 * a cause for panic.  It could be that pmap_disconnect() is running on another CPU and
3784 	 * has cleared a PTE (thus causing the vm_map_destroy() path to observe an empty PTE and
3785 	 * therefore do no PVH locking or removal), but hasn't yet dropped the refcount for
3786 	 * the cleared PTE.  Wait until either the refcount reaches 0 or we observe no pending
3787 	 * pmap_disconnect() operations on other CPUs.
3788 	 */
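	/*
	 * The wait loop below pairs a load-exclusive of inflight_disconnect with WFE:
	 * the load arms the exclusive monitor, so the WFE wakes when the flag is
	 * written by the remote CPU (or on any other monitor-clearing event) instead of
	 * spinning, after which the refcount is re-sampled.
	 */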
3789 	if (__improbable(remove_leaf_table && refcnt != 0)) {
3790 		do {
3791 			const int max_cpu = ml_get_max_cpu_number();
3792 			bool inflight_disconnect = false;
3793 			for (unsigned int i = 0; i <= max_cpu; ++i) {
3794 				const pmap_cpu_data_t *cpu_data = pmap_get_remote_cpu_data(i);
3795 				if (cpu_data == NULL) {
3796 					continue;
3797 				}
3798 				if (os_atomic_load_exclusive(&cpu_data->inflight_disconnect, acquire)) {
3799 					__builtin_arm_wfe();
3800 					inflight_disconnect = true;
3801 					break;
3802 				}
3803 				os_atomic_clear_exclusive();
3804 			}
3805 			refcnt = os_atomic_load(&(ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt), relaxed);
3806 			if (!inflight_disconnect) {
3807 				break;
3808 			}
3809 		} while (refcnt != 0);
3810 	}
3811 
3812 #if MACH_ASSERT
3813 	/**
3814 	 * On internal devices, always do the page table consistency check
3815 	 * regardless of page table level or the actual refcnt value.
3816 	 */
3817 	{
3818 #else /* MACH_ASSERT */
3819 	/**
3820 	 * Only perform the page table consistency check when deleting leaf page
3821 	 * tables and it seems like there might be valid/compressed mappings
3822 	 * leftover.
3823 	 */
3824 	if (__improbable(remove_leaf_table && refcnt != 0)) {
3825 #endif /* MACH_ASSERT */
3826 
3827 		/**
3828 		 * There are multiple problems that can manifest as a non-zero refcnt:
3829 		 * 1. A bug in the refcnt management logic.
3830 		 * 2. A memory stomper or hardware failure.
3831 		 * 3. The VM forgetting to unmap all of the valid mappings in an address
3832 		 *    space before destroying a pmap.
3833 		 *
3834 		 * By looping over the page table and determining how many valid or
3835 		 * compressed entries there actually are, we can narrow down which of
3836 		 * these three cases is causing this panic. If the expected refcnt
3837 		 * (valid + compressed) and the actual refcnt don't match then the
3838 		 * problem is probably either a memory corruption issue (if the
3839 		 * non-empty entries don't match valid+compressed, that could also be a
3840 		 * sign of corruption) or refcnt management bug. Otherwise, there
3841 		 * actually are leftover mappings and the higher layers of xnu are
3842 		 * probably at fault.
3843 		 */
3844 		const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
3845 		pt_entry_t *bpte = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));
3846 
3847 		pt_entry_t *ptep = bpte;
3848 		unsigned short non_empty = 0, valid = 0, comp = 0;
3849 		for (unsigned int i = 0; i < (pmap_page_size / sizeof(*ptep)); i++, ptep++) {
3850 			/* Keep track of all non-empty entries to detect memory corruption. */
3851 			if (__improbable(*ptep != ARM_PTE_EMPTY)) {
3852 				non_empty++;
3853 			}
3854 
3855 			if (__improbable(ARM_PTE_IS_COMPRESSED(*ptep, ptep))) {
3856 				comp++;
3857 			} else if (__improbable((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE)) {
3858 				valid++;
3859 			}
3860 		}
3861 
3862 #if MACH_ASSERT
3863 		/**
3864 		 * On internal machines, panic whenever a page table getting deleted has
3865 		 * leftover mappings (valid or otherwise) or a leaf page table has a
3866 		 * non-zero refcnt.
3867 		 */
3868 		if (__improbable((non_empty != 0) || (remove_leaf_table && refcnt != 0))) {
3869 #else /* MACH_ASSERT */
3870 		/* We already know the leaf page-table has a non-zero refcnt, so panic. */
3871 		{
3872 #endif /* MACH_ASSERT */
3873 			panic("%s: Found inconsistent state in soon to be deleted L%d table: %d valid, "
3874 			    "%d compressed, %d non-empty, refcnt=%d, L%d tte=%#llx, pmap=%p, bpte=%p", __func__,
3875 			    level + 1, valid, comp, non_empty, refcnt, level, (uint64_t)tte, pmap, bpte);
3876 		}
3877 	}
3878 
3879 #if (__ARM_VMSA__ == 7)
3880 	{
3881 		tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
3882 		unsigned i;
3883 
3884 		for (i = 0; i < 4; i++, ttep_4M++) {
3885 			*ttep_4M = (tt_entry_t) 0;
3886 		}
3887 		FLUSH_PTE_STRONG();
3888 	}
3889 #else
3890 	*ttep = (tt_entry_t) 0;
3891 	FLUSH_PTE_STRONG();
3892 #endif /* (__ARM_VMSA__ == 7) */
3893 	// If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
3894 	if (va_end > va_start) {
3895 #if (__ARM_VMSA__ == 7)
3896 		// Ensure intermediate translations are flushed for each 1MB block
3897 		flush_mmu_tlb_entry_async((va_start & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
3898 		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
3899 		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
3900 		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
3901 #endif
3902 		PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync, false);
3903 	}
3904 }
3905 
3906 /**
3907  * Given a pointer to an entry within a `level` page table, delete the
3908  * page table at `level` + 1 that is represented by that entry. For instance,
3909  * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
3910  * contains the PA of the L3 table, and `level` would be "2".
3911  *
3912  * @note If the table getting deallocated is a leaf table, then that leaf table
3913  *       must have a refcnt of zero before getting deallocated. All other levels
3914  *       must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
3915  *
3916  * @param pmap The pmap that owns the page table to be deallocated.
3917  * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
3918  * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
3919  * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
3920  * @param ttep Pointer to the `level` TTE to remove.
3921  * @param level The level of the table that contains an entry pointing to the
3922  *              table to be removed. The deallocated page table will be a
3923  *              `level` + 1 table (so if `level` is 2, then an L3 table will be
3924  *              deleted).
3925  */
3926 void
3927 pmap_tte_deallocate(
3928 	pmap_t pmap,
3929 	vm_offset_t va_start,
3930 	vm_offset_t va_end,
3931 	bool need_strong_sync,
3932 	tt_entry_t *ttep,
3933 	unsigned int level)
3934 {
3935 	pmap_paddr_t pa;
3936 	tt_entry_t tte;
3937 
3938 	pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);
3939 
3940 	tte = *ttep;
3941 
3942 	if (tte_get_ptd(tte)->pmap != pmap) {
3943 		panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
3944 		    __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
3945 	}
3946 
3947 	pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);
3948 
3949 	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3950 		uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
3951 
3952 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
3953 
3954 		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
3955 		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
3956 		pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
3957 		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
3958 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
3959 	}
3960 }
3961 
3962 /*
3963  *	Remove a range of hardware page-table entries.
3964  *	The entries given are the first (inclusive)
3965  *	and last (exclusive) entries for the VM pages.
3966  *	The virtual address is the va for the first pte.
3967  *
3968  *	The pmap must be locked.
3969  *	If the pmap is not the kernel pmap, the range must lie
3970  *	entirely within one pte-page.  This is NOT checked.
3971  *	Assumes that the pte-page exists.
3972  *
3973  *	Returns the number of PTE changed
3974  */
3975 MARK_AS_PMAP_TEXT static int
3976 pmap_remove_range(
3977 	pmap_t pmap,
3978 	vm_map_address_t va,
3979 	pt_entry_t *bpte,
3980 	pt_entry_t *epte)
3981 {
3982 	bool need_strong_sync = false;
3983 	int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
3984 	    &need_strong_sync, PMAP_OPTIONS_REMOVE);
3985 	if (num_changed > 0) {
3986 		PMAP_UPDATE_TLBS(pmap, va,
3987 		    va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync, true);
3988 	}
3989 	return num_changed;
3990 }
3991 
3992 
3993 #ifdef PVH_FLAG_EXEC
3994 
3995 /*
3996  *	Update the access protection bits of the physical aperture mapping for a page.
3997  *	This is useful, for example, in guaranteeing that a verified executable page
3998  *	has no writable mappings anywhere in the system, including the physical
3999  *	aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
4000  *	synchronization overhead in cases where the call to this function is
4001  *	guaranteed to be followed by other TLB operations.
4002  */
4003 void
4004 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
4005 {
4006 #if __ARM_PTE_PHYSMAP__
4007 	pvh_assert_locked(pai);
4008 	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
4009 	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
4010 
4011 	pt_entry_t tmplate = *pte_p;
4012 	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
4013 		return;
4014 	}
4015 	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
4016 #if (__ARM_VMSA__ > 7)
4017 	if (tmplate & ARM_PTE_HINT_MASK) {
4018 		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
4019 		    __func__, pte_p, (void *)kva, tmplate);
4020 	}
4021 #endif
4022 	write_pte_strong(pte_p, tmplate);
4023 	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap, true);
4024 	if (!flush_tlb_async) {
4025 		sync_tlb_flush();
4026 	}
4027 #endif
4028 }
4029 
4030 #endif /* defined(PVH_FLAG_EXEC) */
4031 
4032 MARK_AS_PMAP_TEXT int
4033 pmap_remove_range_options(
4034 	pmap_t pmap,
4035 	vm_map_address_t va,
4036 	pt_entry_t *bpte,
4037 	pt_entry_t *epte,
4038 	vm_map_address_t *eva,
4039 	bool *need_strong_sync __unused,
4040 	int options)
4041 {
4042 	pt_entry_t     *cpte;
4043 	size_t          npages = 0;
4044 	int             num_removed, num_unwired;
4045 	int             num_pte_changed;
4046 	unsigned int    pai = 0;
4047 	pmap_paddr_t    pa;
4048 	int             num_external, num_internal, num_reusable;
4049 	int             num_alt_internal;
4050 	uint64_t        num_compressed, num_alt_compressed;
4051 	int16_t         refcnt = 0;
4052 
4053 	pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);
4054 
4055 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4056 	uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
4057 
4058 	if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
4059 		panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
4060 	}
4061 
4062 	num_removed = 0;
4063 	num_unwired = 0;
4064 	num_pte_changed = 0;
4065 	num_external = 0;
4066 	num_internal = 0;
4067 	num_reusable = 0;
4068 	num_compressed = 0;
4069 	num_alt_internal = 0;
4070 	num_alt_compressed = 0;
4071 
4072 #if XNU_MONITOR
4073 	bool ro_va = false;
4074 	if (__improbable((pmap == kernel_pmap) && (eva != NULL) && zone_spans_ro_va(va, *eva))) {
4075 		ro_va = true;
4076 	}
4077 #endif
4078 	for (cpte = bpte; cpte < epte;
4079 	    cpte += PAGE_RATIO, va += pmap_page_size) {
4080 		pt_entry_t      spte;
4081 		boolean_t       managed = FALSE;
4082 
4083 		/*
4084 		 * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
4085 		 * so we need to be as aggressive as possible in checking for preemption when we can.
4086 		 */
4087 		if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
4088 			*eva = va;
4089 			break;
4090 		}
4091 
4092 		spte = *((volatile pt_entry_t*)cpte);
4093 
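		/*
		 * Lock the PV head for the physical page this PTE maps.  Because the PTE
		 * can change while we wait for the lock, re-read it afterwards and retry
		 * if it now maps a different physical page; unmanaged (non-pa_valid)
		 * addresses break out without taking any PVH lock.
		 */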
4094 		while (!managed) {
4095 			if (pmap != kernel_pmap &&
4096 			    (options & PMAP_OPTIONS_REMOVE) &&
4097 			    (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
4098 				/*
4099 				 * "pmap" must be locked at this point,
4100 				 * so this should not race with another
4101 				 * pmap_remove_range() or pmap_enter().
4102 				 */
4103 
4104 				/* one less "compressed"... */
4105 				num_compressed++;
4106 				if (spte & ARM_PTE_COMPRESSED_ALT) {
4107 					/* ... but it used to be "ALTACCT" */
4108 					num_alt_compressed++;
4109 				}
4110 
4111 				/* clear marker */
4112 				write_pte_fast(cpte, ARM_PTE_TYPE_FAULT);
4113 				/*
4114 				 * "refcnt" also accounts for
4115 				 * our "compressed" markers,
4116 				 * so let's update it here.
4117 				 */
4118 				--refcnt;
4119 				spte = *((volatile pt_entry_t*)cpte);
4120 			}
4121 			/*
4122 			 * It may be possible for the pte to transition from managed
4123 			 * to unmanaged in this timeframe; for now, elide the assert.
4124 			 * We should break out as a consequence of checking pa_valid.
4125 			 */
4126 			//assert(!ARM_PTE_IS_COMPRESSED(spte));
4127 			pa = pte_to_pa(spte);
4128 			if (!pa_valid(pa)) {
4129 #if XNU_MONITOR
4130 				unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
4131 #endif
4132 #if XNU_MONITOR
4133 				if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
4134 				    (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
4135 					panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
4136 					    __func__, (uint64_t)pa);
4137 				}
4138 #endif
4139 				break;
4140 			}
4141 			pai = pa_index(pa);
4142 			pvh_lock(pai);
4143 			spte = *((volatile pt_entry_t*)cpte);
4144 			pa = pte_to_pa(spte);
4145 			if (pai == pa_index(pa)) {
4146 				managed = TRUE;
4147 				break; // Leave pai locked as we will unlock it after we free the PV entry
4148 			}
4149 			pvh_unlock(pai);
4150 		}
4151 
4152 		if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
4153 			/*
4154 			 * There used to be a valid mapping here but it
4155 			 * has already been removed when the page was
4156 			 * sent to the VM compressor, so nothing left to
4157 			 * remove now...
4158 			 */
4159 			continue;
4160 		}
4161 
4162 		/* remove the translation, do not flush the TLB */
4163 		if (*cpte != ARM_PTE_TYPE_FAULT) {
4164 			assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4165 			assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4166 #if MACH_ASSERT
4167 			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4168 				panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
4169 				    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
4170 			}
4171 #endif
4172 			write_pte_fast(cpte, ARM_PTE_TYPE_FAULT);
4173 			num_pte_changed++;
4174 		}
4175 
4176 		if ((spte != ARM_PTE_TYPE_FAULT) &&
4177 		    (pmap != kernel_pmap)) {
4178 			assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
4179 			assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
4180 			--refcnt;
4181 		}
4182 
4183 		if (pte_is_wired(spte)) {
4184 			pte_set_wired(pmap, cpte, 0);
4185 			num_unwired++;
4186 		}
4187 		/*
4188 		 * if not managed, we're done
4189 		 */
4190 		if (!managed) {
4191 			continue;
4192 		}
4193 
4194 #if XNU_MONITOR
4195 		if (__improbable(ro_va)) {
4196 			pmap_ppl_unlockdown_page_locked(pai, PVH_FLAG_LOCKDOWN_RO, true);
4197 		}
4198 #endif
4199 
4200 		/*
4201 		 * find and remove the mapping from the chain for this
4202 		 * physical address.
4203 		 */
4204 		bool is_altacct = pmap_remove_pv(pmap, cpte, pai, true);
4205 
4206 		if (is_altacct) {
4207 			assert(ppattr_test_internal(pai));
4208 			num_internal++;
4209 			num_alt_internal++;
4210 			if (!pvh_test_type(pai_to_pvh(pai), PVH_TYPE_PTEP)) {
4211 				ppattr_clear_altacct(pai);
4212 			}
4213 		} else if (ppattr_test_internal(pai)) {
4214 			if (ppattr_test_reusable(pai)) {
4215 				num_reusable++;
4216 			} else {
4217 				num_internal++;
4218 			}
4219 		} else {
4220 			num_external++;
4221 		}
4222 		pvh_unlock(pai);
4223 		num_removed++;
4224 	}
4225 
4226 	/*
4227 	 *	Update the counts
4228 	 */
4229 	pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);
4230 
4231 	if (pmap != kernel_pmap) {
4232 		if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
4233 			panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
4234 		}
4235 
4236 		/* update ledgers */
4237 		pmap_ledger_debit(pmap, task_ledgers.external, (num_external) * pmap_page_size);
4238 		pmap_ledger_debit(pmap, task_ledgers.reusable, (num_reusable) * pmap_page_size);
4239 		pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
4240 		pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
4241 		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
4242 		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
4243 		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
4244 		/* make needed adjustments to phys_footprint */
4245 		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
4246 		    ((num_internal -
4247 		    num_alt_internal) +
4248 		    (num_compressed -
4249 		    num_alt_compressed)) * pmap_page_size);
4250 	}
4251 
4252 	/* flush the ptable entries we have written */
4253 	if (num_pte_changed > 0) {
4254 		FLUSH_PTE_STRONG();
4255 	}
4256 
4257 	return num_pte_changed;
4258 }
4259 
4260 
4261 /*
4262  *	Remove the given range of addresses
4263  *	from the specified map.
4264  *
4265  *	It is assumed that the start and end are properly
4266  *	rounded to the hardware page size.
4267  */
4268 void
4269 pmap_remove(
4270 	pmap_t pmap,
4271 	vm_map_address_t start,
4272 	vm_map_address_t end)
4273 {
4274 	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4275 }
4276 
4277 MARK_AS_PMAP_TEXT vm_map_address_t
4278 pmap_remove_options_internal(
4279 	pmap_t pmap,
4280 	vm_map_address_t start,
4281 	vm_map_address_t end,
4282 	int options)
4283 {
4284 	vm_map_address_t eva = end;
4285 	pt_entry_t     *bpte, *epte;
4286 	pt_entry_t     *pte_p;
4287 	tt_entry_t     *tte_p;
4288 	int             remove_count = 0;
4289 	bool            need_strong_sync = false;
4290 
4291 	if (__improbable(end < start)) {
4292 		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
4293 	}
4294 	if (__improbable(pmap->type == PMAP_TYPE_COMMPAGE)) {
4295 		panic("%s: attempt to remove mappings from commpage pmap %p", __func__, pmap);
4296 	}
4297 
4298 	validate_pmap_mutable(pmap);
4299 
4300 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4301 
4302 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
4303 
4304 	tte_p = pmap_tte(pmap, start);
4305 
4306 	if (tte_p == (tt_entry_t *) NULL) {
4307 		goto done;
4308 	}
4309 
4310 	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4311 		pte_p = (pt_entry_t *) ttetokv(*tte_p);
4312 		bpte = &pte_p[pte_index(pt_attr, start)];
4313 		epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
4314 
4315 		remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
4316 		    &need_strong_sync, options);
4317 
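		/*
		 * If removing these mappings drained the leaf table's refcount to zero for a
		 * user pmap, tear down the leaf table as well: pmap_tte_deallocate() clears
		 * the twig TTE and performs the necessary TLB invalidation.
		 */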
4318 		if ((pmap->type == PMAP_TYPE_USER) && (ptep_get_info(pte_p)->refcnt == 0)) {
4319 			pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
4320 			remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
4321 		}
4322 	}
4323 
4324 done:
4325 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
4326 
4327 	if (remove_count > 0) {
4328 		PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync, true);
4329 	}
4330 	return eva;
4331 }
4332 
4333 void
4334 pmap_remove_options(
4335 	pmap_t pmap,
4336 	vm_map_address_t start,
4337 	vm_map_address_t end,
4338 	int options)
4339 {
4340 	vm_map_address_t va;
4341 
4342 	if (pmap == PMAP_NULL) {
4343 		return;
4344 	}
4345 
4346 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4347 
4348 	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
4349 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
4350 	    VM_KERNEL_ADDRHIDE(end));
4351 
4352 #if MACH_ASSERT
4353 	if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
4354 		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx",
4355 		    pmap, (uint64_t)start, (uint64_t)end);
4356 	}
4357 	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
4358 		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx",
4359 		    pmap, (uint64_t)start, (uint64_t)end);
4360 	}
4361 #endif
4362 
4363 	/*
4364 	 * We allow single-page requests to execute non-preemptibly,
4365 	 * as it doesn't make sense to sample AST_URGENT for a single-page
4366 	 * operation, and there are a couple of special use cases that
4367 	 * require a non-preemptible single-page operation.
4368 	 */
4369 	if ((end - start) > pt_attr_page_size(pt_attr)) {
4370 		pmap_verify_preemptible();
4371 	}
4372 
4373 	/*
4374 	 *      Remove the range one twig-sized chunk at a time; each call below operates on a single leaf page table and performs its own TLB invalidation.
4375 	 */
4376 	va = start;
4377 	while (va < end) {
4378 		vm_map_address_t l;
4379 
4380 		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
4381 		if (l > end) {
4382 			l = end;
4383 		}
4384 
4385 #if XNU_MONITOR
4386 		va = pmap_remove_options_ppl(pmap, va, l, options);
4387 
4388 		pmap_ledger_check_balance(pmap);
4389 #else
4390 		va = pmap_remove_options_internal(pmap, va, l, options);
4391 #endif
4392 	}
4393 
4394 	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
4395 }
4396 
4397 
4398 /*
4399  *	Remove phys addr if mapped in specified map
4400  */
4401 void
4402 pmap_remove_some_phys(
4403 	__unused pmap_t map,
4404 	__unused ppnum_t pn)
4405 {
4406 	/* Implement to support working set code */
4407 }
4408 
4409 /*
4410  * Implementation of PMAP_SWITCH_USER that Mach VM uses to
4411  * switch a thread onto a new vm_map.
4412  */
4413 void
4414 pmap_switch_user(thread_t thread, vm_map_t new_map)
4415 {
4416 	pmap_t new_pmap = new_map->pmap;
4417 
4418 
4419 	thread->map = new_map;
4420 	pmap_set_pmap(new_pmap, thread);
4421 
4422 }
4423 
4424 void
4425 pmap_set_pmap(
4426 	pmap_t pmap,
4427 #if     !__ARM_USER_PROTECT__
4428 	__unused
4429 #endif
4430 	thread_t        thread)
4431 {
4432 	pmap_switch(pmap);
4433 #if __ARM_USER_PROTECT__
4434 	thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
4435 	thread->machine.asid = pmap->hw_asid;
4436 #endif
4437 }
4438 
4439 static void
4440 pmap_flush_core_tlb_asid_async(pmap_t pmap)
4441 {
4442 #if (__ARM_VMSA__ == 7)
4443 	flush_core_tlb_asid_async(pmap->hw_asid);
4444 #else
4445 	flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
4446 #endif
4447 }
4448 
4449 static inline bool
4450 pmap_user_ttb_is_clear(void)
4451 {
4452 #if (__ARM_VMSA__ > 7)
4453 	return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
4454 #else
4455 	return get_mmu_ttb() == kernel_pmap->ttep;
4456 #endif
4457 }
4458 
4459 MARK_AS_PMAP_TEXT void
4460 pmap_switch_internal(
4461 	pmap_t pmap)
4462 {
4463 	pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
4464 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4465 #if XNU_MONITOR
4466 	os_atomic_store(&cpu_data_ptr->active_pmap, pmap, relaxed);
4467 #endif
4468 	validate_pmap_mutable(pmap);
4469 	uint16_t asid_index = pmap->hw_asid;
4470 	bool do_asid_flush = false;
4471 	bool do_commpage_flush = false;
4472 
4473 	if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
4474 		panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
4475 	}
4476 #if __ARM_KERNEL_PROTECT__
4477 	asid_index >>= 1;
4478 #endif
4479 
4480 	pmap_t                    last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
4481 #if (__ARM_VMSA__ > 7)
4482 	__unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
4483 	__unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
4484 	__unused vm_map_offset_t  last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
4485 #endif
4486 	bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
4487 	bool break_before_make = do_shared_region_flush;
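	/*
	 * break_before_make indicates that the user TTBR must first be pointed at the
	 * invalid (empty) translation table before the TLB maintenance below is issued
	 * and the new pmap's TTBR is installed, so stale translations for the outgoing
	 * address space cannot be refilled while we invalidate.
	 */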
4488 
4489 	if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
4490 		asid_index -= 1;
4491 		pmap_update_plru(asid_index);
4492 
4493 		/* Paranoia. */
4494 		assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));
4495 
4496 		/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
4497 		uint8_t new_sw_asid = pmap->sw_asid;
4498 		uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];
4499 
4500 		if (new_sw_asid != last_sw_asid) {
4501 			/*
4502 			 * If the virtual ASID of the new pmap does not match the virtual ASID
4503 			 * last seen on this CPU for the physical ASID (that was a mouthful),
4504 			 * then this switch runs the risk of aliasing.  We need to flush the
4505 			 * TLB for this physical ASID in this case.
4506 			 */
4507 			cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
4508 			do_asid_flush = true;
4509 			break_before_make = true;
4510 		}
4511 	}
4512 
4513 #if __ARM_MIXED_PAGE_SIZE__
4514 	if (pt_attr->pta_tcr_value != get_tcr()) {
4515 		break_before_make = true;
4516 	}
4517 #endif
4518 #if __ARM_MIXED_PAGE_SIZE__
4519 	/*
4520 	 * For mixed page size configurations, we need to flush the global commpage mappings from
4521 	 * the TLB when transitioning between address spaces with different page sizes.  Otherwise
4522 	 * it's possible for a TLB fill against the incoming commpage to produce a TLB entry
4523 	 * which partially overlaps a TLB entry from the outgoing commpage, leading to a TLB
4524 	 * conflict abort or other unpredictable behavior.
4525 	 */
4526 	if (pt_attr_leaf_shift(pt_attr) != cpu_data_ptr->commpage_page_shift) {
4527 		do_commpage_flush = true;
4528 	}
4529 	if (do_commpage_flush) {
4530 		break_before_make = true;
4531 	}
4532 #endif
4533 	if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
4534 		PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
4535 		pmap_clear_user_ttb_internal();
4536 	}
4537 
4538 	/* If we're switching to a different nested pmap (i.e. shared region), we'll need
4539 	 * to flush the userspace mappings for that region.  Those mappings are global
4540 	 * and will not be protected by the ASID.  It should also be cheaper to flush the
4541 	 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
4542 	if (__improbable(do_shared_region_flush)) {
4543 #if __ARM_RANGE_TLBI__
4544 		uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
4545 		vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;
4546 
4547 		/* NOTE: here we flush the global TLB entries for the previous nested region only.
4548 		 * There may still be non-global entries that overlap with the incoming pmap's
4549 		 * nested region.  On Apple SoCs at least, this is acceptable.  Those non-global entries
4550 		 * must necessarily belong to a different ASID than the incoming pmap, or they would
4551 		 * be flushed in the do_asid_flush case below.  This will prevent them from conflicting
4552 		 * with the incoming pmap's nested region.  However, the ARMv8 ARM is not crystal clear
4553 		 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
4554 		 * to consider additional invalidation here in the future. */
4555 		if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
4556 			flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
4557 		} else {
4558 			do_asid_flush = false;
4559 			flush_core_tlb_async();
4560 		}
4561 #else
4562 		do_asid_flush = false;
4563 		flush_core_tlb_async();
4564 #endif // __ARM_RANGE_TLBI__
4565 	}
4566 
4567 #if __ARM_MIXED_PAGE_SIZE__
4568 	if (__improbable(do_commpage_flush)) {
4569 		const uint64_t commpage_shift = cpu_data_ptr->commpage_page_shift;
4570 		const uint64_t rtlbi_param = generate_rtlbi_param((ppnum_t)_COMM_PAGE64_NESTING_SIZE >> commpage_shift,
4571 		    0, _COMM_PAGE64_NESTING_START, commpage_shift);
4572 		flush_core_tlb_allrange_async(rtlbi_param);
4573 	}
4574 #endif
4575 	if (__improbable(do_asid_flush)) {
4576 		pmap_flush_core_tlb_asid_async(pmap);
4577 #if DEVELOPMENT || DEBUG
4578 		os_atomic_inc(&pmap_asid_flushes, relaxed);
4579 #endif
4580 	}
4581 	if (__improbable(do_asid_flush || do_shared_region_flush || do_commpage_flush)) {
4582 		sync_tlb_flush_local();
4583 	}
4584 
4585 	pmap_switch_user_ttb(pmap, cpu_data_ptr);
4586 }
4587 
4588 void
4589 pmap_switch(
4590 	pmap_t pmap)
4591 {
4592 	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
4593 #if XNU_MONITOR
4594 	pmap_switch_ppl(pmap);
4595 #else
4596 	pmap_switch_internal(pmap);
4597 #endif
4598 	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
4599 }
4600 
4601 void
4602 pmap_page_protect(
4603 	ppnum_t ppnum,
4604 	vm_prot_t prot)
4605 {
4606 	pmap_page_protect_options(ppnum, prot, 0, NULL);
4607 }
4608 
4609 /*
4610  *	Routine:	pmap_page_protect_options
4611  *
4612  *	Function:
4613  *		Lower the permission for all mappings to a given
4614  *		page.
4615  */
4616 MARK_AS_PMAP_TEXT static void
4617 pmap_page_protect_options_with_flush_range(
4618 	ppnum_t ppnum,
4619 	vm_prot_t prot,
4620 	unsigned int options,
4621 	pmap_tlb_flush_range_t *flush_range)
4622 {
4623 	pmap_paddr_t    phys = ptoa(ppnum);
4624 	pv_entry_t    **pv_h;
4625 	pv_entry_t     *pve_p, *orig_pve_p;
4626 	pv_entry_t     *pveh_p;
4627 	pv_entry_t     *pvet_p;
4628 	pt_entry_t     *pte_p, *orig_pte_p;
4629 	pv_entry_t     *new_pve_p;
4630 	pt_entry_t     *new_pte_p;
4631 	vm_offset_t     pvh_flags;
4632 	unsigned int    pai;
4633 	bool            remove;
4634 	bool            set_NX;
4635 	unsigned int    pvh_cnt = 0;
4636 	unsigned int    pass1_updated = 0;
4637 	unsigned int    pass2_updated = 0;
4638 
4639 	assert(ppnum != vm_page_fictitious_addr);
4640 
4641 	/* Only work with managed pages. */
4642 	if (!pa_valid(phys)) {
4643 		return;
4644 	}
4645 
4646 	/*
4647 	 * Determine the new protection.
4648 	 */
4649 	switch (prot) {
4650 	case VM_PROT_ALL:
4651 		return;         /* nothing to do */
4652 	case VM_PROT_READ:
4653 	case VM_PROT_READ | VM_PROT_EXECUTE:
4654 		remove = false;
4655 		break;
4656 	default:
4657 		/* PPL security model requires that we flush TLBs before we exit if the page may be recycled. */
4658 		options = options & ~PMAP_OPTIONS_NOFLUSH;
4659 		remove = true;
4660 		break;
4661 	}
4662 
4663 	pmap_cpu_data_t *pmap_cpu_data = NULL;
4664 	if (remove) {
4665 #if !XNU_MONITOR
4666 		mp_disable_preemption();
4667 #endif
4668 		pmap_cpu_data = pmap_get_cpu_data();
4669 		os_atomic_store(&pmap_cpu_data->inflight_disconnect, true, relaxed);
4670 		/*
4671 		 * Ensure the store to inflight_disconnect will be observed before any of the
4672 		 * ensuing PTE/refcount stores in this function.  This flag is used to avoid
4673 		 * a race in which the VM may clear a pmap's mappings and destroy the pmap on
4674 		 * another CPU, in between this function's clearing a PTE and dropping the
4675 		 * corresponding pagetable refcount.  That can lead to a panic if the
4676 		 * destroying thread observes a non-zero refcount.  For this we need a store-
4677 		 * store barrier; a store-release operation would not be sufficient.
4678 		 */
4679 		os_atomic_thread_fence(release);
4680 	}
4681 
4682 	pai = pa_index(phys);
4683 	pvh_lock(pai);
4684 	pv_h = pai_to_pvh(pai);
4685 	pvh_flags = pvh_get_flags(pv_h);
4686 
4687 #if XNU_MONITOR
4688 	if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN_MASK))) {
4689 		panic("%d is locked down (%#llx), cannot remove", pai, (uint64_t)pvh_get_flags(pv_h));
4690 	}
4691 	if (__improbable(ppattr_pa_test_monitor(phys))) {
4692 		panic("%s: PA 0x%llx belongs to PPL.", __func__, (uint64_t)phys);
4693 	}
4694 #endif
4695 
4696 	orig_pte_p = pte_p = PT_ENTRY_NULL;
4697 	orig_pve_p = pve_p = PV_ENTRY_NULL;
4698 	pveh_p = PV_ENTRY_NULL;
4699 	pvet_p = PV_ENTRY_NULL;
4700 	new_pve_p = PV_ENTRY_NULL;
4701 	new_pte_p = PT_ENTRY_NULL;
4702 
4703 
4704 	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4705 		orig_pte_p = pte_p = pvh_ptep(pv_h);
4706 	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4707 		orig_pve_p = pve_p = pvh_pve_list(pv_h);
4708 		pveh_p = pve_p;
4709 	} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
4710 		panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)phys);
4711 	}
4712 
4713 	/* Pass 1: Update all CPU PTEs and accounting info as necessary */
4714 	int pve_ptep_idx = 0;
4715 
4716 	/*
4717 	 * issue_tlbi is used to indicate that this function will need to issue at least one TLB
4718 	 * invalidation during pass 2.  tlb_flush_needed only indicates that PTE permissions have
4719 	 * changed and that a TLB flush will be needed *at some point*, so we'll need to call
4720 	 * FLUSH_PTE_STRONG() to synchronize prior PTE updates.  In the case of a flush_range
4721 	 * operation, TLB invalidation may be handled by the caller so it's possible for
4722 	 * tlb_flush_needed to be true while issue_tlbi is false.
4723 	 */
4724 	bool issue_tlbi = false;
4725 	bool tlb_flush_needed = false;
4726 	const bool compress = ((options & PMAP_OPTIONS_COMPRESSOR) && ppattr_test_internal(pai));
4727 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
4728 		pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
4729 		bool update = false;
4730 
4731 		if (pve_p != PV_ENTRY_NULL) {
4732 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
4733 			if (pte_p == PT_ENTRY_NULL) {
4734 				goto protect_skip_pve_pass1;
4735 			}
4736 		}
4737 
4738 #ifdef PVH_FLAG_IOMMU
4739 		if (pvh_ptep_is_iommu(pte_p)) {
4740 #if XNU_MONITOR
4741 			if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN_MASK)) {
4742 				panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu %p, pve_p=%p",
4743 				    ppnum, ptep_get_iommu(pte_p), pve_p);
4744 			}
4745 #endif
4746 			if (remove && (options & PMAP_OPTIONS_COMPRESSOR)) {
4747 				panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu %p, pve_p=%p",
4748 				    ppnum, ptep_get_iommu(pte_p), pve_p);
4749 			}
4750 			goto protect_skip_pve_pass1;
4751 		}
4752 #endif
4753 		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
4754 		const pmap_t pmap = ptdp->pmap;
4755 		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
4756 
4757 		if (__improbable((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum))) {
4758 #if MACH_ASSERT
4759 			if ((pmap != NULL) && (pve_p != PV_ENTRY_NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
4760 				/* Temporarily set PTEP to NULL so that the logic below doesn't pick it up as duplicate. */
4761 				pt_entry_t *temp_ptep = pve_get_ptep(pve_p, pve_ptep_idx);
4762 				pve_set_ptep(pve_p, pve_ptep_idx, PT_ENTRY_NULL);
4763 
4764 				pv_entry_t *check_pvep = pve_p;
4765 
4766 				do {
4767 					if (pve_find_ptep_index(check_pvep, pte_p) != -1) {
4768 						panic_plain("%s: duplicate pve entry ptep=%p pmap=%p, pvh=%p, "
4769 						    "pvep=%p, pai=0x%x", __func__, pte_p, pmap, pv_h, pve_p, pai);
4770 					}
4771 				} while ((check_pvep = pve_next(check_pvep)) != PV_ENTRY_NULL);
4772 
4773 				/* Restore previous PTEP value. */
4774 				pve_set_ptep(pve_p, pve_ptep_idx, temp_ptep);
4775 			}
4776 #endif
4777 			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x",
4778 			    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
4779 		}
4780 
4781 #if DEVELOPMENT || DEBUG
4782 		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
4783 #else
4784 		if ((prot & VM_PROT_EXECUTE))
4785 #endif
4786 		{
4787 			set_NX = false;
4788 		} else {
4789 			set_NX = true;
4790 		}
4791 
4792 		/* Remove the mapping if new protection is NONE */
4793 		if (remove) {
4794 			const bool is_altacct = ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx);
4795 			const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4796 			pt_entry_t spte = *pte_p;
4797 
4798 			if (pte_is_wired(spte)) {
4799 				pte_set_wired(pmap, pte_p, 0);
4800 				spte = *pte_p;
4801 				if (pmap != kernel_pmap) {
4802 					pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4803 				}
4804 			}
4805 
4806 			assertf(atop(pte_to_pa(spte)) == ppnum, "unexpected value 0x%llx for pte %p mapping ppnum 0x%x",
4807 			    (uint64_t)spte, pte_p, ppnum);
4808 
4809 			if (compress && (pmap != kernel_pmap)) {
4810 				assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
4811 				/* mark this PTE as having been "compressed" */
4812 				tmplate = ARM_PTE_COMPRESSED;
4813 				if (is_altacct) {
4814 					tmplate |= ARM_PTE_COMPRESSED_ALT;
4815 				}
4816 			} else {
4817 				tmplate = ARM_PTE_TYPE_FAULT;
4818 			}
4819 
4820 			assert(spte != tmplate);
4821 			write_pte_fast(pte_p, tmplate);
4822 			update = true;
4823 			++pass1_updated;
4824 
4825 			pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4826 
4827 #if MACH_ASSERT
4828 			/*
4829 			 * We only ever compress internal pages.
4830 			 */
4831 			if (options & PMAP_OPTIONS_COMPRESSOR) {
4832 				assert(ppattr_test_internal(pai));
4833 			}
4834 #endif
4835 
4836 			if (pmap != kernel_pmap) {
4837 				if (ppattr_test_reusable(pai) &&
4838 				    ppattr_test_internal(pai) &&
4839 				    !is_altacct) {
4840 					pmap_ledger_debit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4841 				} else if (!ppattr_test_internal(pai)) {
4842 					pmap_ledger_debit(pmap, task_ledgers.external, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4843 				}
4844 
4845 				if (is_altacct) {
4846 					assert(ppattr_test_internal(pai));
4847 					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4848 					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4849 					if (options & PMAP_OPTIONS_COMPRESSOR) {
4850 						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4851 						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4852 					}
4853 					ppattr_pve_clr_altacct(pai, pve_p, pve_ptep_idx);
4854 				} else if (ppattr_test_reusable(pai)) {
4855 					assert(ppattr_test_internal(pai));
4856 					if (options & PMAP_OPTIONS_COMPRESSOR) {
4857 						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4858 						/* was not in footprint, but is now */
4859 						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4860 					}
4861 				} else if (ppattr_test_internal(pai)) {
4862 					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4863 
4864 					/*
4865 					 * Update all stats related to physical footprint, which only
4866 					 * deals with internal pages.
4867 					 */
4868 					if (options & PMAP_OPTIONS_COMPRESSOR) {
4869 						/*
4870 						 * This removal is only being done so we can send this page to
4871 						 * the compressor; therefore it mustn't affect total task footprint.
4872 						 */
4873 						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4874 					} else {
4875 						/*
4876 						 * This internal page isn't going to the compressor, so adjust stats to keep
4877 						 * phys_footprint up to date.
4878 						 */
4879 						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
4880 					}
4881 				} else {
4882 					/* external page: no impact on ledgers */
4883 				}
4884 			}
4885 			assert((pve_p == PV_ENTRY_NULL) || !pve_get_altacct(pve_p, pve_ptep_idx));
4886 		} else {
4887 			pt_entry_t spte = *pte_p;
4888 			const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
4889 
4890 			if (pmap == kernel_pmap) {
4891 				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
4892 			} else {
4893 				tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
4894 			}
4895 
4896 			/*
4897 			 * While the naive implementation of this would serve to add execute
4898 			 * permission, this is not how the VM uses this interface, or how
4899 			 * x86_64 implements it.  So ignore requests to add execute permissions.
4900 			 */
4901 			if (set_NX) {
4902 				tmplate |= pt_attr_leaf_xn(pt_attr);
4903 			}
4904 
4905 
4906 			assert(spte != ARM_PTE_TYPE_FAULT);
4907 			assert(!ARM_PTE_IS_COMPRESSED(spte, pte_p));
4908 
4909 			if (spte != tmplate) {
4910 				/*
4911 				 * Mark the PTE so that we'll know this mapping requires a TLB flush in pass 2.
4912 				 * This allows us to avoid unnecessary flushing e.g. for COW aliases that didn't
4913 				 * require permission updates.  We use the ARM_PTE_WRITEABLE bit as that bit
4914 				 * should always be cleared by this function.
4915 				 */
4916 				pte_set_was_writeable(tmplate, true);
4917 				write_pte_fast(pte_p, tmplate);
4918 				update = true;
4919 				++pass1_updated;
4920 			} else if (pte_was_writeable(tmplate)) {
4921 				/*
4922 				 * We didn't change any of the relevant permission bits in the PTE, so we don't need
4923 				 * to flush the TLB, but we do want to clear the "was_writeable" flag.  When revoking
4924 				 * write access to a page, this function should always at least clear that flag for
4925 				 * all PTEs, as the VM is effectively requesting that subsequent write accesses to
4926 				 * these mappings go through vm_fault().  We therefore don't want those accesses to
4927 				 * be handled through arm_fast_fault().
4928 				 */
4929 				pte_set_was_writeable(tmplate, false);
4930 				write_pte_fast(pte_p, tmplate);
4931 			}
4932 		}
4933 
4934 		if (!issue_tlbi && update && !(options & PMAP_OPTIONS_NOFLUSH)) {
4935 			tlb_flush_needed = true;
4936 			if (remove || !flush_range || (flush_range->ptfr_pmap != pmap) ||
4937 			    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
4938 				issue_tlbi = true;
4939 			}
4940 		}
4941 protect_skip_pve_pass1:
4942 		pte_p = PT_ENTRY_NULL;
4943 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
4944 			pve_ptep_idx = 0;
4945 			pve_p = pve_next(pve_p);
4946 		}
4947 	}
4948 
4949 	if (tlb_flush_needed) {
4950 		FLUSH_PTE_STRONG();
4951 	}
4952 
4953 	if (!remove && !issue_tlbi) {
4954 		goto protect_finish;
4955 	}
4956 
4957 	/* Pass 2: Invalidate TLBs and update the list to remove CPU mappings */
4958 	pv_entry_t **pve_pp = pv_h;
4959 	pve_p = orig_pve_p;
4960 	pte_p = orig_pte_p;
4961 	pve_ptep_idx = 0;
4962 
4963 	/*
4964 	 * We need to keep track of whether a particular PVE list contains IOMMU
4965 	 * mappings when removing entries, because we should only remove CPU
4966 	 * mappings. If a PVE list contains at least one IOMMU mapping, we keep
4967 	 * it around.
4968 	 */
4969 	bool iommu_mapping_in_pve = false;
4970 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
4971 		if (pve_p != PV_ENTRY_NULL) {
4972 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
4973 			if (pte_p == PT_ENTRY_NULL) {
4974 				goto protect_skip_pve_pass2;
4975 			}
4976 		}
4977 
4978 #ifdef PVH_FLAG_IOMMU
4979 		if (pvh_ptep_is_iommu(pte_p)) {
4980 			iommu_mapping_in_pve = true;
4981 			if (remove && (pve_p == PV_ENTRY_NULL)) {
4982 				/*
4983 				 * We've found an IOMMU entry and it's the only entry in the PV list.
4984 				 * We don't discard IOMMU entries, so simply set up the new PV list to
4985 				 * contain the single IOMMU PTE and exit the loop.
4986 				 */
4987 				new_pte_p = pte_p;
4988 				break;
4989 			}
4990 			goto protect_skip_pve_pass2;
4991 		}
4992 #endif
4993 		pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
4994 		const pmap_t pmap = ptdp->pmap;
4995 		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
4996 
4997 		if (remove) {
4998 			if (!compress && (pmap != kernel_pmap)) {
4999 				/*
5000 				 * We must wait to decrement the refcount until we're completely finished using the PTE
5001 				 * on this path.  Otherwise, if we happened to drop the refcount to zero, a concurrent
5002 				 * pmap_remove() call might observe the zero refcount and free the pagetable out from
5003 				 * under us.
5004 				 */
5005 				if (OSAddAtomic16(-1, (SInt16 *) &(ptd_get_info(ptdp, pte_p)->refcnt)) <= 0) {
5006 					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
5007 				}
5008 			}
5009 			/* Remove this CPU mapping from PVE list. */
5010 			if (pve_p != PV_ENTRY_NULL) {
5011 				pve_set_ptep(pve_p, pve_ptep_idx, PT_ENTRY_NULL);
5012 			}
5013 		} else {
5014 			pt_entry_t spte = *pte_p;
5015 			if (pte_was_writeable(spte)) {
5016 				pte_set_was_writeable(spte, false);
5017 				write_pte_fast(pte_p, spte);
5018 			} else {
5019 				goto protect_skip_pve_pass2;
5020 			}
5021 		}
5022 		++pass2_updated;
5023 		if (remove || !flush_range || (flush_range->ptfr_pmap != pmap) ||
5024 		    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
5025 			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
5026 			    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
5027 		}
5028 
5029 protect_skip_pve_pass2:
5030 		pte_p = PT_ENTRY_NULL;
5031 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
5032 			pve_ptep_idx = 0;
5033 
5034 			if (remove) {
5035 				/**
5036 				 * If there are any IOMMU mappings in the PVE list, preserve
5037 				 * those mappings in a new PVE list (new_pve_p) which will later
5038 				 * become the new PVH entry. Keep track of the CPU mappings in
5039 				 * pveh_p/pvet_p so they can be deallocated later.
5040 				 */
5041 				if (iommu_mapping_in_pve) {
5042 					iommu_mapping_in_pve = false;
5043 					pv_entry_t *temp_pve_p = pve_next(pve_p);
5044 					pve_remove(pv_h, pve_pp, pve_p);
5045 					pveh_p = pvh_pve_list(pv_h);
5046 					pve_p->pve_next = new_pve_p;
5047 					new_pve_p = pve_p;
5048 					pve_p = temp_pve_p;
5049 					continue;
5050 				} else {
5051 					pvet_p = pve_p;
5052 					pvh_cnt++;
5053 				}
5054 			}
5055 
5056 			pve_pp = pve_next_ptr(pve_p);
5057 			pve_p = pve_next(pve_p);
5058 			iommu_mapping_in_pve = false;
5059 		}
5060 	}
5061 
5062 protect_finish:
5063 
5064 #ifdef PVH_FLAG_EXEC
5065 	if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
5066 		pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
5067 	}
5068 #endif
5069 	if (__improbable(pass1_updated != pass2_updated)) {
5070 		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
5071 		    __func__, pass1_updated, pass2_updated);
5072 	}
5073 	/* if we removed a bunch of entries, take care of them now */
5074 	if (remove) {
5075 		if (new_pve_p != PV_ENTRY_NULL) {
5076 			pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
5077 			pvh_set_flags(pv_h, pvh_flags);
5078 		} else if (new_pte_p != PT_ENTRY_NULL) {
5079 			pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
5080 			pvh_set_flags(pv_h, pvh_flags);
5081 		} else {
5082 			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5083 		}
5084 	}
5085 
5086 	if (flush_range && tlb_flush_needed) {
5087 		if (!remove) {
5088 			flush_range->ptfr_flush_needed = true;
5089 			tlb_flush_needed = false;
5090 		}
5091 	}
5092 
5093 	/*
5094 	 * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
5095 	 * lock to allow the backing pages to be repurposed.  This is a security precaution, aimed
5096 	 * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
5097 	 * a page to be repurposed while it is still live in the TLBs.
5098 	 */
5099 	if (remove && tlb_flush_needed) {
5100 		sync_tlb_flush();
5101 	}
5102 
5103 	pvh_unlock(pai);
5104 
5105 	if (remove) {
5106 		os_atomic_store(&pmap_cpu_data->inflight_disconnect, false, release);
5107 #if !XNU_MONITOR
5108 		mp_enable_preemption();
5109 #endif
5110 	}
5111 
5112 	if (!remove && tlb_flush_needed) {
5113 		sync_tlb_flush();
5114 	}
5115 
5116 	if (remove && (pvet_p != PV_ENTRY_NULL)) {
5117 		pv_list_free(pveh_p, pvet_p, pvh_cnt);
5118 	}
5119 }
5120 
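/*
 * Helper for pmap_page_protect_options(), reached directly on non-PPL
 * configurations or via the corresponding PPL entry point on XNU_MONITOR
 * builds.  It only decides whether the caller's flush intention allows
 * PMAP_OPTIONS_NOFLUSH to stand, then defers to
 * pmap_page_protect_options_with_flush_range() with no flush range.
 */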
5121 MARK_AS_PMAP_TEXT void
5122 pmap_page_protect_options_internal(
5123 	ppnum_t ppnum,
5124 	vm_prot_t prot,
5125 	unsigned int options,
5126 	void *arg)
5127 {
5128 	if (arg != NULL) {
5129 		/*
5130 		 * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
5131 		 * ultimately be flushed.  The nature of ARM TLB maintenance is such that we can flush the
5132 		 * TLBs much more precisely if we do so inline with the pagetable updates, and PPL security
5133 	 * TLBs much more precisely if we do so inline with the pagetable updates, and the PPL security
5134 		 * In that case, force the flush to take place.
5135 		 */
5136 		options &= ~PMAP_OPTIONS_NOFLUSH;
5137 	}
5138 	pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
5139 }
5140 
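/*
 * Lower the protections on every mapping of the given physical page.
 * Requests that would not reduce access (VM_PROT_ALL) are a no-op, and
 * unmanaged pages are ignored.
 */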
5141 void
5142 pmap_page_protect_options(
5143 	ppnum_t ppnum,
5144 	vm_prot_t prot,
5145 	unsigned int options,
5146 	void *arg)
5147 {
5148 	pmap_paddr_t    phys = ptoa(ppnum);
5149 
5150 	assert(ppnum != vm_page_fictitious_addr);
5151 
5152 	/* Only work with managed pages. */
5153 	if (!pa_valid(phys)) {
5154 		return;
5155 	}
5156 
5157 	/*
5158 	 * Determine the new protection.
5159 	 */
5160 	if (prot == VM_PROT_ALL) {
5161 		return;         /* nothing to do */
5162 	}
5163 
5164 	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5165 
5166 #if XNU_MONITOR
5167 	pmap_page_protect_options_ppl(ppnum, prot, options, arg);
5168 #else
5169 	pmap_page_protect_options_internal(ppnum, prot, options, arg);
5170 #endif
5171 
5172 	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5173 }
5174 
5175 
5176 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
5177 MARK_AS_PMAP_TEXT void
5178 pmap_disable_user_jop_internal(pmap_t pmap)
5179 {
5180 	if (pmap == kernel_pmap) {
5181 		panic("%s: called with kernel_pmap", __func__);
5182 	}
5183 	validate_pmap_mutable(pmap);
5184 	pmap->disable_jop = true;
5185 }
5186 
5187 void
5188 pmap_disable_user_jop(pmap_t pmap)
5189 {
5190 #if XNU_MONITOR
5191 	pmap_disable_user_jop_ppl(pmap);
5192 #else
5193 	pmap_disable_user_jop_internal(pmap);
5194 #endif
5195 }
5196 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
5197 
5198 /*
5199  * Indicates if the pmap layer enforces some additional restrictions on the
5200  * given set of protections.
5201  */
5202 bool
5203 pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
5204 {
5205 	return false;
5206 }
5207 
5208 /*
5209  *	Set the physical protection on the
5210  *	specified range of this map as requested.
5211  *	VERY IMPORTANT: Will not increase permissions.
5212  *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5213  */
5214 void
5215 pmap_protect(
5216 	pmap_t pmap,
5217 	vm_map_address_t b,
5218 	vm_map_address_t e,
5219 	vm_prot_t prot)
5220 {
5221 	pmap_protect_options(pmap, b, e, prot, 0, NULL);
5222 }
5223 
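/*
 * Apply a protection change to a single twig-aligned chunk of the given
 * pmap (the range must not cross a twig boundary; callers split larger
 * requests).  Returns the virtual address just past the last PTE visited,
 * which may be short of 'end' if the loop stopped early to honor a pending
 * preemption; the caller restarts from the returned address.
 */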
5224 MARK_AS_PMAP_TEXT vm_map_address_t
5225 pmap_protect_options_internal(
5226 	pmap_t pmap,
5227 	vm_map_address_t start,
5228 	vm_map_address_t end,
5229 	vm_prot_t prot,
5230 	unsigned int options,
5231 	__unused void *args)
5232 {
5233 	tt_entry_t      *tte_p;
5234 	pt_entry_t      *bpte_p, *epte_p;
5235 	pt_entry_t      *pte_p;
5236 	boolean_t        set_NX = TRUE;
5237 #if (__ARM_VMSA__ > 7)
5238 	boolean_t        set_XO = FALSE;
5239 #endif
5240 	boolean_t        should_have_removed = FALSE;
5241 	bool             need_strong_sync = false;
5242 
5243 	/* Validate the pmap input before accessing its data. */
5244 	validate_pmap_mutable(pmap);
5245 
5246 	const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5247 
5248 	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
5249 		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
5250 	}
5251 
5252 #if DEVELOPMENT || DEBUG
5253 	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5254 		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5255 			should_have_removed = TRUE;
5256 		}
5257 	} else
5258 #endif
5259 	{
5260 		/* Determine the new protection. */
5261 		switch (prot) {
5262 #if (__ARM_VMSA__ > 7)
5263 		case VM_PROT_EXECUTE:
5264 			set_XO = TRUE;
5265 			OS_FALLTHROUGH;
5266 #endif
5267 		case VM_PROT_READ:
5268 		case VM_PROT_READ | VM_PROT_EXECUTE:
5269 			break;
5270 		case VM_PROT_READ | VM_PROT_WRITE:
5271 		case VM_PROT_ALL:
5272 			return end;         /* nothing to do */
5273 		default:
5274 			should_have_removed = TRUE;
5275 		}
5276 	}
5277 
5278 	if (should_have_removed) {
5279 		panic("%s: should have been a remove operation, "
5280 		    "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5281 		    __FUNCTION__,
5282 		    pmap, (void *)start, (void *)end, prot, options, args);
5283 	}
5284 
5285 #if DEVELOPMENT || DEBUG
5286 	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5287 #else
5288 	if ((prot & VM_PROT_EXECUTE))
5289 #endif
5290 	{
5291 		set_NX = FALSE;
5292 	} else {
5293 		set_NX = TRUE;
5294 	}
5295 
5296 	const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
5297 	vm_map_address_t va = start;
5298 	unsigned int npages = 0;
5299 
5300 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
5301 
5302 	tte_p = pmap_tte(pmap, start);
5303 
5304 	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5305 		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5306 		bpte_p = &bpte_p[pte_index(pt_attr, start)];
5307 		epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
5308 		pte_p = bpte_p;
5309 
5310 		for (pte_p = bpte_p;
5311 		    pte_p < epte_p;
5312 		    pte_p += PAGE_RATIO, va += pmap_page_size) {
5313 			++npages;
5314 			if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
5315 			    pmap_pending_preemption())) {
5316 				break;
5317 			}
5318 			pt_entry_t spte;
5319 #if DEVELOPMENT || DEBUG
5320 			boolean_t  force_write = FALSE;
5321 #endif
5322 
5323 			spte = *((volatile pt_entry_t*)pte_p);
5324 
5325 			if ((spte == ARM_PTE_TYPE_FAULT) ||
5326 			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5327 				continue;
5328 			}
5329 
5330 			pmap_paddr_t    pa;
5331 			unsigned int    pai = 0;
5332 			boolean_t       managed = FALSE;
5333 
5334 			while (!managed) {
5335 				/*
5336 				 * It may be possible for the pte to transition from managed
5337 				 * to unmanaged in this timeframe; for now, elide the assert.
5338 				 * We should break out as a consequence of checking pa_valid.
5339 				 */
5340 				// assert(!ARM_PTE_IS_COMPRESSED(spte));
5341 				pa = pte_to_pa(spte);
5342 				if (!pa_valid(pa)) {
5343 					break;
5344 				}
5345 				pai = pa_index(pa);
5346 				pvh_lock(pai);
5347 				spte = *((volatile pt_entry_t*)pte_p);
5348 				pa = pte_to_pa(spte);
5349 				if (pai == pa_index(pa)) {
5350 					managed = TRUE;
5351 					break; // Leave the PVH locked as we will unlock it after we update the PTE
5352 				}
5353 				pvh_unlock(pai);
5354 			}
5355 
5356 			if ((spte == ARM_PTE_TYPE_FAULT) ||
5357 			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5358 				continue;
5359 			}
5360 
5361 			pt_entry_t      tmplate;
5362 
5363 			if (pmap == kernel_pmap) {
5364 #if DEVELOPMENT || DEBUG
5365 				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5366 					force_write = TRUE;
5367 					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5368 				} else
5369 #endif
5370 				{
5371 					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5372 				}
5373 			} else {
5374 #if DEVELOPMENT || DEBUG
5375 				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5376 					assert(pmap->type != PMAP_TYPE_NESTED);
5377 					force_write = TRUE;
5378 					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
5379 				} else
5380 #endif
5381 				{
5382 					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5383 				}
5384 			}
5385 
5386 			/*
5387 			 * XXX Removing "NX" would
5388 			 * grant "execute" access
5389 			 * immediately, bypassing any
5390 			 * checks VM might want to do
5391 			 * in its soft fault path.
5392 			 * pmap_protect() and co. are
5393 			 * not allowed to increase
5394 			 * access permissions.
5395 			 */
5396 			if (set_NX) {
5397 				tmplate |= pt_attr_leaf_xn(pt_attr);
5398 			} else {
5399 #if     (__ARM_VMSA__ > 7)
5400 				if (pmap == kernel_pmap) {
5401 					/* do NOT clear "PNX"! */
5402 					tmplate |= ARM_PTE_NX;
5403 				} else {
5404 					/* do NOT clear "NX"! */
5405 					tmplate |= pt_attr_leaf_x(pt_attr);
5406 					if (set_XO) {
5407 						tmplate &= ~ARM_PTE_APMASK;
5408 						tmplate |= pt_attr_leaf_rona(pt_attr);
5409 					}
5410 				}
5411 #endif
5412 			}
5413 
5414 #if DEVELOPMENT || DEBUG
5415 			if (force_write) {
5416 				/*
5417 				 * TODO: Run CS/Monitor checks here.
5418 				 */
5419 				if (managed) {
5420 					/*
5421 					 * We are marking the page as writable,
5422 					 * so we consider it to be modified and
5423 					 * referenced.
5424 					 */
5425 					ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5426 					tmplate |= ARM_PTE_AF;
5427 
5428 					if (ppattr_test_reffault(pai)) {
5429 						ppattr_clear_reffault(pai);
5430 					}
5431 
5432 					if (ppattr_test_modfault(pai)) {
5433 						ppattr_clear_modfault(pai);
5434 					}
5435 				}
5436 			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5437 				/*
5438 				 * An immediate request for anything other than
5439 				 * write should still mark the page as
5440 				 * referenced if managed.
5441 				 */
5442 				if (managed) {
5443 					ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
5444 					tmplate |= ARM_PTE_AF;
5445 
5446 					if (ppattr_test_reffault(pai)) {
5447 						ppattr_clear_reffault(pai);
5448 					}
5449 				}
5450 			}
5451 #endif
5452 
5453 			/* We do not expect to write fast fault the entry. */
5454 			pte_set_was_writeable(tmplate, false);
5455 
5456 			write_pte_fast(pte_p, tmplate);
5457 
5458 			if (managed) {
5459 				pvh_assert_locked(pai);
5460 				pvh_unlock(pai);
5461 			}
5462 		}
5463 		FLUSH_PTE_STRONG();
5464 		PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync, true);
5465 	} else {
5466 		va = end;
5467 	}
5468 
5469 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
5470 	return va;
5471 }
5472 
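/*
 * Public entry point for protection changes on [b, e).  Alignment is
 * validated, requests that would actually remove access are forwarded to
 * pmap_remove_options(), and the remaining work is carved into twig-sized
 * chunks handled by pmap_protect_options_internal() (or its PPL
 * counterpart) so that preemption can be honored between chunks.
 */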
5473 void
5474 pmap_protect_options(
5475 	pmap_t pmap,
5476 	vm_map_address_t b,
5477 	vm_map_address_t e,
5478 	vm_prot_t prot,
5479 	unsigned int options,
5480 	__unused void *args)
5481 {
5482 	vm_map_address_t l, beg;
5483 
5484 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5485 
5486 	if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
5487 		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx",
5488 		    pmap, (uint64_t)b, (uint64_t)e);
5489 	}
5490 
5491 	/*
5492 	 * We allow single-page requests to execute non-preemptibly,
5493 	 * as it doesn't make sense to sample AST_URGENT for a single-page
5494 	 * operation, and there are a couple of special use cases that
5495 	 * require a non-preemptible single-page operation.
5496 	 */
5497 	if ((e - b) > pt_attr_page_size(pt_attr)) {
5498 		pmap_verify_preemptible();
5499 	}
5500 
5501 #if DEVELOPMENT || DEBUG
5502 	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5503 		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5504 			pmap_remove_options(pmap, b, e, options);
5505 			return;
5506 		}
5507 	} else
5508 #endif
5509 	{
5510 		/* Determine the new protection. */
5511 		switch (prot) {
5512 		case VM_PROT_EXECUTE:
5513 		case VM_PROT_READ:
5514 		case VM_PROT_READ | VM_PROT_EXECUTE:
5515 			break;
5516 		case VM_PROT_READ | VM_PROT_WRITE:
5517 		case VM_PROT_ALL:
5518 			return;         /* nothing to do */
5519 		default:
5520 			pmap_remove_options(pmap, b, e, options);
5521 			return;
5522 		}
5523 	}
5524 
5525 	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5526 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5527 	    VM_KERNEL_ADDRHIDE(e));
5528 
5529 	beg = b;
5530 
5531 	while (beg < e) {
5532 		l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5533 
5534 		if (l > e) {
5535 			l = e;
5536 		}
5537 
5538 #if XNU_MONITOR
5539 		beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
5540 #else
5541 		beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5542 #endif
5543 	}
5544 
5545 	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5546 }
5547 
5548 /**
5549  * Inserts an arbitrary number of physical pages ("block") into a pmap.
5550  *
5551  * @param pmap pmap to insert the pages into.
5552  * @param va virtual address to map the pages into.
5553  * @param pa page number of the first physical page to map.
5554  * @param size block size, in number of pages.
5555  * @param prot mapping protection attributes.
5556  * @param attr flags to pass to pmap_enter().
5557  *
5558  * @return KERN_SUCCESS.
5559  */
5560 kern_return_t
5561 pmap_map_block(
5562 	pmap_t pmap,
5563 	addr64_t va,
5564 	ppnum_t pa,
5565 	uint32_t size,
5566 	vm_prot_t prot,
5567 	int attr,
5568 	unsigned int flags)
5569 {
5570 	return pmap_map_block_addr(pmap, va, ((pmap_paddr_t)pa) << PAGE_SHIFT, size, prot, attr, flags);
5571 }
5572 
5573 /**
5574  * Inserts an arbitrary number of physical pages ("block") into a pmap.
5575  * As opposed to pmap_map_block(), this function takes
5576  * a physical address as an input and operates using the
5577  * page size associated with the input pmap.
5578  *
5579  * @param pmap pmap to insert the pages into.
5580  * @param va virtual address to map the pages into.
5581  * @param pa physical address of the first physical page to map.
5582  * @param size block size, in number of pages.
5583  * @param prot mapping protection attributes.
5584  * @param attr flags to pass to pmap_enter().
5585  *
5586  * @return KERN_SUCCESS.
5587  */
5588 kern_return_t
5589 pmap_map_block_addr(
5590 	pmap_t pmap,
5591 	addr64_t va,
5592 	pmap_paddr_t pa,
5593 	uint32_t size,
5594 	vm_prot_t prot,
5595 	int attr,
5596 	unsigned int flags)
5597 {
5598 #if __ARM_MIXED_PAGE_SIZE__
5599 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5600 	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
5601 #else
5602 	const uint64_t pmap_page_size = PAGE_SIZE;
5603 #endif
5604 
5605 	for (ppnum_t page = 0; page < size; page++) {
5606 		if (pmap_enter_addr(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE) != KERN_SUCCESS) {
5607 			panic("%s: failed pmap_enter_addr, "
5608 			    "pmap=%p, va=%#llx, pa=%llu, size=%u, prot=%#x, flags=%#x",
5609 			    __FUNCTION__,
5610 			    pmap, va, (uint64_t)pa, size, prot, flags);
5611 		}
5612 
5613 		va += pmap_page_size;
5614 		pa += pmap_page_size;
5615 	}
5616 
5617 	return KERN_SUCCESS;
5618 }
5619 
5620 kern_return_t
5621 pmap_enter_addr(
5622 	pmap_t pmap,
5623 	vm_map_address_t v,
5624 	pmap_paddr_t pa,
5625 	vm_prot_t prot,
5626 	vm_prot_t fault_type,
5627 	unsigned int flags,
5628 	boolean_t wired)
5629 {
5630 	return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
5631 }
5632 
5633 /*
5634  *	Insert the given physical page (p) at
5635  *	the specified virtual address (v) in the
5636  *	target physical map with the protection requested.
5637  *
5638  *	If specified, the page will be wired down, meaning
5639  *	that the related pte cannot be reclaimed.
5640  *
5641  *	NB:  This is the only routine which MAY NOT lazy-evaluate
5642  *	or lose information.  That is, this routine must actually
5643  *	insert this page into the given map (it must make
5644  *	forward progress eventually).
5645  */
5646 kern_return_t
5647 pmap_enter(
5648 	pmap_t pmap,
5649 	vm_map_address_t v,
5650 	ppnum_t pn,
5651 	vm_prot_t prot,
5652 	vm_prot_t fault_type,
5653 	unsigned int flags,
5654 	boolean_t wired)
5655 {
5656 	return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
5657 }
5658 
5659 /*
5660  * Attempt to commit the pte.
5661  * Succeeds iff able to change *pte_p from old_pte to new_pte.
5662  * Performs no page table or accounting writes on failures.
5663  */
5664 static inline bool
5665 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t *old_pte, pt_entry_t new_pte, vm_map_address_t v)
5666 {
5667 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5668 	bool success = false, changed_wiring = false;
5669 
5670 	__unreachable_ok_push
5671 	if (TEST_PAGE_RATIO_4) {
5672 		/*
5673 		 * 16K virtual pages w/ 4K hw pages.
5674 		 * We actually need to update 4 ptes here which can't easily be done atomically.
5675 		 * As a result we require the exclusive pmap lock.
5676 		 */
5677 		pmap_assert_locked(pmap, PMAP_LOCK_EXCLUSIVE);
5678 		*old_pte = *pte_p;
5679 		if (*old_pte == new_pte) {
5680 			/* Another thread completed this operation. Nothing to do here. */
5681 			success = true;
5682 		} else if (pa_valid(pte_to_pa(new_pte)) && pte_to_pa(*old_pte) != pte_to_pa(new_pte) &&
5683 		    (*old_pte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE) {
5684 			/* pte has been modified by another thread and we hold the wrong PVH lock. Retry. */
5685 			success = false;
5686 		} else {
5687 			write_pte_fast(pte_p, new_pte);
5688 			success = true;
5689 		}
5690 	} else {
5691 		success = os_atomic_cmpxchgv(pte_p, *old_pte, new_pte, old_pte, acq_rel);
5692 	}
5693 	__unreachable_ok_pop
5694 
5695 	if (success && *old_pte != new_pte) {
5696 		if ((*old_pte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE) {
5697 			FLUSH_PTE_STRONG();
5698 			PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false, true);
5699 		} else {
5700 			FLUSH_PTE();
5701 			__builtin_arm_isb(ISB_SY);
5702 		}
5703 		changed_wiring = ARM_PTE_IS_COMPRESSED(*old_pte, pte_p) ?
5704 		    (new_pte & ARM_PTE_WIRED) != 0 :
5705 		    (new_pte & ARM_PTE_WIRED) != (*old_pte & ARM_PTE_WIRED);
5706 
5707 		if (pmap != kernel_pmap && changed_wiring) {
5708 			SInt16  *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
5709 			if (new_pte & ARM_PTE_WIRED) {
5710 				OSAddAtomic16(1, ptd_wiredcnt_ptr);
5711 				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
5712 			} else {
5713 				OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5714 				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
5715 			}
5716 		}
5717 
5718 		PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
5719 		    VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), new_pte);
5720 	}
5721 	return success;
5722 }
5723 
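/*
 * Translate VM_WIMG_* cacheability flags into the corresponding PTE
 * memory-attribute index, shareability, and execute-never bits.  The
 * physical address is only consulted to decide whether a VM_WIMG_IO
 * request targets DRAM and can therefore use the reorderable posted
 * attribute described below.
 */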
5724 MARK_AS_PMAP_TEXT static pt_entry_t
5725 wimg_to_pte(unsigned int wimg, __unused pmap_paddr_t pa)
5726 {
5727 	pt_entry_t pte;
5728 
5729 	switch (wimg & (VM_WIMG_MASK)) {
5730 	case VM_WIMG_IO:
5731 		// Map DRAM addresses with VM_WIMG_IO as Device-GRE instead of
5732 		// Device-nGnRnE. On H14+, accesses to them can be reordered by
5733 		// AP, while preserving the security benefits of using device
5734 		// mapping against side-channel attacks. On pre-H14 platforms,
5735 		// the accesses will still be strongly ordered.
5736 		if (is_dram_addr(pa)) {
5737 			pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
5738 		} else {
5739 			pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5740 		}
5741 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5742 		break;
5743 	case VM_WIMG_RT:
5744 #if HAS_UCNORMAL_MEM
5745 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5746 #else
5747 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5748 #endif
5749 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5750 		break;
5751 	case VM_WIMG_POSTED:
5752 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5753 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5754 		break;
5755 	case VM_WIMG_POSTED_REORDERED:
5756 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
5757 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5758 		break;
5759 	case VM_WIMG_POSTED_COMBINED_REORDERED:
5760 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
5761 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5762 		break;
5763 	case VM_WIMG_WCOMB:
5764 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5765 		pte |= ARM_PTE_NX | ARM_PTE_PNX;
5766 		break;
5767 	case VM_WIMG_WTHRU:
5768 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5769 #if     (__ARM_VMSA__ > 7)
5770 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5771 #else
5772 		pte |= ARM_PTE_SH;
5773 #endif
5774 		break;
5775 	case VM_WIMG_COPYBACK:
5776 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5777 #if     (__ARM_VMSA__ > 7)
5778 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5779 #else
5780 		pte |= ARM_PTE_SH;
5781 #endif
5782 		break;
5783 	case VM_WIMG_INNERWBACK:
5784 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5785 #if     (__ARM_VMSA__ > 7)
5786 		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5787 #else
5788 		pte |= ARM_PTE_SH;
5789 #endif
5790 		break;
5791 	default:
5792 		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5793 #if     (__ARM_VMSA__ > 7)
5794 		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5795 #else
5796 		pte |= ARM_PTE_SH;
5797 #endif
5798 	}
5799 
5800 	return pte;
5801 }
5802 
5803 
5804 /*
5805  * Construct a PTE (and the physical page attributes) for the given virtual to
5806  * physical mapping.
5807  *
5808  * This function has no side effects, which makes it safe to call while
5809  * attempting a pmap_enter transaction.
5810  */
5811 MARK_AS_PMAP_TEXT static pt_entry_t
5812 pmap_construct_pte(
5813 	const pmap_t pmap,
5814 	vm_map_address_t va,
5815 	pmap_paddr_t pa,
5816 	vm_prot_t prot,
5817 	vm_prot_t fault_type,
5818 	boolean_t wired,
5819 	const pt_attr_t* const pt_attr,
5820 	uint16_t *pp_attr_bits /* OUTPUT */
5821 	)
5822 {
5823 	bool set_NX = false, set_XO = false;
5824 	pt_entry_t pte = pa_to_pte(pa) | ARM_PTE_TYPE;
5825 	assert(pp_attr_bits != NULL);
5826 	*pp_attr_bits = 0;
5827 
5828 	if (wired) {
5829 		pte |= ARM_PTE_WIRED;
5830 	}
5831 
5832 #if DEVELOPMENT || DEBUG
5833 	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5834 #else
5835 	if ((prot & VM_PROT_EXECUTE))
5836 #endif
5837 	{
5838 		set_NX = false;
5839 	} else {
5840 		set_NX = true;
5841 	}
5842 
5843 #if (__ARM_VMSA__ > 7)
5844 	if (prot == VM_PROT_EXECUTE) {
5845 		set_XO = true;
5846 	}
5847 #endif
5848 
5849 	if (set_NX) {
5850 		pte |= pt_attr_leaf_xn(pt_attr);
5851 	} else {
5852 #if     (__ARM_VMSA__ > 7)
5853 		if (pmap == kernel_pmap) {
5854 			pte |= ARM_PTE_NX;
5855 		} else {
5856 			pte |= pt_attr_leaf_x(pt_attr);
5857 		}
5858 #endif
5859 	}
5860 
5861 	if (pmap == kernel_pmap) {
5862 #if __ARM_KERNEL_PROTECT__
5863 		pte |= ARM_PTE_NG;
5864 #endif /* __ARM_KERNEL_PROTECT__ */
5865 		if (prot & VM_PROT_WRITE) {
5866 			pte |= ARM_PTE_AP(AP_RWNA);
5867 			*pp_attr_bits |= PP_ATTR_MODIFIED | PP_ATTR_REFERENCED;
5868 		} else {
5869 			pte |= ARM_PTE_AP(AP_RONA);
5870 			*pp_attr_bits |= PP_ATTR_REFERENCED;
5871 		}
5872 #if     (__ARM_VMSA__ == 7)
5873 		if ((_COMM_PAGE_BASE_ADDRESS <= va) && (va < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
5874 			pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
5875 		}
5876 #endif
5877 	} else {
5878 		if (pmap->type != PMAP_TYPE_NESTED) {
5879 			pte |= ARM_PTE_NG;
5880 		} else if ((pmap->nested_region_asid_bitmap)
5881 		    && (va >= pmap->nested_region_addr)
5882 		    && (va < (pmap->nested_region_addr + pmap->nested_region_size))) {
5883 			unsigned int index = (unsigned int)((va - pmap->nested_region_addr)  >> pt_attr_twig_shift(pt_attr));
5884 
5885 			if ((pmap->nested_region_asid_bitmap)
5886 			    && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
5887 				pte |= ARM_PTE_NG;
5888 			}
5889 		}
5890 #if MACH_ASSERT
5891 		if (pmap->nested_pmap != NULL) {
5892 			vm_map_address_t nest_vaddr;
5893 			pt_entry_t *nest_pte_p;
5894 
5895 			nest_vaddr = va;
5896 
5897 			if ((nest_vaddr >= pmap->nested_region_addr)
5898 			    && (nest_vaddr < (pmap->nested_region_addr + pmap->nested_region_size))
5899 			    && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
5900 			    && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
5901 			    && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
5902 			    && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
5903 				unsigned int index = (unsigned int)((va - pmap->nested_region_addr)  >> pt_attr_twig_shift(pt_attr));
5904 
5905 				if ((pmap->nested_pmap->nested_region_asid_bitmap)
5906 				    && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
5907 					panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p va=0x%llx spte=0x%llx",
5908 					    nest_pte_p, pmap, (uint64_t)va, (uint64_t)*nest_pte_p);
5909 				}
5910 			}
5911 		}
5912 #endif
5913 		if (prot & VM_PROT_WRITE) {
5914 			assert(pmap->type != PMAP_TYPE_NESTED);
5915 			if (pa_valid(pa) && (!ppattr_pa_test_bits(pa, PP_ATTR_MODIFIED))) {
5916 				if (fault_type & VM_PROT_WRITE) {
5917 					if (set_XO) {
5918 						pte |= pt_attr_leaf_rwna(pt_attr);
5919 					} else {
5920 						pte |= pt_attr_leaf_rw(pt_attr);
5921 					}
5922 					*pp_attr_bits |= PP_ATTR_REFERENCED | PP_ATTR_MODIFIED;
5923 				} else {
5924 					if (set_XO) {
5925 						pte |= pt_attr_leaf_rona(pt_attr);
5926 					} else {
5927 						pte |= pt_attr_leaf_ro(pt_attr);
5928 					}
5929 					/*
5930 					 * Mark the page as MODFAULT so that a subsequent write
5931 					 * may be handled through arm_fast_fault().
5932 					 */
5933 					*pp_attr_bits |= PP_ATTR_REFERENCED | PP_ATTR_MODFAULT;
5934 					pte_set_was_writeable(pte, true);
5935 				}
5936 			} else {
5937 				if (set_XO) {
5938 					pte |= pt_attr_leaf_rwna(pt_attr);
5939 				} else {
5940 					pte |= pt_attr_leaf_rw(pt_attr);
5941 				}
5942 				*pp_attr_bits |= PP_ATTR_REFERENCED;
5943 			}
5944 		} else {
5945 			if (set_XO) {
5946 				pte |= pt_attr_leaf_rona(pt_attr);
5947 			} else {
5948 				pte |= pt_attr_leaf_ro(pt_attr);
5949 			}
5950 			*pp_attr_bits |= PP_ATTR_REFERENCED;
5951 		}
5952 	}
5953 
5954 	pte |= ARM_PTE_AF;
5955 	return pte;
5956 }
5957 
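/*
 * Core implementation of pmap_enter().  The mapping is installed as a
 * retryable transaction: the PTE is normally committed with a
 * compare-and-exchange in pmap_enter_pte() while the pmap lock is held
 * shared, so any step that must drop a lock (pmap expansion, PV-entry
 * allocation, upgrading to the exclusive lock) reloads the PTE and retries.
 * PV-list, page-attribute, and ledger updates are applied only once the
 * commit succeeds.
 */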
5958 MARK_AS_PMAP_TEXT kern_return_t
5959 pmap_enter_options_internal(
5960 	pmap_t pmap,
5961 	vm_map_address_t v,
5962 	pmap_paddr_t pa,
5963 	vm_prot_t prot,
5964 	vm_prot_t fault_type,
5965 	unsigned int flags,
5966 	boolean_t wired,
5967 	unsigned int options)
5968 {
5969 	ppnum_t         pn = (ppnum_t)atop(pa);
5970 	pt_entry_t      pte;
5971 	pt_entry_t      spte;
5972 	pt_entry_t      *pte_p;
5973 	bool            refcnt_updated;
5974 	bool            wiredcnt_updated;
5975 	bool            ro_va = false;
5976 	unsigned int    wimg_bits;
5977 	bool            committed = false, drop_refcnt = false, had_valid_mapping = false, skip_footprint_debit = false;
5978 	pmap_lock_mode_t lock_mode = PMAP_LOCK_SHARED;
5979 	kern_return_t   kr = KERN_SUCCESS;
5980 	uint16_t pp_attr_bits;
5981 	volatile uint16_t *refcnt;
5982 	volatile uint16_t *wiredcnt;
5983 	pv_free_list_t *local_pv_free;
5984 
5985 	validate_pmap_mutable(pmap);
5986 
5987 #if XNU_MONITOR
5988 	if (__improbable((options & PMAP_OPTIONS_NOWAIT) == 0)) {
5989 		panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
5990 	}
5991 #endif
5992 
5993 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5994 
5995 	if ((v) & pt_attr_leaf_offmask(pt_attr)) {
5996 		panic("pmap_enter_options() pmap %p v 0x%llx",
5997 		    pmap, (uint64_t)v);
5998 	}
5999 
6000 	if ((pa) & pt_attr_leaf_offmask(pt_attr)) {
6001 		panic("pmap_enter_options() pmap %p pa 0x%llx",
6002 		    pmap, (uint64_t)pa);
6003 	}
6004 
6005 	/* The PA should not extend beyond the architected physical address space */
6006 	pa &= ARM_PTE_PAGE_MASK;
6007 
6008 	if ((prot & VM_PROT_EXECUTE) && (pmap == kernel_pmap)) {
6009 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
6010 		extern vm_offset_t ctrr_test_page;
6011 		if (__probable(v != ctrr_test_page))
6012 #endif
6013 		panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
6014 	}
6015 	if (__improbable((pmap == kernel_pmap) && zone_spans_ro_va(v, v + pt_attr_page_size(pt_attr)))) {
6016 		if (__improbable(prot != VM_PROT_READ)) {
6017 			panic("%s: attempt to map RO zone VA 0x%llx with prot 0x%x",
6018 			    __func__, (unsigned long long)v, prot);
6019 		}
6020 		ro_va = true;
6021 	}
6022 	assert(pn != vm_page_fictitious_addr);
6023 
6024 	refcnt_updated = false;
6025 	wiredcnt_updated = false;
6026 
6027 	if ((prot & VM_PROT_EXECUTE) || TEST_PAGE_RATIO_4) {
6028 		/*
6029 		 * We need to take the lock exclusive here because of SPLAY_FIND in pmap_cs_enforce.
6030 		 *
6031 		 * See rdar://problem/59655632 for thoughts on synchronization and the splay tree
6032 		 */
6033 		lock_mode = PMAP_LOCK_EXCLUSIVE;
6034 	}
6035 	pmap_lock(pmap, lock_mode);
6036 
6037 	/*
6038 	 *	Expand pmap to include this pte.  Assume that
6039 	 *	pmap is always expanded to include enough hardware
6040 	 *	pages to map one VM page.
6041 	 */
6042 	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
6043 		/* Must unlock to expand the pmap. */
6044 		pmap_unlock(pmap, lock_mode);
6045 
6046 		kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
6047 
6048 		if (kr != KERN_SUCCESS) {
6049 			return kr;
6050 		}
6051 
6052 		pmap_lock(pmap, lock_mode);
6053 	}
6054 
6055 	if (options & PMAP_OPTIONS_NOENTER) {
6056 		pmap_unlock(pmap, lock_mode);
6057 		return KERN_SUCCESS;
6058 	}
6059 
6060 	/*
6061 	 * Since we may not hold the pmap lock exclusive, updating the pte is
6062 	 * done via a cmpxchg loop.
6063 	 * We need to be careful about modifying non-local data structures before committing
6064 	 * the new pte since we may need to re-do the transaction.
6065 	 */
6066 	spte = os_atomic_load(pte_p, relaxed);
6067 	while (!committed) {
6068 		refcnt = NULL;
6069 		wiredcnt = NULL;
6070 		pv_alloc_return_t pv_status = PV_ALLOC_SUCCESS;
6071 		had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
6072 
6073 		if (pmap != kernel_pmap) {
6074 			ptd_info_t *ptd_info = ptep_get_info(pte_p);
6075 			refcnt = &ptd_info->refcnt;
6076 			wiredcnt = &ptd_info->wiredcnt;
6077 			/*
6078 			 * Bump the wired count to keep the PTE page from being reclaimed.  We need this because
6079 			 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
6080 			 * or acquire the pmap lock exclusive.
6081 			 */
6082 			if (!wiredcnt_updated) {
6083 				OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
6084 				wiredcnt_updated = true;
6085 			}
6086 			if (!refcnt_updated) {
6087 				OSAddAtomic16(1, (volatile int16_t*)refcnt);
6088 				refcnt_updated = true;
6089 				drop_refcnt = true;
6090 			}
6091 		}
6092 
6093 		if (had_valid_mapping && (pte_to_pa(spte) != pa)) {
6094 			/*
6095 			 * There is already a mapping here & it's for a different physical page.
6096 			 * First remove that mapping.
6097 			 *
6098 			 * This requires that we take the pmap lock exclusive in order to call pmap_remove_range.
6099 			 */
6100 			if (lock_mode == PMAP_LOCK_SHARED) {
6101 				if (pmap_lock_shared_to_exclusive(pmap)) {
6102 					lock_mode = PMAP_LOCK_EXCLUSIVE;
6103 				} else {
6104 					/*
6105 					 * We failed to upgrade to an exclusive lock.
6106 					 * As a result we no longer hold the lock at all,
6107 					 * so we need to re-acquire it and restart the transaction.
6108 					 */
6109 					pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6110 					lock_mode = PMAP_LOCK_EXCLUSIVE;
6111 					/* pmap might have changed after we dropped the lock. Try again. */
6112 					spte = os_atomic_load(pte_p, relaxed);
6113 					continue;
6114 				}
6115 			}
6116 			pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO);
6117 			spte = ARM_PTE_TYPE_FAULT;
6118 			assert(os_atomic_load(pte_p, acquire) == ARM_PTE_TYPE_FAULT);
6119 		}
6120 
6121 		pte = pmap_construct_pte(pmap, v, pa, prot, fault_type, wired, pt_attr, &pp_attr_bits);
6122 
6123 		if (pa_valid(pa)) {
6124 			unsigned int pai;
6125 			boolean_t   is_altacct = FALSE, is_internal = FALSE, is_reusable = FALSE, is_external = FALSE;
6126 
6127 			is_internal = FALSE;
6128 			is_altacct = FALSE;
6129 
6130 			pai = pa_index(pa);
6131 
6132 			pvh_lock(pai);
6133 
6134 			/*
6135 			 * Make sure that the current per-cpu PV free list has
6136 			 * enough entries (2 in the worst-case scenario) to handle the enter_pv
6137 			 * if the transaction succeeds. We're either in the
6138 			 * PPL (which can't be preempted) or we've explicitly disabled preemptions.
6139 	 * PPL (which can't be preempted) or we've explicitly disabled preemption.
6140 			 * interrupt handler can never enter the pmap.
6141 			 */
6142 #if !XNU_MONITOR
6143 			assert(get_preemption_level() > 0);
6144 #endif
6145 			local_pv_free = &pmap_get_cpu_data()->pv_free;
6146 			pv_entry_t **pv_h = pai_to_pvh(pai);
6147 			const bool allocation_required = !pvh_test_type(pv_h, PVH_TYPE_NULL) &&
6148 			    !(pvh_test_type(pv_h, PVH_TYPE_PTEP) && pvh_ptep(pv_h) == pte_p);
6149 
6150 			if (__improbable(allocation_required && (local_pv_free->count < 2))) {
6151 				pv_entry_t *new_pve_p[2] = {PV_ENTRY_NULL};
6152 				int new_allocated_pves = 0;
6153 
6154 				while (new_allocated_pves < 2) {
6155 					local_pv_free = &pmap_get_cpu_data()->pv_free;
6156 					pv_status = pv_alloc(pmap, pai, lock_mode, &new_pve_p[new_allocated_pves]);
6157 					if (pv_status == PV_ALLOC_FAIL) {
6158 						break;
6159 					} else if (pv_status == PV_ALLOC_RETRY) {
6160 						/*
6161 						 * In the case that pv_alloc() had to grab a new page of PVEs,
6162 						 * it will have dropped the pmap lock while doing so.
6163 						 * On non-PPL devices, dropping the lock re-enables preemption so we may
6164 						 * be on a different CPU now.
6165 						 */
6166 						local_pv_free = &pmap_get_cpu_data()->pv_free;
6167 					} else {
6168 						/* If we've gotten this far then a node should've been allocated. */
6169 						assert(new_pve_p[new_allocated_pves] != PV_ENTRY_NULL);
6170 
6171 						new_allocated_pves++;
6172 					}
6173 				}
6174 
6175 				for (int i = 0; i < new_allocated_pves; i++) {
6176 					pv_free(new_pve_p[i]);
6177 				}
6178 			}
6179 
6180 			if (pv_status == PV_ALLOC_FAIL) {
6181 				pvh_unlock(pai);
6182 				kr = KERN_RESOURCE_SHORTAGE;
6183 				break;
6184 			} else if (pv_status == PV_ALLOC_RETRY) {
6185 				pvh_unlock(pai);
6186 				/* We dropped the pmap and PVH locks to allocate. Retry transaction. */
6187 				spte = os_atomic_load(pte_p, relaxed);
6188 				continue;
6189 			}
6190 
6191 			if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6192 				wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6193 			} else {
6194 				wimg_bits = pmap_cache_attributes(pn);
6195 			}
6196 
6197 			/* We may be retrying this operation after dropping the PVH lock.
6198 			 * Cache attributes for the physical page may have changed while the lock
6199 			 * was dropped, so clear any cache attributes we may have previously set
6200 			 * in the PTE template. */
6201 			pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6202 			pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits, pa);
6203 
6204 #if XNU_MONITOR
6205 			/* The regular old kernel is not allowed to remap PPL pages. */
6206 			if (__improbable(ppattr_pa_test_monitor(pa))) {
6207 				panic("%s: page belongs to PPL, "
6208 				    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
6209 				    __FUNCTION__,
6210 				    pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
6211 			}
6212 
6213 			if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN_MASK)) {
6214 				panic("%s: page locked down, "
6215 				    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
6216 				    __FUNCTION__,
6217 				    pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
6218 			}
6219 #endif
6220 
6221 
6222 			committed = pmap_enter_pte(pmap, pte_p, &spte, pte, v);
6223 			if (!committed) {
6224 				pvh_unlock(pai);
6225 				continue;
6226 			}
6227 			had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
6228 			/* End of transaction. Commit pv changes, pa bits, and memory accounting. */
6229 
6230 			assert(!had_valid_mapping || (pte_to_pa(spte) == pa));
6231 			/*
6232 			 * If there was already a valid pte here then we reuse its reference
6233 			 * on the ptd and drop the one that we took above.
6234 			 */
6235 			drop_refcnt = had_valid_mapping;
6236 
6237 			if (!had_valid_mapping) {
6238 				pv_entry_t *new_pve_p = PV_ENTRY_NULL;
6239 				int pve_ptep_idx = 0;
6240 				pv_status = pmap_enter_pv(pmap, pte_p, pai, options, lock_mode, &new_pve_p, &pve_ptep_idx);
6241 				/* We did all the allocations up top. So this shouldn't be able to fail. */
6242 				if (pv_status != PV_ALLOC_SUCCESS) {
6243 					panic("%s: unexpected pmap_enter_pv ret code: %d. new_pve_p=%p pmap=%p",
6244 					    __func__, pv_status, new_pve_p, pmap);
6245 				}
6246 
6247 				if (pmap != kernel_pmap) {
6248 					if (((options & PMAP_OPTIONS_ALT_ACCT) ||
6249 					    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6250 					    ppattr_test_internal(pai)) {
6251 						/*
6252 						 * Make a note to ourselves that this
6253 						 * mapping is using alternative
6254 						 * accounting. We'll need this in order
6255 						 * to know which ledger to debit when
6256 						 * the mapping is removed.
6257 						 *
6258 						 * The altacct bit must be set while
6259 						 * the pv head is locked. Defer the
6260 						 * ledger accounting until after we've
6261 						 * dropped the lock.
6262 						 */
6263 						ppattr_pve_set_altacct(pai, new_pve_p, pve_ptep_idx);
6264 						is_altacct = TRUE;
6265 					}
6266 					if (ppattr_test_reusable(pai) &&
6267 					    !is_altacct) {
6268 						assert(ppattr_test_internal(pai));
6269 						is_reusable = TRUE;
6270 					} else if (ppattr_test_internal(pai)) {
6271 						is_internal = TRUE;
6272 					} else {
6273 						is_external = TRUE;
6274 					}
6275 				}
6276 			}
6277 
6278 			pvh_unlock(pai);
6279 
6280 			if (pp_attr_bits != 0) {
6281 				ppattr_pa_set_bits(pa, pp_attr_bits);
6282 			}
6283 
6284 			if (!had_valid_mapping && (pmap != kernel_pmap)) {
6285 				pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6286 
6287 				if (is_internal) {
6288 					/*
6289 					 * Make corresponding adjustments to
6290 					 * phys_footprint statistics.
6291 					 */
6292 					pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6293 					if (is_altacct) {
6294 						/*
6295 						 * If this page is internal and
6296 						 * in an IOKit region, credit
6297 						 * the task's total count of
6298 						 * dirty, internal IOKit pages.
6299 						 * It should *not* count towards
6300 						 * the task's total physical
6301 						 * memory footprint, because
6302 						 * this entire region was
6303 						 * already billed to the task
6304 						 * at the time the mapping was
6305 						 * created.
6306 						 *
6307 						 * Put another way, this is
6308 						 * internal++ and
6309 						 * alternate_accounting++, so
6310 						 * net effect on phys_footprint
6311 						 * is 0. That means: don't
6312 						 * touch phys_footprint here.
6313 						 */
6314 						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6315 					} else {
6316 						if (ARM_PTE_IS_COMPRESSED(spte, pte_p) && !(spte & ARM_PTE_COMPRESSED_ALT)) {
6317 							/* Replacing a compressed page (with internal accounting). No change to phys_footprint. */
6318 							skip_footprint_debit = true;
6319 						} else {
6320 							pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6321 						}
6322 					}
6323 				}
6324 				if (is_reusable) {
6325 					pmap_ledger_credit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6326 				} else if (is_external) {
6327 					pmap_ledger_credit(pmap, task_ledgers.external, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6328 				}
6329 			}
6330 		} else {
6331 			if (prot & VM_PROT_EXECUTE) {
6332 				kr = KERN_FAILURE;
6333 				break;
6334 			}
6335 
6336 			wimg_bits = pmap_cache_attributes(pn);
6337 			if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6338 				wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6339 			}
6340 
6341 			pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits, pa);
6342 
6343 #if XNU_MONITOR
6344 			if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
6345 				uint64_t xprr_perm = pte_to_xprr_perm(pte);
6346 				switch (xprr_perm) {
6347 				case XPRR_KERN_RO_PERM:
6348 					break;
6349 				case XPRR_KERN_RW_PERM:
6350 					pte &= ~ARM_PTE_XPRR_MASK;
6351 					pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
6352 					break;
6353 				default:
6354 					panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
6355 				}
6356 			}
6357 #endif
6358 			committed = pmap_enter_pte(pmap, pte_p, &spte, pte, v);
6359 			if (committed) {
6360 				had_valid_mapping = (spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE;
6361 				assert(!had_valid_mapping || (pte_to_pa(spte) == pa));
6362 
6363 				/**
6364 				 * If there was already a valid pte here then we reuse its
6365 				 * reference on the ptd and drop the one that we took above.
6366 				 */
6367 				drop_refcnt = had_valid_mapping;
6368 			}
6369 		}
6370 		if (committed) {
6371 			if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
6372 				assert(pmap != kernel_pmap);
6373 
6374 				/* One less "compressed" */
6375 				pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
6376 				    pt_attr_page_size(pt_attr) * PAGE_RATIO);
6377 
6378 				if (spte & ARM_PTE_COMPRESSED_ALT) {
6379 					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6380 				} else if (!skip_footprint_debit) {
6381 					/* Was part of the footprint */
6382 					pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6383 				}
6384 				/* The old entry held a reference so drop the extra one that we took above. */
6385 				drop_refcnt = true;
6386 			}
6387 		}
6388 	}
6389 
6390 	if (drop_refcnt && refcnt != NULL) {
6391 		assert(refcnt_updated);
6392 		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
6393 			panic("pmap_enter(): over-release of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
6394 		}
6395 	}
6396 
6397 	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
6398 		panic("pmap_enter(): over-unwire of ptdp %p for pte %p", ptep_get_ptd(pte_p), pte_p);
6399 	}
6400 
6401 	pmap_unlock(pmap, lock_mode);
6402 
6403 	if (__improbable(ro_va && kr == KERN_SUCCESS)) {
6404 		pmap_phys_write_disable(v);
6405 	}
6406 
6407 	return kr;
6408 }
6409 
6410 kern_return_t
6411 pmap_enter_options_addr(
6412 	pmap_t pmap,
6413 	vm_map_address_t v,
6414 	pmap_paddr_t pa,
6415 	vm_prot_t prot,
6416 	vm_prot_t fault_type,
6417 	unsigned int flags,
6418 	boolean_t wired,
6419 	unsigned int options,
6420 	__unused void   *arg)
6421 {
6422 	kern_return_t kr = KERN_FAILURE;
6423 
6424 
6425 	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6426 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);
6427 
6428 
6429 #if XNU_MONITOR
6430 	/*
6431 	 * If NOWAIT was not requested, loop until the enter does not
6432 	 * fail due to lack of resources.
6433 	 */
6434 	while ((kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
6435 		pmap_alloc_page_for_ppl((options & PMAP_OPTIONS_NOWAIT) ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
6436 		if (options & PMAP_OPTIONS_NOWAIT) {
6437 			break;
6438 		}
6439 	}
6440 
6441 	pmap_ledger_check_balance(pmap);
6442 #else
6443 	kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
6444 #endif
6445 
6446 	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6447 
6448 	return kr;
6449 }
6450 
6451 kern_return_t
6452 pmap_enter_options(
6453 	pmap_t pmap,
6454 	vm_map_address_t v,
6455 	ppnum_t pn,
6456 	vm_prot_t prot,
6457 	vm_prot_t fault_type,
6458 	unsigned int flags,
6459 	boolean_t wired,
6460 	unsigned int options,
6461 	__unused void   *arg)
6462 {
6463 	return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
6464 }
6465 
6466 /*
6467  *	Routine:	pmap_change_wiring
6468  *	Function:	Change the wiring attribute for a map/virtual-address
6469  *			pair.
6470  *	In/out conditions:
6471  *			The mapping must already exist in the pmap.
6472  */
6473 MARK_AS_PMAP_TEXT void
6474 pmap_change_wiring_internal(
6475 	pmap_t pmap,
6476 	vm_map_address_t v,
6477 	boolean_t wired)
6478 {
6479 	pt_entry_t     *pte_p;
6480 	pmap_paddr_t    pa;
6481 
6482 	validate_pmap_mutable(pmap);
6483 
6484 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6485 
6486 	const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
6487 
6488 	pte_p = pmap_pte(pmap, v);
6489 	if (pte_p == PT_ENTRY_NULL) {
6490 		if (!wired) {
6491 			/*
6492 			 * The PTE may have already been cleared by a disconnect/remove operation, and the L3 table
6493 			 * may have been freed by a remove operation.
6494 			 */
6495 			goto pmap_change_wiring_return;
6496 		} else {
6497 			panic("%s: Attempt to wire nonexistent PTE for pmap %p", __func__, pmap);
6498 		}
6499 	}
6500 	/*
6501 	 * Use volatile loads to prevent the compiler from collapsing references to 'pa' back to loads of pte_p
6502 	 * until we've grabbed the final PVH lock; PTE contents may change during this time.
6503 	 */
6504 	pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
6505 
6506 	while (pa_valid(pa)) {
6507 		pmap_paddr_t new_pa;
6508 
6509 		pvh_lock(pa_index(pa));
6510 		new_pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
6511 
6512 		if (pa == new_pa) {
6513 			break;
6514 		}
6515 
6516 		pvh_unlock(pa_index(pa));
6517 		pa = new_pa;
6518 	}
6519 
6520 	/* PTE checks must be performed after acquiring the PVH lock (if applicable for the PA) */
6521 	if ((*pte_p == ARM_PTE_EMPTY) || (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
6522 		if (!wired) {
6523 			/* PTE cleared by prior remove/disconnect operation */
6524 			goto pmap_change_wiring_cleanup;
6525 		} else {
6526 			panic("%s: Attempt to wire empty/compressed PTE %p (=0x%llx) for pmap %p",
6527 			    __func__, pte_p, (uint64_t)*pte_p, pmap);
6528 		}
6529 	}
6530 
6531 	assertf((*pte_p & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", pte_p, (uint64_t)*pte_p);
6532 	if (wired != pte_is_wired(*pte_p)) {
6533 		pte_set_wired(pmap, pte_p, wired);
6534 		if (pmap != kernel_pmap) {
6535 			if (wired) {
6536 				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6537 			} else {
6538 				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
6539 			}
6540 		}
6541 	}
6542 
6543 pmap_change_wiring_cleanup:
6544 	if (pa_valid(pa)) {
6545 		pvh_unlock(pa_index(pa));
6546 	}
6547 
6548 pmap_change_wiring_return:
6549 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6550 }
6551 
6552 void
6553 pmap_change_wiring(
6554 	pmap_t pmap,
6555 	vm_map_address_t v,
6556 	boolean_t wired)
6557 {
6558 #if XNU_MONITOR
6559 	pmap_change_wiring_ppl(pmap, v, wired);
6560 
6561 	pmap_ledger_check_balance(pmap);
6562 #else
6563 	pmap_change_wiring_internal(pmap, v, wired);
6564 #endif
6565 }
6566 
6567 MARK_AS_PMAP_TEXT pmap_paddr_t
6568 pmap_find_pa_internal(
6569 	pmap_t pmap,
6570 	addr64_t va)
6571 {
6572 	pmap_paddr_t    pa = 0;
6573 
6574 	validate_pmap(pmap);
6575 
6576 	if (pmap != kernel_pmap) {
6577 		pmap_lock(pmap, PMAP_LOCK_SHARED);
6578 	}
6579 
6580 	pa = pmap_vtophys(pmap, va);
6581 
6582 	if (pmap != kernel_pmap) {
6583 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
6584 	}
6585 
6586 	return pa;
6587 }
6588 
6589 pmap_paddr_t
6590 pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
6591 {
6592 	pmap_paddr_t pa = 0;
6593 
6594 	if (pmap == kernel_pmap) {
6595 		pa = mmu_kvtop(va);
6596 	} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
6597 		/*
6598 		 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
6599 		 * translation even if PAN would prevent kernel access through the translation.
6600 		 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
6601 		 */
6602 		pa = mmu_uvtop(va);
6603 	}
6604 	return pa;
6605 }
6606 
6607 pmap_paddr_t
6608 pmap_find_pa(
6609 	pmap_t pmap,
6610 	addr64_t va)
6611 {
6612 	pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);
6613 
6614 	if (pa != 0) {
6615 		return pa;
6616 	}
6617 
6618 	if (not_in_kdp) {
6619 #if XNU_MONITOR
6620 		return pmap_find_pa_ppl(pmap, va);
6621 #else
6622 		return pmap_find_pa_internal(pmap, va);
6623 #endif
6624 	} else {
6625 		return pmap_vtophys(pmap, va);
6626 	}
6627 }
6628 
6629 ppnum_t
6630 pmap_find_phys_nofault(
6631 	pmap_t pmap,
6632 	addr64_t va)
6633 {
6634 	ppnum_t ppn;
6635 	ppn = atop(pmap_find_pa_nofault(pmap, va));
6636 	return ppn;
6637 }
6638 
6639 ppnum_t
6640 pmap_find_phys(
6641 	pmap_t pmap,
6642 	addr64_t va)
6643 {
6644 	ppnum_t ppn;
6645 	ppn = atop(pmap_find_pa(pmap, va));
6646 	return ppn;
6647 }
6648 
6649 /**
6650  * Translate a kernel virtual address into a physical address.
6651  *
6652  * @param va The kernel virtual address to translate. Does not work on user
6653  *           virtual addresses.
6654  *
6655  * @return The physical address if the translation was successful, or zero if
6656  *         no valid mappings were found for the given virtual address.
6657  */
6658 pmap_paddr_t
6659 kvtophys(vm_offset_t va)
6660 {
6661 	/**
6662 	 * Attempt to do the translation first in hardware using the AT (address
6663 	 * translation) instruction. This will attempt to use the MMU to do the
6664 	 * translation for us.
6665 	 */
6666 	pmap_paddr_t pa = mmu_kvtop(va);
6667 
6668 	if (pa) {
6669 		return pa;
6670 	}
6671 
6672 	/* If the MMU can't find the mapping, then manually walk the page tables. */
6673 	return pmap_vtophys(kernel_pmap, va);
6674 }
6675 
6676 /**
6677  * Variant of kvtophys that can't fail. If no mapping is found or the mapping
6678  * points to a non-kernel-managed physical page, then this call will panic().
6679  *
6680  * @note The output of this function is guaranteed to be a kernel-managed
6681  *       physical page, which means it's safe to pass the output directly to
6682  *       pa_index() to create a physical address index for various pmap data
6683  *       structures.
6684  *
6685  * @param va The kernel virtual address to translate. Does not work on user
6686  *           virtual addresses.
6687  *
6688  * @return The translated physical address for the given virtual address.
6689  */
6690 pmap_paddr_t
6691 kvtophys_nofail(vm_offset_t va)
6692 {
6693 	pmap_paddr_t pa = kvtophys(va);
6694 
6695 	if (!pa_valid(pa)) {
6696 		panic("%s: Invalid or non-kernel-managed physical page returned, "
6697 		    "pa: %#llx, va: %p", __func__, (uint64_t)pa, (void *)va);
6698 	}
6699 
6700 	return pa;
6701 }
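/*
 * Illustrative sketch only (not part of the pmap interface; the helper name is
 * hypothetical): because kvtophys_nofail() guarantees a kernel-managed
 * physical page, its result may be fed directly to pa_index()/pai_to_pvh().
 */
#if 0
static pv_entry_t **
example_pvh_for_kva(vm_offset_t va)
{
	/* Panics (rather than returning 0) if 'va' has no managed mapping. */
	return pai_to_pvh(pa_index(kvtophys_nofail(va)));
}
#endif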
6702 
6703 pmap_paddr_t
6704 pmap_vtophys(
6705 	pmap_t pmap,
6706 	addr64_t va)
6707 {
6708 	if ((va < pmap->min) || (va >= pmap->max)) {
6709 		return 0;
6710 	}
6711 
6712 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6713 
6714 #if (__ARM_VMSA__ == 7)
6715 	tt_entry_t     *tte_p, tte;
6716 	pt_entry_t     *pte_p;
6717 	pmap_paddr_t    pa;
6718 
6719 	tte_p = pmap_tte(pmap, va);
6720 	if (tte_p == (tt_entry_t *) NULL) {
6721 		return (pmap_paddr_t) 0;
6722 	}
6723 
6724 	tte = *tte_p;
6725 	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6726 		pte_p = (pt_entry_t *) ttetokv(tte) + pte_index(pt_attr, va);
6727 		pa = pte_to_pa(*pte_p) | (va & ARM_PGMASK);
6729 #if DEVELOPMENT || DEBUG
6730 		if (atop(pa) != 0 &&
6731 		    ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
6732 			panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x",
6733 			    pmap, va, pte_p, (uint64_t) (*pte_p), atop(pa));
6734 		}
6735 #endif /* DEVELOPMENT || DEBUG */
6736 	} else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6737 		if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
6738 			pa = suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK);
6739 		} else {
6740 			pa = sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK);
6741 		}
6742 	} else {
6743 		pa = 0;
6744 	}
6745 #else
6746 	tt_entry_t * ttp = NULL;
6747 	tt_entry_t * ttep = NULL;
6748 	tt_entry_t   tte = ARM_TTE_EMPTY;
6749 	pmap_paddr_t pa = 0;
6750 	unsigned int cur_level;
6751 
6752 	ttp = pmap->tte;
6753 
6754 	for (cur_level = pt_attr_root_level(pt_attr); cur_level <= pt_attr_leaf_level(pt_attr); cur_level++) {
6755 		ttep = &ttp[ttn_index(pt_attr, va, cur_level)];
6756 
6757 		tte = *ttep;
6758 
6759 		const uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
6760 		const uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
6761 		const uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
6762 		const uint64_t offmask = pt_attr->pta_level_info[cur_level].offmask;
6763 
6764 		if ((tte & valid_mask) != valid_mask) {
6765 			return (pmap_paddr_t) 0;
6766 		}
6767 
6768 		/* This detects both leaf entries and intermediate block mappings. */
6769 		if ((tte & type_mask) == type_block) {
6770 			pa = ((tte & ARM_TTE_PA_MASK & ~offmask) | (va & offmask));
6771 			break;
6772 		}
6773 
6774 		ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
6775 	}
6776 #endif
6777 
6778 	return pa;
6779 }
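/*
 * Illustrative sketch only (helper name hypothetical): pmap_vtophys() returns
 * the physical address with the low-order offset bits of 'va' preserved, for
 * both leaf and block mappings, or 0 when no valid translation exists.
 */
#if 0
static pmap_paddr_t
example_vtophys_offset_preserved(pmap_t pmap, addr64_t va)
{
	const uint64_t pgmask = pt_attr_page_size(pmap_get_pt_attr(pmap)) - 1;
	pmap_paddr_t pa = pmap_vtophys(pmap, va);

	/* A successful translation keeps the within-page offset intact. */
	assert((pa == 0) || ((pa & pgmask) == ((pmap_paddr_t)va & pgmask)));
	return pa;
}
#endif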
6780 
6781 /*
6782  *	pmap_init_pte_page - Initialize a page table page.
6783  */
6784 MARK_AS_PMAP_TEXT void
6785 pmap_init_pte_page(
6786 	pmap_t pmap,
6787 	pt_entry_t *pte_p,
6788 	vm_offset_t va,
6789 	unsigned int ttlevel,
6790 	boolean_t alloc_ptd)
6791 {
6792 	pt_desc_t   *ptdp = NULL;
6793 	pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p)));
6794 
6795 	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
6796 		if (alloc_ptd) {
6797 			/*
6798 			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
6799 			 * on 4KB hardware, we may already have allocated a page table descriptor for a
6800 			 * bootstrap request, so we check for an existing PTD here.
6801 			 */
6802 			ptdp = ptd_alloc(pmap);
6803 			if (ptdp == NULL) {
6804 				panic("%s: unable to allocate PTD", __func__);
6805 			}
6806 			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
6807 		} else {
6808 			panic("pmap_init_pte_page(): pte_p %p", pte_p);
6809 		}
6810 	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
6811 		ptdp = pvh_ptd(pvh);
6812 	} else {
6813 		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
6814 	}
6815 
6816 	// The barrier below ensures that prior updates to the page are visible to the
6817 	// page table walker (PTW) before the page is linked into the previous-level TTE.
6818 	__builtin_arm_dmb(DMB_ISHST);
6819 	ptd_info_init(ptdp, pmap, va, ttlevel, pte_p);
6820 }
6821 
6822 /*
6823  *	Routine:	pmap_expand
6824  *
6825  *	Expands a pmap to be able to map the specified virtual address.
6826  *
6827  *	Allocates new memory for the default (COARSE) translation table
6828  *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6829  *	also allocates space for the corresponding pv entries.
6830  *
6831  *	Nothing should be locked.
6832  */
6833 MARK_AS_PMAP_TEXT static kern_return_t
6834 pmap_expand(
6835 	pmap_t pmap,
6836 	vm_map_address_t v,
6837 	unsigned int options,
6838 	unsigned int level)
6839 {
6840 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6841 
6842 	if (__improbable((v < pmap->min) || (v >= pmap->max))) {
6843 		return KERN_INVALID_ADDRESS;
6844 	}
6845 #if     (__ARM_VMSA__ == 7)
6846 	vm_offset_t     pa;
6847 	tt_entry_t              *tte_p;
6848 	tt_entry_t              *tt_p;
6849 	unsigned int    i;
6850 
6851 #if DEVELOPMENT || DEBUG
6852 	/*
6853 	 * We no longer support root level expansion; panic in case something
6854 	 * still attempts to trigger it.
6855 	 */
6856 	i = tte_index(pt_attr, v);
6857 
6858 	if (i >= pmap->tte_index_max) {
6859 		panic("%s: index out of range, index=%u, max=%u, "
6860 		    "pmap=%p, addr=%p, options=%u, level=%u",
6861 		    __func__, i, pmap->tte_index_max,
6862 		    pmap, (void *)v, options, level);
6863 	}
6864 #endif /* DEVELOPMENT || DEBUG */
6865 
6866 	if (level == 1) {
6867 		return KERN_SUCCESS;
6868 	}
6869 
6870 	{
6871 		tt_entry_t     *tte_next_p;
6872 
6873 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6874 		pa = 0;
6875 		if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
6876 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6877 			return KERN_SUCCESS;
6878 		}
6879 		tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
6880 		for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6881 			if (tte_to_pa(*tte_next_p)) {
6882 				pa = tte_to_pa(*tte_next_p);
6883 				break;
6884 			}
6885 			tte_next_p++;
6886 		}
6887 		pa = pa & ~PAGE_MASK;
6888 		if (pa) {
6889 			tte_p =  &pmap->tte[ttenum(v)];
6890 			*tte_p =  pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
6891 			FLUSH_PTE();
6892 			PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6893 			    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6894 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6895 			return KERN_SUCCESS;
6896 		}
6897 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6898 	}
6899 	v = v & ~ARM_TT_L1_PT_OFFMASK;
6900 
6901 
6902 	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6903 		/*
6904 		 *	Allocate a VM page for the level 2 page table entries.
6905 		 */
6906 		while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6907 			if (options & PMAP_OPTIONS_NOWAIT) {
6908 				return KERN_RESOURCE_SHORTAGE;
6909 			}
6910 			VM_PAGE_WAIT();
6911 		}
6912 
6913 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6914 		/*
6915 		 *	See if someone else expanded us first
6916 		 */
6917 		if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6918 			tt_entry_t     *tte_next_p;
6919 
6920 			pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
6921 			pa = kvtophys_nofail((vm_offset_t)tt_p);
6922 			tte_p = &pmap->tte[ttenum(v)];
6923 			for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6924 				*tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
6925 				PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
6926 				    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
6927 				tte_next_p++;
6928 				pa = pa + 0x400;
6929 			}
6930 			FLUSH_PTE();
6931 
6932 			pa = 0x0ULL;
6933 			tt_p = (tt_entry_t *)NULL;
6934 		}
6935 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6936 		if (tt_p != (tt_entry_t *)NULL) {
6937 			pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
6938 			tt_p = (tt_entry_t *)NULL;
6939 		}
6940 	}
6941 	return KERN_SUCCESS;
6942 #else
6943 	pmap_paddr_t    pa;
6944 	unsigned int    ttlevel = pt_attr_root_level(pt_attr);
6945 	tt_entry_t              *tte_p;
6946 	tt_entry_t              *tt_p;
6947 
6948 	pa = 0x0ULL;
6949 	tt_p =  (tt_entry_t *)NULL;
6950 
6951 	for (; ttlevel < level; ttlevel++) {
6952 		pmap_lock(pmap, PMAP_LOCK_SHARED);
6953 
6954 		if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
6955 			pmap_unlock(pmap, PMAP_LOCK_SHARED);
6956 			while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6957 				if (options & PMAP_OPTIONS_NOWAIT) {
6958 					return KERN_RESOURCE_SHORTAGE;
6959 				}
6960 #if XNU_MONITOR
6961 				panic("%s: failed to allocate tt, "
6962 				    "pmap=%p, v=%p, options=0x%x, level=%u",
6963 				    __FUNCTION__,
6964 				    pmap, (void *)v, options, level);
6965 #else
6966 				VM_PAGE_WAIT();
6967 #endif
6968 			}
6969 			pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
6970 			if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
6971 				pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
6972 				pa = kvtophys_nofail((vm_offset_t)tt_p);
6973 				tte_p = pmap_ttne(pmap, ttlevel, v);
6974 				*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6975 				PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
6976 				    VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
6977 				pa = 0x0ULL;
6978 				tt_p = (tt_entry_t *)NULL;
6979 			}
6980 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
6981 		} else {
6982 			pmap_unlock(pmap, PMAP_LOCK_SHARED);
6983 		}
6984 
6985 		if (tt_p != (tt_entry_t *)NULL) {
6986 			pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
6987 			tt_p = (tt_entry_t *)NULL;
6988 		}
6989 	}
6990 
6991 	return KERN_SUCCESS;
6992 #endif
6993 }
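/*
 * Illustrative sketch only of the typical caller pattern (helper name
 * hypothetical, error handling elided): when no leaf PTE exists for an
 * address, expand the pmap down to the leaf level and re-check.
 */
#if 0
static kern_return_t
example_expand_to_leaf(pmap_t pmap, vm_map_address_t v, unsigned int options)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	kern_return_t kr = KERN_SUCCESS;

	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
		kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
		if (kr != KERN_SUCCESS) {
			break;
		}
	}
	return kr;
}
#endif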
6994 
6995 /*
6996  *	Routine:	pmap_collect
6997  *	Function:
6998  *		Garbage collects the physical map system for
6999  *		pages which are no longer used.
7000  *		Success need not be guaranteed -- that is, some
7001  *		unreferenced pages may be left uncollected while
7002  *		others are collected.
7003  */
7004 void
7005 pmap_collect(pmap_t pmap)
7006 {
7007 	if (pmap == PMAP_NULL) {
7008 		return;
7009 	}
7010 
7011 #if 0
7012 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
7013 	if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
7014 		/* TODO: Scan for vm page assigned to top level page tables with no reference */
7015 	}
7016 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
7017 #endif
7018 
7019 	return;
7020 }
7021 
7022 /*
7023  *	Routine:	pmap_gc
7024  *	Function:
7025  *              Pmap garbage collection
7026  *		Called by the pageout daemon when pages are scarce.
7027  *
7028  */
7029 void
7030 pmap_gc(
7031 	void)
7032 {
7033 #if XNU_MONITOR
7034 	/*
7035 	 * We cannot invoke the scheduler from the PPL, so for now we elide the
7036 	 * GC logic if the PPL is enabled.
7037 	 */
7038 #endif
7039 #if !XNU_MONITOR
7040 	pmap_t  pmap, pmap_next;
7041 	boolean_t       gc_wait;
7042 
7043 	if (pmap_gc_allowed &&
7044 	    (pmap_gc_allowed_by_time_throttle ||
7045 	    pmap_gc_forced)) {
7046 		pmap_gc_forced = FALSE;
7047 		pmap_gc_allowed_by_time_throttle = FALSE;
7048 		pmap_simple_lock(&pmaps_lock);
7049 		pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
7050 		while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
7051 			if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
7052 				pmap->gc_status |= PMAP_GC_INFLIGHT;
7053 			}
7054 			pmap_simple_unlock(&pmaps_lock);
7055 
7056 			pmap_collect(pmap);
7057 
7058 			pmap_simple_lock(&pmaps_lock);
7059 			gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
7060 			pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
7061 			pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
7062 			if (gc_wait) {
7063 				if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
7064 					pmap_next->gc_status |= PMAP_GC_INFLIGHT;
7065 				}
7066 				pmap_simple_unlock(&pmaps_lock);
7067 				thread_wakeup((event_t) &pmap->gc_status);
7068 				pmap_simple_lock(&pmaps_lock);
7069 			}
7070 			pmap = pmap_next;
7071 		}
7072 		pmap_simple_unlock(&pmaps_lock);
7073 	}
7074 #endif
7075 }
7076 
7077 /*
7078  *      By default, don't attempt pmap GC more frequently
7079  *      than once per minute.
7080  */
7081 
7082 void
7083 compute_pmap_gc_throttle(
7084 	void *arg __unused)
7085 {
7086 	pmap_gc_allowed_by_time_throttle = TRUE;
7087 }
7088 
7089 /*
7090  * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
7091  *
7092  * Invalidates all of the instruction cache on a physical page and
7093  * pushes any dirty data from the data cache for the same physical page
7094  */
7095 
7096 kern_return_t
7097 pmap_attribute_cache_sync(
7098 	ppnum_t pp,
7099 	vm_size_t size,
7100 	__unused vm_machine_attribute_t attribute,
7101 	__unused vm_machine_attribute_val_t * value)
7102 {
7103 	if (size > PAGE_SIZE) {
7104 		panic("pmap_attribute_cache_sync size: 0x%llx", (uint64_t)size);
7105 	} else {
7106 		cache_sync_page(pp);
7107 	}
7108 
7109 	return KERN_SUCCESS;
7110 }
7111 
7112 /*
7113  * pmap_sync_page_data_phys(ppnum_t pp)
7114  *
7115  * Invalidates all of the instruction cache on a physical page and
7116  * pushes any dirty data from the data cache for the same physical page
7117  */
7118 void
7119 pmap_sync_page_data_phys(
7120 	ppnum_t pp)
7121 {
7122 	cache_sync_page(pp);
7123 }
7124 
7125 /*
7126  * pmap_sync_page_attributes_phys(ppnum_t pp)
7127  *
7128  * Write back and invalidate all cachelines on a physical page.
7129  */
7130 void
7131 pmap_sync_page_attributes_phys(
7132 	ppnum_t pp)
7133 {
7134 	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
7135 }
7136 
7137 #if CONFIG_COREDUMP
7138 /* temporary workaround */
7139 boolean_t
7140 coredumpok(
7141 	vm_map_t map,
7142 	mach_vm_offset_t va)
7143 {
7144 	pt_entry_t     *pte_p;
7145 	pt_entry_t      spte;
7146 
7147 	pte_p = pmap_pte(map->pmap, va);
7148 	if (0 == pte_p) {
7149 		return FALSE;
7150 	}
7151 	spte = *pte_p;
7152 	return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7153 }
7154 #endif
7155 
7156 void
7157 fillPage(
7158 	ppnum_t pn,
7159 	unsigned int fill)
7160 {
7161 	unsigned int   *addr;
7162 	int             count;
7163 
7164 	addr = (unsigned int *) phystokv(ptoa(pn));
7165 	count = PAGE_SIZE / sizeof(unsigned int);
7166 	while (count--) {
7167 		*addr++ = fill;
7168 	}
7169 }
7170 
7171 extern void     mapping_set_mod(ppnum_t pn);
7172 
7173 void
7174 mapping_set_mod(
7175 	ppnum_t pn)
7176 {
7177 	pmap_set_modify(pn);
7178 }
7179 
7180 extern void     mapping_set_ref(ppnum_t pn);
7181 
7182 void
7183 mapping_set_ref(
7184 	ppnum_t pn)
7185 {
7186 	pmap_set_reference(pn);
7187 }
7188 
7189 /*
7190  * Clear specified attribute bits.
7191  *
7192  * Try to force an arm_fast_fault() for all mappings of
7193  * the page - to force attributes to be set again at fault time.
7194  * If the forcing succeeds, clear the cached bits at the head.
7195  * Otherwise, something must have been wired, so leave the cached
7196  * attributes alone.
7197  */
7198 MARK_AS_PMAP_TEXT static void
7199 phys_attribute_clear_with_flush_range(
7200 	ppnum_t         pn,
7201 	unsigned int    bits,
7202 	int             options,
7203 	void            *arg,
7204 	pmap_tlb_flush_range_t *flush_range)
7205 {
7206 	pmap_paddr_t    pa = ptoa(pn);
7207 	vm_prot_t       allow_mode = VM_PROT_ALL;
7208 
7209 #if XNU_MONITOR
7210 	if (__improbable(bits & PP_ATTR_PPL_OWNED_BITS)) {
7211 		panic("%s: illegal request, "
7212 		    "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
7213 		    __FUNCTION__,
7214 		    pn, bits, options, arg, flush_range);
7215 	}
7216 #endif
7217 	if ((arg != NULL) || (flush_range != NULL)) {
7218 		options = options & ~PMAP_OPTIONS_NOFLUSH;
7219 	}
7220 
7221 	if (__improbable((bits & PP_ATTR_MODIFIED) &&
7222 	    (options & PMAP_OPTIONS_NOFLUSH))) {
7223 		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
7224 		    "should not clear 'modified' without flushing TLBs\n",
7225 		    pn, bits, options, arg, flush_range);
7226 	}
7227 
7228 	assert(pn != vm_page_fictitious_addr);
7229 
7230 	if (options & PMAP_OPTIONS_CLEAR_WRITE) {
7231 		assert(bits == PP_ATTR_MODIFIED);
7232 
7233 		pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), options, flush_range);
7234 		/*
7235 		 * We short circuit this case; it should not need to
7236 		 * invoke arm_force_fast_fault, so just clear the modified bit.
7237 		 * pmap_page_protect has taken care of resetting
7238 		 * the state so that we'll see the next write as a fault to
7239 		 * the VM (i.e. we don't want a fast fault).
7240 		 */
7241 		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
7242 		return;
7243 	}
7244 	if (bits & PP_ATTR_REFERENCED) {
7245 		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
7246 	}
7247 	if (bits & PP_ATTR_MODIFIED) {
7248 		allow_mode &= ~VM_PROT_WRITE;
7249 	}
7250 
7251 	if (bits == PP_ATTR_NOENCRYPT) {
7252 		/*
7253 		 * We short circuit this case; it should not need to
7254 		 * invoke arm_force_fast_fault, so just clear and
7255 		 * return.  On ARM, this bit is just a debugging aid.
7256 		 */
7257 		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
7258 		return;
7259 	}
7260 
7261 	if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
7262 		ppattr_pa_clear_bits(pa, (pp_attr_t)bits);
7263 	}
7264 }
7265 
7266 MARK_AS_PMAP_TEXT void
7267 phys_attribute_clear_internal(
7268 	ppnum_t         pn,
7269 	unsigned int    bits,
7270 	int             options,
7271 	void            *arg)
7272 {
7273 	phys_attribute_clear_with_flush_range(pn, bits, options, arg, NULL);
7274 }
7275 
7276 #if __ARM_RANGE_TLBI__
7277 MARK_AS_PMAP_TEXT static vm_map_address_t
7278 phys_attribute_clear_twig_internal(
7279 	pmap_t pmap,
7280 	vm_map_address_t start,
7281 	vm_map_address_t end,
7282 	unsigned int bits,
7283 	unsigned int options,
7284 	pmap_tlb_flush_range_t *flush_range)
7285 {
7286 	pmap_assert_locked(pmap, PMAP_LOCK_SHARED);
7287 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7288 	assert(end >= start);
7289 	assert((end - start) <= pt_attr_twig_size(pt_attr));
7290 	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
7291 	vm_map_address_t va = start;
7292 	pt_entry_t     *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
7293 	tt_entry_t     *tte_p;
7294 	tte_p = pmap_tte(pmap, start);
7295 	unsigned int npages = 0;
7296 
7297 	if (tte_p == (tt_entry_t *) NULL) {
7298 		return end;
7299 	}
7300 
7301 	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7302 		pte_p = (pt_entry_t *) ttetokv(*tte_p);
7303 
7304 		start_pte_p = &pte_p[pte_index(pt_attr, start)];
7305 		end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
7306 		assert(end_pte_p >= start_pte_p);
7307 		for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++, va += pmap_page_size) {
7308 			if (__improbable(npages++ && pmap_pending_preemption())) {
7309 				return va;
7310 			}
7311 			pmap_paddr_t pa = pte_to_pa(*((volatile pt_entry_t*)curr_pte_p));
7312 			if (pa_valid(pa)) {
7313 				ppnum_t pn = (ppnum_t) atop(pa);
7314 				phys_attribute_clear_with_flush_range(pn, bits, options, NULL, flush_range);
7315 			}
7316 		}
7317 	}
7318 	return end;
7319 }
7320 
7321 MARK_AS_PMAP_TEXT vm_map_address_t
7322 phys_attribute_clear_range_internal(
7323 	pmap_t pmap,
7324 	vm_map_address_t start,
7325 	vm_map_address_t end,
7326 	unsigned int bits,
7327 	unsigned int options)
7328 {
7329 	if (__improbable(end < start)) {
7330 		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
7331 	}
7332 	validate_pmap_mutable(pmap);
7333 
7334 	vm_map_address_t va = start;
7335 	pmap_tlb_flush_range_t flush_range = {
7336 		.ptfr_pmap = pmap,
7337 		.ptfr_start = start,
7338 		.ptfr_end = end,
7339 		.ptfr_flush_needed = false
7340 	};
7341 
7342 	pmap_lock(pmap, PMAP_LOCK_SHARED);
7343 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7344 
7345 	while (va < end) {
7346 		vm_map_address_t curr_end;
7347 
7348 		curr_end = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
7349 		if (curr_end > end) {
7350 			curr_end = end;
7351 		}
7352 
7353 		va = phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
7354 		if ((va < curr_end) || pmap_pending_preemption()) {
7355 			break;
7356 		}
7357 	}
7358 	pmap_unlock(pmap, PMAP_LOCK_SHARED);
7359 	if (flush_range.ptfr_flush_needed) {
7360 		flush_range.ptfr_end = va;
7361 		pmap_get_pt_ops(pmap)->flush_tlb_region_async(
7362 			flush_range.ptfr_start,
7363 			flush_range.ptfr_end - flush_range.ptfr_start,
7364 			flush_range.ptfr_pmap,
7365 			true);
7366 		sync_tlb_flush();
7367 	}
7368 	return va;
7369 }
7370 
7371 static void
7372 phys_attribute_clear_range(
7373 	pmap_t pmap,
7374 	vm_map_address_t start,
7375 	vm_map_address_t end,
7376 	unsigned int bits,
7377 	unsigned int options)
7378 {
7379 	/*
7380 	 * We allow single-page requests to execute non-preemptibly,
7381 	 * as it doesn't make sense to sample AST_URGENT for a single-page
7382 	 * operation, and there are a couple of special use cases that
7383 	 * require a non-preemptible single-page operation.
7384 	 */
7385 	if ((end - start) > pt_attr_page_size(pmap_get_pt_attr(pmap))) {
7386 		pmap_verify_preemptible();
7387 	}
7388 
7389 	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);
7390 
7391 	while (start < end) {
7392 #if XNU_MONITOR
7393 		start = phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
7394 #else
7395 		start = phys_attribute_clear_range_internal(pmap, start, end, bits, options);
7396 #endif
7397 	}
7398 
7399 	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
7400 }
7401 #endif /* __ARM_RANGE_TLBI__ */
7402 
7403 static void
7404 phys_attribute_clear(
7405 	ppnum_t         pn,
7406 	unsigned int    bits,
7407 	int             options,
7408 	void            *arg)
7409 {
7410 	/*
7411 	 * Do we really want this tracepoint?  It will be extremely chatty.
7412 	 * Also, should we have a corresponding trace point for the set path?
7413 	 */
7414 	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
7415 
7416 #if XNU_MONITOR
7417 	phys_attribute_clear_ppl(pn, bits, options, arg);
7418 #else
7419 	phys_attribute_clear_internal(pn, bits, options, arg);
7420 #endif
7421 
7422 	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
7423 }
7424 
7425 /*
7426  *	Set specified attribute bits.
7427  *
7428  *	Set cached value in the pv head because we have
7429  *	no per-mapping hardware support for referenced and
7430  *	modify bits.
7431  */
7432 MARK_AS_PMAP_TEXT void
7433 phys_attribute_set_internal(
7434 	ppnum_t pn,
7435 	unsigned int bits)
7436 {
7437 	pmap_paddr_t    pa = ptoa(pn);
7438 	assert(pn != vm_page_fictitious_addr);
7439 
7440 #if XNU_MONITOR
7441 	if (bits & PP_ATTR_PPL_OWNED_BITS) {
7442 		panic("%s: illegal request, "
7443 		    "pn=%u, bits=%#x",
7444 		    __FUNCTION__,
7445 		    pn, bits);
7446 	}
7447 #endif
7448 
7449 	ppattr_pa_set_bits(pa, (uint16_t)bits);
7450 
7451 	return;
7452 }
7453 
7454 static void
7455 phys_attribute_set(
7456 	ppnum_t pn,
7457 	unsigned int bits)
7458 {
7459 #if XNU_MONITOR
7460 	phys_attribute_set_ppl(pn, bits);
7461 #else
7462 	phys_attribute_set_internal(pn, bits);
7463 #endif
7464 }
7465 
7466 
7467 /*
7468  *	Check specified attribute bits.
7469  *
7470  *	use the software cached bits (since no hw support).
7471  */
7472 static boolean_t
7473 phys_attribute_test(
7474 	ppnum_t pn,
7475 	unsigned int bits)
7476 {
7477 	pmap_paddr_t    pa = ptoa(pn);
7478 	assert(pn != vm_page_fictitious_addr);
7479 	return ppattr_pa_test_bits(pa, (pp_attr_t)bits);
7480 }
7481 
7482 
7483 /*
7484  *	Set the modify bit on the specified physical page.
7485  */
7486 void
7487 pmap_set_modify(ppnum_t pn)
7488 {
7489 	phys_attribute_set(pn, PP_ATTR_MODIFIED);
7490 }
7491 
7492 
7493 /*
7494  *	Clear the modify bits on the specified physical page.
7495  */
7496 void
7497 pmap_clear_modify(
7498 	ppnum_t pn)
7499 {
7500 	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
7501 }
7502 
7503 
7504 /*
7505  *	pmap_is_modified:
7506  *
7507  *	Return whether or not the specified physical page is modified
7508  *	by any physical maps.
7509  */
7510 boolean_t
7511 pmap_is_modified(
7512 	ppnum_t pn)
7513 {
7514 	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
7515 }
7516 
7517 
7518 /*
7519  *	Set the reference bit on the specified physical page.
7520  */
7521 static void
7522 pmap_set_reference(
7523 	ppnum_t pn)
7524 {
7525 	phys_attribute_set(pn, PP_ATTR_REFERENCED);
7526 }
7527 
7528 /*
7529  *	Clear the reference bits on the specified physical page.
7530  */
7531 void
7532 pmap_clear_reference(
7533 	ppnum_t pn)
7534 {
7535 	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
7536 }
7537 
7538 
7539 /*
7540  *	pmap_is_referenced:
7541  *
7542  *	Return whether or not the specified physical page is referenced
7543  *	by any physical maps.
7544  */
7545 boolean_t
7546 pmap_is_referenced(
7547 	ppnum_t pn)
7548 {
7549 	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
7550 }
7551 
7552 /*
7553  * pmap_get_refmod(phys)
7554  *  returns the referenced and modified bits of the specified
7555  *  physical page.
7556  */
7557 unsigned int
7558 pmap_get_refmod(
7559 	ppnum_t pn)
7560 {
7561 	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7562 	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
7563 }
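/*
 * Illustrative sketch only (helper name hypothetical, 'pn' assumed to be a
 * managed, non-fictitious page): because referenced/modified state lives in
 * software attribute bits, a set is immediately visible via pmap_get_refmod()
 * without touching any PTEs.
 */
#if 0
static void
example_refmod_roundtrip(ppnum_t pn)
{
	pmap_set_modify(pn);
	assert(pmap_get_refmod(pn) & VM_MEM_MODIFIED);
}
#endif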
7564 
7565 static inline unsigned int
7566 pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
7567 {
7568 	return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7569 	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7570 }
7571 
7572 /*
7573  * pmap_clear_refmod(phys, mask)
7574  *  clears the referenced and modified bits as specified by the mask
7575  *  of the specified physical page.
7576  */
7577 void
7578 pmap_clear_refmod_options(
7579 	ppnum_t         pn,
7580 	unsigned int    mask,
7581 	unsigned int    options,
7582 	void            *arg)
7583 {
7584 	unsigned int    bits;
7585 
7586 	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
7587 	phys_attribute_clear(pn, bits, options, arg);
7588 }
7589 
7590 /*
7591  * Perform pmap_clear_refmod_options on a virtual address range.
7592  * The operation will be performed in bulk & tlb flushes will be coalesced
7593  * if possible.
7594  *
7595  * Returns true if the operation is supported on this platform.
7596  * If this function returns false, the operation is not supported and
7597  * nothing has been modified in the pmap.
7598  */
7599 bool
7600 pmap_clear_refmod_range_options(
7601 	pmap_t pmap __unused,
7602 	vm_map_address_t start __unused,
7603 	vm_map_address_t end __unused,
7604 	unsigned int mask __unused,
7605 	unsigned int options __unused)
7606 {
7607 #if __ARM_RANGE_TLBI__
7608 	unsigned int    bits;
7609 	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
7610 	phys_attribute_clear_range(pmap, start, end, bits, options);
7611 	return true;
7612 #else /* __ARM_RANGE_TLBI__ */
7613 #pragma unused(pmap, start, end, mask, options)
7614 	/*
7615 	 * This operation allows the VM to bulk modify refmod bits on a virtually
7616 	 * contiguous range of addresses. This is a large performance improvement on
7617 	 * platforms that support ranged TLBI instructions, but on older platforms
7618 	 * we can only flush per-page or flush the entire ASID. So we currently
7619 	 * only support this operation on platforms that support ranged TLBI
7620 	 * instructions. On other platforms, we require that
7621 	 * the VM modify the bits on a per-page basis.
7622 	 */
7623 	return false;
7624 #endif /* __ARM_RANGE_TLBI__ */
7625 }
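/*
 * Illustrative sketch only (the helper name and page stepping are assumptions,
 * not VM-layer code): when the ranged variant returns false, callers are
 * expected to fall back to per-page clears.
 */
#if 0
static void
example_clear_refmod_span(pmap_t pmap, vm_map_address_t start, vm_map_address_t end, unsigned int mask)
{
	if (pmap_clear_refmod_range_options(pmap, start, end, mask, 0)) {
		return; /* Handled in bulk with coalesced TLB flushes. */
	}

	for (vm_map_address_t va = start; va < end; va += PAGE_SIZE) {
		ppnum_t pn = pmap_find_phys(pmap, va);
		if (pn != 0) {
			pmap_clear_refmod_options(pn, mask, 0, NULL);
		}
	}
}
#endif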
7626 
7627 void
7628 pmap_clear_refmod(
7629 	ppnum_t pn,
7630 	unsigned int mask)
7631 {
7632 	pmap_clear_refmod_options(pn, mask, 0, NULL);
7633 }
7634 
7635 unsigned int
7636 pmap_disconnect_options(
7637 	ppnum_t pn,
7638 	unsigned int options,
7639 	void *arg)
7640 {
7641 	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7642 		/*
7643 		 * On ARM, the "modified" bit is managed by software, so
7644 		 * we know up-front if the physical page is "modified",
7645 		 * without having to scan all the PTEs pointing to it.
7646 		 * The caller should have made the VM page "busy" so no one
7647 		 * should be able to establish any new mapping and "modify"
7648 		 * the page behind us.
7649 		 */
7650 		if (pmap_is_modified(pn)) {
7651 			/*
7652 			 * The page has been modified and will be sent to
7653 			 * the VM compressor.
7654 			 */
7655 			options |= PMAP_OPTIONS_COMPRESSOR;
7656 		} else {
7657 			/*
7658 			 * The page hasn't been modified and will be freed
7659 			 * instead of compressed.
7660 			 */
7661 		}
7662 	}
7663 
7664 	/* disconnect the page */
7665 	pmap_page_protect_options(pn, 0, options, arg);
7666 
7667 	/* return ref/chg status */
7668 	return pmap_get_refmod(pn);
7669 }
7670 
7671 /*
7672  *	Routine:
7673  *		pmap_disconnect
7674  *
7675  *	Function:
7676  *		Disconnect all mappings for this page and return reference and change status
7677  *		in generic format.
7678  *
7679  */
7680 unsigned int
7681 pmap_disconnect(
7682 	ppnum_t pn)
7683 {
7684 	pmap_page_protect(pn, 0);       /* disconnect the page */
7685 	return pmap_get_refmod(pn);   /* return ref/chg status */
7686 }
7687 
7688 boolean_t
7689 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7690 {
7691 	if (ptoa(first) >= vm_last_phys) {
7692 		return FALSE;
7693 	}
7694 	if (ptoa(last) < vm_first_phys) {
7695 		return FALSE;
7696 	}
7697 
7698 	return TRUE;
7699 }
7700 
7701 /*
7702  * The state maintained by the noencrypt functions is used as a
7703  * debugging aid on ARM.  This incurs some overhead on the part
7704  * of the caller.  A special case check in phys_attribute_clear
7705  * (the most expensive path) currently minimizes this overhead,
7706  * but stubbing these functions out on RELEASE kernels yields
7707  * further wins.
7708  */
7709 boolean_t
7710 pmap_is_noencrypt(
7711 	ppnum_t pn)
7712 {
7713 #if DEVELOPMENT || DEBUG
7714 	boolean_t result = FALSE;
7715 
7716 	if (!pa_valid(ptoa(pn))) {
7717 		return FALSE;
7718 	}
7719 
7720 	result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7721 
7722 	return result;
7723 #else
7724 #pragma unused(pn)
7725 	return FALSE;
7726 #endif
7727 }
7728 
7729 void
7730 pmap_set_noencrypt(
7731 	ppnum_t pn)
7732 {
7733 #if DEVELOPMENT || DEBUG
7734 	if (!pa_valid(ptoa(pn))) {
7735 		return;
7736 	}
7737 
7738 	phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7739 #else
7740 #pragma unused(pn)
7741 #endif
7742 }
7743 
7744 void
7745 pmap_clear_noencrypt(
7746 	ppnum_t pn)
7747 {
7748 #if DEVELOPMENT || DEBUG
7749 	if (!pa_valid(ptoa(pn))) {
7750 		return;
7751 	}
7752 
7753 	phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7754 #else
7755 #pragma unused(pn)
7756 #endif
7757 }
7758 
7759 #if XNU_MONITOR
7760 boolean_t
7761 pmap_is_monitor(ppnum_t pn)
7762 {
7763 	assert(pa_valid(ptoa(pn)));
7764 	return phys_attribute_test(pn, PP_ATTR_MONITOR);
7765 }
7766 #endif
7767 
7768 void
7769 pmap_lock_phys_page(ppnum_t pn)
7770 {
7771 #if !XNU_MONITOR
7772 	unsigned int    pai;
7773 	pmap_paddr_t    phys = ptoa(pn);
7774 
7775 	if (pa_valid(phys)) {
7776 		pai = pa_index(phys);
7777 		pvh_lock(pai);
7778 	} else
7779 #else
7780 	(void)pn;
7781 #endif
7782 	{ simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
7783 }
7784 
7785 
7786 void
7787 pmap_unlock_phys_page(ppnum_t pn)
7788 {
7789 #if !XNU_MONITOR
7790 	unsigned int    pai;
7791 	pmap_paddr_t    phys = ptoa(pn);
7792 
7793 	if (pa_valid(phys)) {
7794 		pai = pa_index(phys);
7795 		pvh_unlock(pai);
7796 	} else
7797 #else
7798 	(void)pn;
7799 #endif
7800 	{ simple_unlock(&phys_backup_lock);}
7801 }
7802 
7803 MARK_AS_PMAP_TEXT static void
7804 pmap_switch_user_ttb(pmap_t pmap, pmap_cpu_data_t *cpu_data_ptr)
7805 {
7806 #if     (__ARM_VMSA__ == 7)
7807 	cpu_data_ptr->cpu_user_pmap = pmap;
7808 	cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7809 	if (pmap != kernel_pmap) {
7810 		cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7811 	}
7812 
7813 #if     MACH_ASSERT && __ARM_USER_PROTECT__
7814 	{
7815 		unsigned int ttbr0_val, ttbr1_val;
7816 		__asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7817 		__asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7818 		if (ttbr0_val != ttbr1_val) {
7819 			panic("Misaligned ttbr0  %08X", ttbr0_val);
7820 		}
7821 		if (pmap->ttep & 0x1000) {
7822 			panic("Misaligned ttbr0  %08X", pmap->ttep);
7823 		}
7824 	}
7825 #endif
7826 #if !__ARM_USER_PROTECT__
7827 	set_mmu_ttb(pmap->ttep);
7828 	set_context_id(pmap->hw_asid);
7829 #endif
7830 
7831 #else /* (__ARM_VMSA__ == 7) */
7832 
7833 	if (pmap != kernel_pmap) {
7834 		cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7835 		cpu_data_ptr->cpu_nested_pmap_attr = (cpu_data_ptr->cpu_nested_pmap == NULL) ?
7836 		    NULL : pmap_get_pt_attr(cpu_data_ptr->cpu_nested_pmap);
7837 		cpu_data_ptr->cpu_nested_region_addr = pmap->nested_region_addr;
7838 		cpu_data_ptr->cpu_nested_region_size = pmap->nested_region_size;
7839 #if __ARM_MIXED_PAGE_SIZE__
7840 		cpu_data_ptr->commpage_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
7841 #endif
7842 	}
7843 
7844 
7845 #if __ARM_MIXED_PAGE_SIZE__
7846 	if ((pmap != kernel_pmap) && (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr())) {
7847 		set_tcr(pmap_get_pt_attr(pmap)->pta_tcr_value);
7848 	}
7849 #endif /* __ARM_MIXED_PAGE_SIZE__ */
7850 
7851 
7852 	if (pmap != kernel_pmap) {
7853 		set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
7854 	} else if (!pmap_user_ttb_is_clear()) {
7855 		pmap_clear_user_ttb_internal();
7856 	}
7857 #endif /* (__ARM_VMSA__ == 7) */
7858 }
7859 
7860 MARK_AS_PMAP_TEXT void
7861 pmap_clear_user_ttb_internal(void)
7862 {
7863 #if (__ARM_VMSA__ > 7)
7864 	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7865 #else
7866 	set_mmu_ttb(kernel_pmap->ttep);
7867 #endif
7868 }
7869 
7870 void
7871 pmap_clear_user_ttb(void)
7872 {
7873 	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
7874 #if XNU_MONITOR
7875 	pmap_clear_user_ttb_ppl();
7876 #else
7877 	pmap_clear_user_ttb_internal();
7878 #endif
7879 	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
7880 }
7881 
7882 
7883 #if defined(__arm64__)
7884 /*
7885  * Marker for use in multi-pass fast-fault PV list processing.
7886  * ARM_PTE_COMPRESSED should never otherwise be set on PTEs processed by
7887  * these functions, as compressed PTEs should never be present in PV lists.
7888  * Note that this only holds true for arm64; for arm32 we don't have enough
7889  * SW bits in the PTE, so the same bit does double-duty as the COMPRESSED
7890  * and WRITEABLE marker depending on whether the PTE is valid.
7891  */
7892 #define ARM_PTE_FF_MARKER ARM_PTE_COMPRESSED
7893 _Static_assert(ARM_PTE_COMPRESSED != ARM_PTE_WRITEABLE, "compressed bit aliases writeable");
7894 _Static_assert(ARM_PTE_COMPRESSED != ARM_PTE_WIRED, "compressed bit aliases wired");
7895 #endif
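/*
 * Illustrative sketch only of the two-pass idiom described above (not a
 * verbatim copy of the routines below): pass 1 stashes the marker in PTEs
 * whose permissions changed, pass 2 clears it and issues the TLB invalidate.
 */
#if 0
static void
example_ff_marker_passes(pt_entry_t *pte_p, pt_entry_t new_pte)
{
	/* Pass 1: record that this PTE will need a TLB invalidation. */
	write_pte_fast(pte_p, new_pte | ARM_PTE_FF_MARKER);

	/* ... FLUSH_PTE_STRONG() runs between the passes ... */

	/* Pass 2: clear the marker and invalidate the mapping's TLB entry. */
	pt_entry_t spte = *pte_p;
	if (spte & ARM_PTE_FF_MARKER) {
		write_pte_fast(pte_p, spte & ~ARM_PTE_FF_MARKER);
		/* flush_tlb_region_async()/sync_tlb_flush() would follow here. */
	}
}
#endif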
7896 
7897 
7898 MARK_AS_PMAP_TEXT static boolean_t
7899 arm_force_fast_fault_with_flush_range(
7900 	ppnum_t         ppnum,
7901 	vm_prot_t       allow_mode,
7902 	int             options,
7903 	pmap_tlb_flush_range_t *flush_range)
7904 {
7905 	pmap_paddr_t     phys = ptoa(ppnum);
7906 	pv_entry_t      *pve_p;
7907 	pt_entry_t      *pte_p;
7908 	unsigned int     pai;
7909 	unsigned int     pass1_updated = 0;
7910 	unsigned int     pass2_updated = 0;
7911 	boolean_t        result;
7912 	pv_entry_t     **pv_h;
7913 	bool             is_reusable, is_internal;
7914 	bool             ref_fault;
7915 	bool             mod_fault;
7916 	bool             clear_write_fault = false;
7917 	bool             ref_aliases_mod = false;
7918 	bool             mustsynch = ((options & PMAP_OPTIONS_FF_LOCKED) == 0);
7919 
7920 	assert(ppnum != vm_page_fictitious_addr);
7921 
7922 	if (!pa_valid(phys)) {
7923 		return FALSE;   /* Not a managed page. */
7924 	}
7925 
7926 	result = TRUE;
7927 	ref_fault = false;
7928 	mod_fault = false;
7929 	pai = pa_index(phys);
7930 	if (__probable(mustsynch)) {
7931 		pvh_lock(pai);
7932 	}
7933 	pv_h = pai_to_pvh(pai);
7934 
7935 #if XNU_MONITOR
7936 	if (__improbable(ppattr_pa_test_monitor(phys))) {
7937 		panic("%s: PA 0x%llx belongs to PPL.", __func__, (uint64_t)phys);
7938 	}
7939 #endif
7940 	pte_p = PT_ENTRY_NULL;
7941 	pve_p = PV_ENTRY_NULL;
7942 	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7943 		pte_p = pvh_ptep(pv_h);
7944 	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7945 		pve_p = pvh_pve_list(pv_h);
7946 	} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
7947 		panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)phys);
7948 	}
7949 
7950 	is_reusable = ppattr_test_reusable(pai);
7951 	is_internal = ppattr_test_internal(pai);
7952 
7953 	/*
7954 	 * issue_tlbi is used to indicate that this function will need to issue at least one TLB
7955 	 * invalidation during pass 2.  tlb_flush_needed only indicates that PTE permissions have
7956 	 * changed and that a TLB flush will be needed *at some point*, so we'll need to call
7957 	 * FLUSH_PTE_STRONG() to synchronize prior PTE updates.  In the case of a flush_range
7958 	 * operation, TLB invalidation may be handled by the caller so it's possible for
7959 	 * tlb_flush_needed to be true while issue_tlbi is false.
7960 	 */
7961 	bool issue_tlbi = false;
7962 	bool tlb_flush_needed = false;
7963 
7964 	pv_entry_t *orig_pve_p = pve_p;
7965 	pt_entry_t *orig_pte_p = pte_p;
7966 	int pve_ptep_idx = 0;
7967 
7968 	/*
7969 	 * Pass 1: Make any necessary PTE updates, marking PTEs that will require
7970 	 * TLB invalidation in pass 2.
7971 	 */
7972 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7973 		pt_entry_t       spte;
7974 		pt_entry_t       tmplate;
7975 
7976 		if (pve_p != PV_ENTRY_NULL) {
7977 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
7978 			if (pte_p == PT_ENTRY_NULL) {
7979 				goto fff_skip_pve_pass1;
7980 			}
7981 		}
7982 
7983 #ifdef PVH_FLAG_IOMMU
7984 		if (pvh_ptep_is_iommu(pte_p)) {
7985 			goto fff_skip_pve_pass1;
7986 		}
7987 #endif
7988 		if (*pte_p == ARM_PTE_EMPTY) {
7989 			panic("pte is empty: pte_p=%p ppnum=0x%x", pte_p, ppnum);
7990 		}
7991 		if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
7992 			panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x", pte_p, ppnum);
7993 		}
7994 
7995 		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
7996 		const pmap_t pmap = ptdp->pmap;
7997 		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
7998 		const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7999 
8000 		assert(va >= pmap->min && va < pmap->max);
8001 
8002 		/* update pmap stats and ledgers */
8003 		const bool is_altacct = ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx);
8004 		if (is_altacct) {
8005 			/*
8006 			 * We do not track "reusable" status for
8007 			 * "alternate accounting" mappings.
8008 			 */
8009 		} else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
8010 		    is_reusable &&
8011 		    is_internal &&
8012 		    pmap != kernel_pmap) {
8013 			/* one less "reusable" */
8014 			pmap_ledger_debit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8015 			/* one more "internal" */
8016 			pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8017 			assert(ppattr_test_internal(pai));
8018 			pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8019 
8020 			/*
8021 			 * Since the page is being marked non-reusable, we assume that it will be
8022 			 * modified soon.  Avoid the cost of another trap to handle the fast
8023 			 * fault when we next write to this page.
8024 			 */
8025 			clear_write_fault = true;
8026 		} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
8027 		    !is_reusable &&
8028 		    is_internal &&
8029 		    pmap != kernel_pmap) {
8030 			/* one more "reusable" */
8031 			pmap_ledger_credit(pmap, task_ledgers.reusable, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8032 			pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8033 			assert(ppattr_test_internal(pai));
8034 			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8035 		}
8036 
8037 		bool wiredskip = pte_is_wired(*pte_p) &&
8038 		    ((options & PMAP_OPTIONS_FF_WIRED) == 0);
8039 
8040 		if (wiredskip) {
8041 			result = FALSE;
8042 			goto fff_skip_pve_pass1;
8043 		}
8044 
8045 		spte = *pte_p;
8046 		tmplate = spte;
8047 
8048 		if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
8049 			/* read protection sets the pte to fault */
8050 			tmplate =  tmplate & ~ARM_PTE_AF;
8051 			ref_fault = true;
8052 		}
8053 		if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
8054 			/* take away write permission if set */
8055 			if (pmap == kernel_pmap) {
8056 				if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
8057 					tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
8058 					pte_set_was_writeable(tmplate, true);
8059 					mod_fault = true;
8060 				}
8061 			} else {
8062 				if ((tmplate & ARM_PTE_APMASK) == pt_attr_leaf_rw(pt_attr)) {
8063 					tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
8064 					pte_set_was_writeable(tmplate, true);
8065 					mod_fault = true;
8066 				}
8067 			}
8068 		}
8069 
8070 #if MACH_ASSERT && XNU_MONITOR
8071 		if (is_pte_xprr_protected(pmap, spte)) {
8072 			if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
8073 				panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
8074 				    "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
8075 				    __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
8076 				    ppnum, options, allow_mode);
8077 			}
8078 		}
8079 #endif /* MACH_ASSERT && XNU_MONITOR */
8080 
8081 		if (result && (tmplate != spte)) {
8082 			if ((spte & (~ARM_PTE_WRITEABLE)) != (tmplate & (~ARM_PTE_WRITEABLE)) &&
8083 			    !(options & PMAP_OPTIONS_NOFLUSH)) {
8084 				tlb_flush_needed = true;
8085 				if (!flush_range || (flush_range->ptfr_pmap != pmap) ||
8086 				    va >= flush_range->ptfr_end || va < flush_range->ptfr_start) {
8087 #ifdef ARM_PTE_FF_MARKER
8088 					assert(!(spte & ARM_PTE_FF_MARKER));
8089 					tmplate |= ARM_PTE_FF_MARKER;
8090 					++pass1_updated;
8091 #endif
8092 					issue_tlbi = true;
8093 				}
8094 			}
8095 			write_pte_fast(pte_p, tmplate);
8096 		}
8097 
8098 fff_skip_pve_pass1:
8099 		pte_p = PT_ENTRY_NULL;
8100 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
8101 			pve_ptep_idx = 0;
8102 			pve_p = pve_next(pve_p);
8103 		}
8104 	}
8105 
8106 	if (tlb_flush_needed) {
8107 		FLUSH_PTE_STRONG();
8108 	}
8109 
8110 	if (!issue_tlbi) {
8111 		goto fff_finish;
8112 	}
8113 
8114 	/* Pass 2: Issue any required TLB invalidations */
8115 	pve_p = orig_pve_p;
8116 	pte_p = orig_pte_p;
8117 	pve_ptep_idx = 0;
8118 
8119 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
8120 		if (pve_p != PV_ENTRY_NULL) {
8121 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
8122 			if (pte_p == PT_ENTRY_NULL) {
8123 				goto fff_skip_pve_pass2;
8124 			}
8125 		}
8126 
8127 #ifdef PVH_FLAG_IOMMU
8128 		if (pvh_ptep_is_iommu(pte_p)) {
8129 			goto fff_skip_pve_pass2;
8130 		}
8131 #endif
8132 
8133 #ifdef ARM_PTE_FF_MARKER
8134 		pt_entry_t spte = *pte_p;
8135 
8136 		if (!(spte & ARM_PTE_FF_MARKER)) {
8137 			goto fff_skip_pve_pass2;
8138 		} else {
8139 			spte &= (~ARM_PTE_FF_MARKER);
8140 			/* No need to synchronize with the TLB flush; we're changing a SW-managed bit */
8141 			write_pte_fast(pte_p, spte);
8142 			++pass2_updated;
8143 		}
8144 #endif
8145 		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
8146 		const pmap_t pmap = ptdp->pmap;
8147 		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
8148 
8149 		if (!flush_range || (flush_range->ptfr_pmap != pmap) ||
8150 		    (va >= flush_range->ptfr_end) || (va < flush_range->ptfr_start)) {
8151 			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
8152 			    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
8153 		}
8154 
8155 fff_skip_pve_pass2:
8156 		pte_p = PT_ENTRY_NULL;
8157 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
8158 			pve_ptep_idx = 0;
8159 			pve_p = pve_next(pve_p);
8160 		}
8161 	}
8162 
8163 fff_finish:
8164 	if (__improbable(pass1_updated != pass2_updated)) {
8165 		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
8166 		    __func__, pass1_updated, pass2_updated);
8167 	}
8168 
8169 	/*
8170 	 * If we are using the same approach for ref and mod
8171 	 * faults on this PTE, do not clear the write fault;
8172 	 * this would cause both ref and mod to be set on the
8173 	 * page again, and prevent us from taking ANY read/write
8174 	 * fault on the mapping.
8175 	 */
8176 	if (clear_write_fault && !ref_aliases_mod) {
8177 		arm_clear_fast_fault(ppnum, VM_PROT_WRITE, PT_ENTRY_NULL);
8178 	}
8179 	if (tlb_flush_needed) {
8180 		if (flush_range) {
8181 			/* Delayed flush. Signal to the caller that the flush is needed. */
8182 			flush_range->ptfr_flush_needed = true;
8183 		} else {
8184 			sync_tlb_flush();
8185 		}
8186 	}
8187 
8188 	/* update global "reusable" status for this page */
8189 	if (is_internal) {
8190 		if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
8191 		    is_reusable) {
8192 			ppattr_clear_reusable(pai);
8193 		} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
8194 		    !is_reusable) {
8195 			ppattr_set_reusable(pai);
8196 		}
8197 	}
8198 
8199 	if (mod_fault) {
8200 		ppattr_set_modfault(pai);
8201 	}
8202 	if (ref_fault) {
8203 		ppattr_set_reffault(pai);
8204 	}
8205 	if (__probable(mustsynch)) {
8206 		pvh_unlock(pai);
8207 	}
8208 	return result;
8209 }
8210 
8211 MARK_AS_PMAP_TEXT boolean_t
8212 arm_force_fast_fault_internal(
8213 	ppnum_t         ppnum,
8214 	vm_prot_t       allow_mode,
8215 	int             options)
8216 {
8217 	if (__improbable((options & (PMAP_OPTIONS_FF_LOCKED | PMAP_OPTIONS_NOFLUSH)) != 0)) {
8218 		panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
8219 	}
8220 	return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
8221 }
8222 
8223 /*
8224  *	Routine:	arm_force_fast_fault
8225  *
8226  *	Function:
8227  *		Force all mappings for this page to fault according
8228  *		to the access modes allowed, so we can gather ref/modify
8229  *		bits again.
8230  */
8231 
8232 boolean_t
8233 arm_force_fast_fault(
8234 	ppnum_t         ppnum,
8235 	vm_prot_t       allow_mode,
8236 	int             options,
8237 	__unused void   *arg)
8238 {
8239 	pmap_paddr_t    phys = ptoa(ppnum);
8240 
8241 	assert(ppnum != vm_page_fictitious_addr);
8242 
8243 	if (!pa_valid(phys)) {
8244 		return FALSE;   /* Not a managed page. */
8245 	}
8246 
8247 #if XNU_MONITOR
8248 	return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
8249 #else
8250 	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
8251 #endif
8252 }
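/*
 * Illustrative sketch only, mirroring the allow_mode computation in
 * phys_attribute_clear_with_flush_range() above (helper name hypothetical):
 * clearing the referenced attribute removes read/execute from the allowed
 * modes so the next access refaults and the bit is gathered again.
 */
#if 0
static void
example_force_refault_for_reference(ppnum_t pn)
{
	vm_prot_t allow_mode = VM_PROT_ALL & ~(VM_PROT_READ | VM_PROT_EXECUTE);

	(void)arm_force_fast_fault(pn, allow_mode, 0, NULL);
}
#endif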
8253 
8254 /*
8255  *	Routine:	arm_clear_fast_fault
8256  *
8257  *	Function:
8258  *		Clear pending force fault for all mappings for this page based on
8259  *		the observed fault type, update ref/modify bits.
8260  */
8261 MARK_AS_PMAP_TEXT static boolean_t
8262 arm_clear_fast_fault(
8263 	ppnum_t ppnum,
8264 	vm_prot_t fault_type,
8265 	pt_entry_t *pte_p)
8266 {
8267 	pmap_paddr_t    pa = ptoa(ppnum);
8268 	pv_entry_t     *pve_p;
8269 	unsigned int    pai;
8270 	boolean_t       result;
8271 	bool            tlb_flush_needed = false;
8272 	pv_entry_t    **pv_h;
8273 	unsigned int    npve = 0;
8274 	unsigned int    pass1_updated = 0;
8275 	unsigned int    pass2_updated = 0;
8276 
8277 	assert(ppnum != vm_page_fictitious_addr);
8278 
8279 	if (!pa_valid(pa)) {
8280 		return FALSE;   /* Not a managed page. */
8281 	}
8282 
8283 	result = FALSE;
8284 	pai = pa_index(pa);
8285 	pvh_assert_locked(pai);
8286 	pv_h = pai_to_pvh(pai);
8287 
8288 	pve_p = PV_ENTRY_NULL;
8289 	if (pte_p == PT_ENTRY_NULL) {
8290 		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
8291 			pte_p = pvh_ptep(pv_h);
8292 		} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
8293 			pve_p = pvh_pve_list(pv_h);
8294 		} else if (__improbable(!pvh_test_type(pv_h, PVH_TYPE_NULL))) {
8295 			panic("%s: invalid PV head 0x%llx for PA 0x%llx", __func__, (uint64_t)(*pv_h), (uint64_t)pa);
8296 		}
8297 	}
8298 
8299 	pv_entry_t *orig_pve_p = pve_p;
8300 	pt_entry_t *orig_pte_p = pte_p;
8301 	int pve_ptep_idx = 0;
8302 
8303 	/*
8304 	 * Pass 1: Make any necessary PTE updates, marking PTEs that will require
8305 	 * TLB invalidation in pass 2.
8306 	 */
8307 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
8308 		pt_entry_t spte;
8309 		pt_entry_t tmplate;
8310 
8311 		if (pve_p != PV_ENTRY_NULL) {
8312 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
8313 			if (pte_p == PT_ENTRY_NULL) {
8314 				goto cff_skip_pve_pass1;
8315 			}
8316 		}
8317 
8318 #ifdef PVH_FLAG_IOMMU
8319 		if (pvh_ptep_is_iommu(pte_p)) {
8320 			goto cff_skip_pve_pass1;
8321 		}
8322 #endif
8323 		if (*pte_p == ARM_PTE_EMPTY) {
8324 			panic("pte is empty: pte_p=%p ppnum=0x%x", pte_p, ppnum);
8325 		}
8326 
8327 		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
8328 		const pmap_t pmap = ptdp->pmap;
8329 		__assert_only const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
8330 
8331 		assert(va >= pmap->min && va < pmap->max);
8332 
8333 		spte = *pte_p;
8334 		tmplate = spte;
8335 
8336 		if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
8337 			{
8338 				if (pmap == kernel_pmap) {
8339 					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
8340 				} else {
8341 					assert(pmap->type != PMAP_TYPE_NESTED);
8342 					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
8343 				}
8344 			}
8345 
8346 			tmplate |= ARM_PTE_AF;
8347 
8348 			pte_set_was_writeable(tmplate, false);
8349 			ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8350 		} else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
8351 			tmplate = spte | ARM_PTE_AF;
8352 
8353 			{
8354 				ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
8355 			}
8356 		}
8357 
8358 #if MACH_ASSERT && XNU_MONITOR
8359 		if (is_pte_xprr_protected(pmap, spte)) {
8360 			if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
8361 				panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
8362 				    "ppnum=0x%x, fault_type=0x%x",
8363 				    __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
8364 				    ppnum, fault_type);
8365 			}
8366 		}
8367 #endif /* MACH_ASSERT && XNU_MONITOR */
8368 
8369 		assert(spte != ARM_PTE_TYPE_FAULT);
8370 		if (spte != tmplate) {
8371 			if ((spte & (~ARM_PTE_WRITEABLE)) != (tmplate & (~ARM_PTE_WRITEABLE))) {
8372 #ifdef ARM_PTE_FF_MARKER
8373 				assert(!(spte & ARM_PTE_FF_MARKER));
8374 				tmplate |= ARM_PTE_FF_MARKER;
8375 				++pass1_updated;
8376 #endif
8377 				tlb_flush_needed = true;
8378 			}
8379 			write_pte_fast(pte_p, tmplate);
8380 			result = TRUE;
8381 		}
8382 
8383 cff_skip_pve_pass1:
8384 		pte_p = PT_ENTRY_NULL;
8385 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
8386 			pve_ptep_idx = 0;
8387 			pve_p = pve_next(pve_p);
8388 			++npve;
8389 			if (__improbable(npve == PMAP_MAX_PV_LIST_CHUNK_SIZE)) {
8390 				break;
8391 			}
8392 		}
8393 	}
8394 
8395 	if (!tlb_flush_needed) {
8396 		goto cff_finish;
8397 	}
8398 
8399 	FLUSH_PTE_STRONG();
8400 
8401 	/* Pass 2: Issue any required TLB invalidations */
8402 	pve_p = orig_pve_p;
8403 	pte_p = orig_pte_p;
8404 	pve_ptep_idx = 0;
8405 	npve = 0;
8406 
8407 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
8408 		if (pve_p != PV_ENTRY_NULL) {
8409 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
8410 			if (pte_p == PT_ENTRY_NULL) {
8411 				goto cff_skip_pve_pass2;
8412 			}
8413 		}
8414 
8415 #ifdef PVH_FLAG_IOMMU
8416 		if (pvh_ptep_is_iommu(pte_p)) {
8417 			goto cff_skip_pve_pass2;
8418 		}
8419 #endif
8420 
8421 #ifdef ARM_PTE_FF_MARKER
8422 		pt_entry_t spte = *pte_p;
8423 
8424 		if (!(spte & ARM_PTE_FF_MARKER)) {
8425 			goto cff_skip_pve_pass2;
8426 		} else {
8427 			spte &= (~ARM_PTE_FF_MARKER);
8428 			/* No need to synchronize with the TLB flush; we're changing a SW-managed bit */
8429 			write_pte_fast(pte_p, spte);
8430 			++pass2_updated;
8431 		}
8432 #endif
8433 		const pt_desc_t * const ptdp = ptep_get_ptd(pte_p);
8434 		const pmap_t pmap = ptdp->pmap;
8435 		const vm_map_address_t va = ptd_get_va(ptdp, pte_p);
8436 
8437 		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
8438 
8439 cff_skip_pve_pass2:
8440 		pte_p = PT_ENTRY_NULL;
8441 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
8442 			pve_ptep_idx = 0;
8443 			pve_p = pve_next(pve_p);
8444 			++npve;
8445 			if (__improbable(npve == PMAP_MAX_PV_LIST_CHUNK_SIZE)) {
8446 				break;
8447 			}
8448 		}
8449 	}
8450 
8451 cff_finish:
8452 	if (__improbable(pass1_updated != pass2_updated)) {
8453 		panic("%s: first pass (%u) and second pass (%u) disagree on updated mappings",
8454 		    __func__, pass1_updated, pass2_updated);
8455 	}
8456 	if (tlb_flush_needed) {
8457 		sync_tlb_flush();
8458 	}
8459 	return result;
8460 }
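
/*
 * Hedged summary of the two-pass structure above, reduced to pseudocode
 * (identifier names are real; the loop bodies are simplified and omit the
 * PV-list walk, IOMMU skips, and compressed-PTE handling):
 *
 *	// Pass 1: recompute AP/AF bits; tag entries whose change requires a TLB flush.
 *	for (each pte mapping the page) {
 *		tmplate = recompute_ap_and_af(spte, fault_type);
 *		if (change_needs_flush(spte, tmplate))
 *			tmplate |= ARM_PTE_FF_MARKER;
 *		write_pte_fast(pte_p, tmplate);
 *	}
 *	FLUSH_PTE_STRONG();
 *	// Pass 2: for each tagged entry, clear the marker and queue a TLB invalidate.
 *	for (each pte mapping the page) {
 *		if (*pte_p & ARM_PTE_FF_MARKER) {
 *			write_pte_fast(pte_p, *pte_p & ~ARM_PTE_FF_MARKER);
 *			flush_tlb_region_async(va, page_size, pmap, true);
 *		}
 *	}
 *	sync_tlb_flush();
 */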
8461 
8462 /*
8463  * Determine if the fault was induced by software tracking of
8464  * modify/reference bits.  If so, re-enable the mapping (and set
8465  * the appropriate bits).
8466  *
8467  * Returns KERN_SUCCESS if the fault was induced and was
8468  * successfully handled.
8469  *
8470  * Returns KERN_FAILURE if the fault was not induced and
8471  * the function was unable to deal with it.
8472  *
8473  * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
8474  * disallows this type of access.
8475  */
8476 MARK_AS_PMAP_TEXT kern_return_t
8477 arm_fast_fault_internal(
8478 	pmap_t pmap,
8479 	vm_map_address_t va,
8480 	vm_prot_t fault_type,
8481 	__unused bool was_af_fault,
8482 	__unused bool from_user)
8483 {
8484 	kern_return_t   result = KERN_FAILURE;
8485 	pt_entry_t     *ptep;
8486 	pt_entry_t      spte = ARM_PTE_TYPE_FAULT;
8487 	unsigned int    pai;
8488 	pmap_paddr_t    pa;
8489 	validate_pmap_mutable(pmap);
8490 
8491 	pmap_lock(pmap, PMAP_LOCK_SHARED);
8492 
8493 	/*
8494 	 * If the entry doesn't exist, is completely invalid, or is already
8495 	 * valid, we can't fix it here.
8496 	 */
8497 
8498 	const uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO;
8499 	ptep = pmap_pte(pmap, va & ~(pmap_page_size - 1));
8500 	if (ptep != PT_ENTRY_NULL) {
8501 		while (true) {
8502 			spte = *((volatile pt_entry_t*)ptep);
8503 
8504 			pa = pte_to_pa(spte);
8505 
8506 			if ((spte == ARM_PTE_TYPE_FAULT) ||
8507 			    ARM_PTE_IS_COMPRESSED(spte, ptep)) {
8508 				pmap_unlock(pmap, PMAP_LOCK_SHARED);
8509 				return result;
8510 			}
8511 
8512 			if (!pa_valid(pa)) {
8513 				pmap_unlock(pmap, PMAP_LOCK_SHARED);
8514 #if XNU_MONITOR
8515 				if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
8516 					return KERN_PROTECTION_FAILURE;
8517 				} else
8518 #endif
8519 				return result;
8520 			}
8521 			pai = pa_index(pa);
8522 			pvh_lock(pai);
8523 			if (*ptep == spte) {
8524 				/*
8525 				 * Double-check the spte value, as we care about the AF bit.
8526 				 * It's also possible that pmap_page_protect() transitioned the
8527 				 * PTE to compressed/empty before we grabbed the PVH lock.
8528 				 */
8529 				break;
8530 			}
8531 			pvh_unlock(pai);
8532 		}
8533 	} else {
8534 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
8535 		return result;
8536 	}
8537 
8538 
8539 	if ((result != KERN_SUCCESS) &&
8540 	    ((ppattr_test_reffault(pai)) || ((fault_type & VM_PROT_WRITE) && ppattr_test_modfault(pai)))) {
8541 		/*
8542 		 * An attempted access will always clear ref/mod fault state, as
8543 		 * appropriate for the fault type.  arm_clear_fast_fault will
8544 		 * update the associated PTEs for the page as appropriate; if
8545 		 * any PTEs are updated, we redrive the access.  If the mapping
8546 		 * does not actually allow for the attempted access, the
8547 		 * following fault will (hopefully) fail to update any PTEs, and
8548 		 * thus cause arm_fast_fault to decide that it failed to handle
8549 		 * the fault.
8550 		 */
8551 		if (ppattr_test_reffault(pai)) {
8552 			ppattr_clear_reffault(pai);
8553 		}
8554 		if ((fault_type & VM_PROT_WRITE) && ppattr_test_modfault(pai)) {
8555 			ppattr_clear_modfault(pai);
8556 		}
8557 
8558 		if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type, PT_ENTRY_NULL)) {
8559 			/*
8560 			 * Should this preserve KERN_PROTECTION_FAILURE?  The
8561 			 * cost of not doing so is another fault in a case
8562 			 * that should already result in an exception.
8563 			 */
8564 			result = KERN_SUCCESS;
8565 		}
8566 	}
8567 
8568 	/*
8569 	 * If the PTE already has sufficient permissions, we can report the fault as handled.
8570 	 * This may happen, for example, if multiple threads trigger roughly simultaneous faults
8571 	 * on mappings of the same page.
8572 	 */
8573 	if ((result == KERN_FAILURE) && (spte & ARM_PTE_AF)) {
8574 		uintptr_t ap_ro, ap_rw, ap_x;
8575 		if (pmap == kernel_pmap) {
8576 			ap_ro = ARM_PTE_AP(AP_RONA);
8577 			ap_rw = ARM_PTE_AP(AP_RWNA);
8578 			ap_x = ARM_PTE_NX;
8579 		} else {
8580 			ap_ro = pt_attr_leaf_ro(pmap_get_pt_attr(pmap));
8581 			ap_rw = pt_attr_leaf_rw(pmap_get_pt_attr(pmap));
8582 			ap_x = pt_attr_leaf_x(pmap_get_pt_attr(pmap));
8583 		}
8584 		/*
8585 		 * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
8586 		 * hardware they may be xPRR-protected, in which case they'll be handled
8587 		 * by the is_pte_xprr_protected() case above.  Additionally, the exception
8588 		 * handling path currently does not call arm_fast_fault() without at least
8589 		 * VM_PROT_READ in fault_type.
8590 		 */
8591 		if (((spte & ARM_PTE_APMASK) == ap_rw) ||
8592 		    (!(fault_type & VM_PROT_WRITE) && ((spte & ARM_PTE_APMASK) == ap_ro))) {
8593 			if (!(fault_type & VM_PROT_EXECUTE) || ((spte & ARM_PTE_XMASK) == ap_x)) {
8594 				result = KERN_SUCCESS;
8595 			}
8596 		}
8597 	}
8598 
8599 	if ((result == KERN_FAILURE) && arm_clear_fast_fault((ppnum_t)atop(pa), fault_type, ptep)) {
8600 		/*
8601 		 * A prior arm_clear_fast_fault() operation may have returned early due to
8602 		 * another pending PV list operation or an excessively large PV list.
8603 		 * Attempt a targeted fixup of the PTE that caused the fault to avoid repeatedly
8604 		 * taking a fault on the same mapping.
8605 		 */
8606 		result = KERN_SUCCESS;
8607 	}
8608 
8609 	pvh_unlock(pai);
8610 	pmap_unlock(pmap, PMAP_LOCK_SHARED);
8611 	return result;
8612 }
8613 
8614 kern_return_t
8615 arm_fast_fault(
8616 	pmap_t pmap,
8617 	vm_map_address_t va,
8618 	vm_prot_t fault_type,
8619 	bool was_af_fault,
8620 	__unused bool from_user)
8621 {
8622 	kern_return_t   result = KERN_FAILURE;
8623 
8624 	if (va < pmap->min || va >= pmap->max) {
8625 		return result;
8626 	}
8627 
8628 	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
8629 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
8630 	    from_user);
8631 
8632 #if     (__ARM_VMSA__ == 7)
8633 	if (pmap != kernel_pmap) {
8634 		pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
8635 		pmap_t          cur_pmap;
8636 		pmap_t          cur_user_pmap;
8637 
8638 		cur_pmap = current_pmap();
8639 		cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
8640 
8641 		if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
8642 			if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
8643 				pmap_set_pmap(pmap, current_thread());
8644 				result = KERN_SUCCESS;
8645 				goto done;
8646 			}
8647 		}
8648 	}
8649 #endif
8650 
8651 #if XNU_MONITOR
8652 	result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
8653 #else
8654 	result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
8655 #endif
8656 
8657 #if (__ARM_VMSA__ == 7)
8658 done:
8659 #endif
8660 
8661 	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
8662 
8663 	return result;
8664 }
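
/*
 * Hedged sketch of how a caller interprets the result above (the real
 * consumer is the trap/exception path, which is not in this file; the
 * control flow shown here is illustrative only):
 *
 *	kern_return_t kr = arm_fast_fault(map->pmap, fault_va, fault_type, was_af_fault, from_user);
 *	if (kr == KERN_SUCCESS) {
 *		// ref/mod-induced fault was fixed up; simply retry the access.
 *	} else if (kr == KERN_PROTECTION_FAILURE) {
 *		// the pmap layer explicitly disallows this access; raise an exception.
 *	} else {
 *		// KERN_FAILURE: hand the fault to the full vm_fault() path.
 *	}
 */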
8665 
8666 void
8667 pmap_copy_page(
8668 	ppnum_t psrc,
8669 	ppnum_t pdst)
8670 {
8671 	bcopy_phys((addr64_t) (ptoa(psrc)),
8672 	    (addr64_t) (ptoa(pdst)),
8673 	    PAGE_SIZE);
8674 }
8675 
8676 
8677 /*
8678  *	pmap_copy_part_page copies part of the specified (machine independent) pages.
8679  */
8680 void
8681 pmap_copy_part_page(
8682 	ppnum_t psrc,
8683 	vm_offset_t src_offset,
8684 	ppnum_t pdst,
8685 	vm_offset_t dst_offset,
8686 	vm_size_t len)
8687 {
8688 	bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8689 	    (addr64_t) (ptoa(pdst) + dst_offset),
8690 	    len);
8691 }
8692 
8693 
8694 /*
8695  *	pmap_zero_page zeros the specified (machine independent) page.
8696  */
8697 void
8698 pmap_zero_page(
8699 	ppnum_t pn)
8700 {
8701 	assert(pn != vm_page_fictitious_addr);
8702 	bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8703 }
8704 
8705 /*
8706  *	pmap_zero_part_page
8707  *	zeros the specified (machine independent) part of a page.
8708  */
8709 void
8710 pmap_zero_part_page(
8711 	ppnum_t pn,
8712 	vm_offset_t offset,
8713 	vm_size_t len)
8714 {
8715 	assert(pn != vm_page_fictitious_addr);
8716 	assert(offset + len <= PAGE_SIZE);
8717 	bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8718 }
8719 
8720 void
8721 pmap_map_globals(
8722 	void)
8723 {
8724 	pt_entry_t      *ptep, pte;
8725 
8726 	ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
8727 	assert(ptep != PT_ENTRY_NULL);
8728 	assert(*ptep == ARM_PTE_EMPTY);
8729 
8730 	pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
8731 #if __ARM_KERNEL_PROTECT__
8732 	pte |= ARM_PTE_NG;
8733 #endif /* __ARM_KERNEL_PROTECT__ */
8734 	pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8735 #if     (__ARM_VMSA__ > 7)
8736 	pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8737 #else
8738 	pte |= ARM_PTE_SH;
8739 #endif
8740 	*ptep = pte;
8741 	FLUSH_PTE();
8742 	PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false, true);
8743 
8744 #if KASAN
8745 	kasan_notify_address(LOWGLOBAL_ALIAS, PAGE_SIZE);
8746 #endif
8747 }
8748 
8749 vm_offset_t
8750 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8751 {
8752 	if (__improbable(index >= CPUWINDOWS_MAX)) {
8753 		panic("%s: invalid index %u", __func__, index);
8754 	}
8755 	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8756 }
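
/*
 * Worked example of the window-address formula above (illustrative values:
 * a hypothetical configuration with CPUWINDOWS_MAX == 4): cpu_num 2, index 1
 * yields CPUWINDOWS_BASE + PAGE_SIZE * (4 * 2 + 1), i.e. the tenth window
 * page overall, so each CPU owns a contiguous run of CPUWINDOWS_MAX pages.
 */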
8757 
8758 MARK_AS_PMAP_TEXT unsigned int
8759 pmap_map_cpu_windows_copy_internal(
8760 	ppnum_t pn,
8761 	vm_prot_t prot,
8762 	unsigned int wimg_bits)
8763 {
8764 	pt_entry_t      *ptep = NULL, pte;
8765 	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8766 	unsigned int    cpu_num;
8767 	unsigned int    i;
8768 	vm_offset_t     cpu_copywindow_vaddr = 0;
8769 	bool            need_strong_sync = false;
8770 
8771 #if XNU_MONITOR
8772 	unsigned int    cacheattr = (!pa_valid(ptoa(pn) & ARM_PTE_PAGE_MASK) ? pmap_cache_attributes(pn) : 0);
8773 	need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
8774 #endif
8775 
8776 #if XNU_MONITOR
8777 #ifdef  __ARM_COHERENT_IO__
8778 	if (__improbable(pa_valid(ptoa(pn) & ARM_PTE_PAGE_MASK) && !pmap_ppl_disable)) {
8779 		panic("%s: attempted to map a managed page, "
8780 		    "pn=%u, prot=0x%x, wimg_bits=0x%x",
8781 		    __FUNCTION__,
8782 		    pn, prot, wimg_bits);
8783 	}
8784 	if (__improbable((cacheattr & PP_ATTR_MONITOR) && (prot != VM_PROT_READ) && !pmap_ppl_disable)) {
8785 		panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__, (uint64_t)ptoa(pn));
8786 	}
8787 
8788 #else /* __ARM_COHERENT_IO__ */
8789 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
8790 #endif /* __ARM_COHERENT_IO__ */
8791 #endif /* XNU_MONITOR */
8792 	cpu_num = pmap_cpu_data->cpu_number;
8793 
8794 	for (i = 0; i < CPUWINDOWS_MAX; i++) {
8795 		cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
8796 		ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8797 		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
8798 		if (*ptep == ARM_PTE_TYPE_FAULT) {
8799 			break;
8800 		}
8801 	}
8802 	if (i == CPUWINDOWS_MAX) {
8803 		panic("pmap_map_cpu_windows_copy: out of windows");
8804 	}
8805 
8806 	pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
8807 #if __ARM_KERNEL_PROTECT__
8808 	pte |= ARM_PTE_NG;
8809 #endif /* __ARM_KERNEL_PROTECT__ */
8810 
8811 	pte |= wimg_to_pte(wimg_bits, ptoa(pn));
8812 
8813 	if (prot & VM_PROT_WRITE) {
8814 		pte |= ARM_PTE_AP(AP_RWNA);
8815 	} else {
8816 		pte |= ARM_PTE_AP(AP_RONA);
8817 	}
8818 
8819 	write_pte_fast(ptep, pte);
8820 	/*
8821 	 * Invalidate the TLB. This also covers a nested use of cpu_copywindow_vaddr by an
8822 	 * interrupted context in pmap_unmap_cpu_windows_copy(), between its clearing of the
8823 	 * pte and its TLB invalidate.
8824 	 */
8824 	FLUSH_PTE_STRONG();
8825 	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i], true);
8826 	pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
8827 
8828 	return i;
8829 }
8830 
8831 unsigned int
8832 pmap_map_cpu_windows_copy(
8833 	ppnum_t pn,
8834 	vm_prot_t prot,
8835 	unsigned int wimg_bits)
8836 {
8837 #if XNU_MONITOR
8838 	return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
8839 #else
8840 	return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8841 #endif
8842 }
8843 
8844 MARK_AS_PMAP_TEXT void
8845 pmap_unmap_cpu_windows_copy_internal(
8846 	unsigned int index)
8847 {
8848 	pt_entry_t      *ptep;
8849 	unsigned int    cpu_num;
8850 	vm_offset_t     cpu_copywindow_vaddr = 0;
8851 	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8852 
8853 	cpu_num = pmap_cpu_data->cpu_number;
8854 
8855 	cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
8856 	/* Issue full-system DSB to ensure prior operations on the per-CPU window
8857 	 * (which are likely to have been on I/O memory) are complete before
8858 	 * tearing down the mapping. */
8859 	__builtin_arm_dsb(DSB_SY);
8860 	ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8861 	write_pte_strong(ptep, ARM_PTE_TYPE_FAULT);
8862 	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index], true);
8863 }
8864 
8865 void
8866 pmap_unmap_cpu_windows_copy(
8867 	unsigned int index)
8868 {
8869 #if XNU_MONITOR
8870 	return pmap_unmap_cpu_windows_copy_ppl(index);
8871 #else
8872 	return pmap_unmap_cpu_windows_copy_internal(index);
8873 #endif
8874 }
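
/*
 * Hedged usage sketch for the copy-window pair above (real callers are the
 * physical copy/zero helpers; the prot/wimg values and the preemption
 * handling shown here are illustrative assumptions):
 *
 *	unsigned int win = pmap_map_cpu_windows_copy(pn, VM_PROT_READ | VM_PROT_WRITE, wimg_bits);
 *	void *va = (void *)pmap_cpu_windows_copy_addr(cpu_number(), win);
 *	// ... access the physical page through va, without migrating off this CPU ...
 *	pmap_unmap_cpu_windows_copy(win);
 */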
8875 
8876 #if XNU_MONITOR
8877 
8878 MARK_AS_PMAP_TEXT void
8879 pmap_invoke_with_page(
8880 	ppnum_t page_number,
8881 	void *ctx,
8882 	void (*callback)(void *ctx, ppnum_t page_number, const void *page))
8883 {
8884 	#pragma unused(page_number, ctx, callback)
8885 }
8886 
8887 /*
8888  * Loop over every pmap_io_range (I/O ranges marked as owned by
8889  * the PPL in the device tree) and conditionally call callback() on each range
8890  * that needs to be included in the hibernation image.
8891  *
8892  * @param ctx      Will be passed as-is into the callback method. Use NULL if no
8893  *                 context is needed in the callback.
8894  * @param callback Callback function invoked on each range (gated by flag).
8895  */
8896 MARK_AS_PMAP_TEXT void
8897 pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
8898 {
8899 	extern const pmap_io_range_t* io_attr_table;
8900 	extern const unsigned int num_io_rgns;
8901 	for (unsigned int i = 0; i < num_io_rgns; ++i) {
8902 		if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
8903 			callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
8904 		}
8905 	}
8906 }
8907 
8908 /**
8909  * Set the HASHED pv_head_table flag for the passed in physical page if it's a
8910  * PPL-owned page. Otherwise, do nothing.
8911  *
8912  * @param addr Physical address of the page to set the HASHED flag on.
8913  */
8914 MARK_AS_PMAP_TEXT void
8915 pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
8916 {
8917 	/* Ignore non-managed kernel memory. */
8918 	if (!pa_valid(addr)) {
8919 		return;
8920 	}
8921 
8922 	const unsigned int pai = pa_index(addr);
8923 	if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
8924 		pv_entry_t **pv_h = pai_to_pvh(pai);
8925 
8926 		/* Mark that the PPL-owned page has been hashed into the hibernation image. */
8927 		pvh_lock(pai);
8928 		pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
8929 		pvh_unlock(pai);
8930 	}
8931 }
8932 
8933 /**
8934  * Loop through every physical page in the system and clear out the HASHED flag
8935  * on every PPL-owned page. That flag is used to keep track of which pages have
8936  * been hashed into the hibernation image during the hibernation entry process.
8937  *
8938  * The HASHED flag needs to be cleared out between hibernation cycles because the
8939  * pv_head_table and pp_attr_table's might have been copied into the hibernation
8940  * image with the HASHED flag set on certain pages. It's important to clear the
8941  * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
8942  * into the hibernation image can't be compromised across hibernation cycles.
8943  */
8944 MARK_AS_PMAP_TEXT void
8945 pmap_clear_ppl_hashed_flag_all(void)
8946 {
8947 	const unsigned int last_index = pa_index(vm_last_phys);
8948 	pv_entry_t **pv_h = NULL;
8949 
8950 	for (int pai = 0; pai < last_index; ++pai) {
8951 		pv_h = pai_to_pvh(pai);
8952 
8953 		/* Test for PPL-owned pages that have the HASHED flag set in their pv_head_table entries. */
8954 		if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
8955 		    (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
8956 			pvh_lock(pai);
8957 			pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
8958 			pvh_unlock(pai);
8959 		}
8960 	}
8961 }
8962 
8963 /**
8964  * Enforce that all PPL-owned pages were hashed into the hibernation image. The
8965  * ppl_hib driver will call this after all wired pages have been copied into the
8966  * hibernation image.
8967  */
8968 MARK_AS_PMAP_TEXT void
8969 pmap_check_ppl_hashed_flag_all(void)
8970 {
8971 	const unsigned int last_index = pa_index(vm_last_phys);
8972 	pv_entry_t **pv_h = NULL;
8973 
8974 	for (int pai = 0; pai < last_index; ++pai) {
8975 		pv_h = pai_to_pvh(pai);
8976 
8977 		/**
8978 		 * The PMAP stacks are explicitly not saved into the image so skip checking
8979 		 * the pages that contain the PMAP stacks.
8980 		 */
8981 		const bool is_pmap_stack = (pai >= pa_index(pmap_stacks_start_pa)) &&
8982 		    (pai < pa_index(pmap_stacks_end_pa));
8983 
8984 		if (!is_pmap_stack &&
8985 		    (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
8986 		    !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
8987 			panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
8988 		}
8989 	}
8990 }
8991 
8992 #endif /* XNU_MONITOR */
8993 
8994 /*
8995  * Indicate that a pmap is intended to be used as a nested pmap
8996  * within one or more larger address spaces.  This must be set
8997  * before pmap_nest() is called with this pmap as the 'subordinate'.
8998  */
8999 MARK_AS_PMAP_TEXT void
9000 pmap_set_nested_internal(
9001 	pmap_t pmap)
9002 {
9003 	validate_pmap_mutable(pmap);
9004 	if (__improbable(pmap->type != PMAP_TYPE_USER)) {
9005 		panic("%s: attempt to nest unsupported pmap %p of type 0x%hhx",
9006 		    __func__, pmap, pmap->type);
9007 	}
9008 	pmap->type = PMAP_TYPE_NESTED;
9009 	pmap_get_pt_ops(pmap)->free_id(pmap);
9010 }
9011 
9012 void
9013 pmap_set_nested(
9014 	pmap_t pmap)
9015 {
9016 #if XNU_MONITOR
9017 	pmap_set_nested_ppl(pmap);
9018 #else
9019 	pmap_set_nested_internal(pmap);
9020 #endif
9021 }
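
/*
 * Hedged ordering sketch for shared-region setup (the real callers live in
 * the VM shared-region code; the arguments here are illustrative):
 *
 *	pmap_t subord = pmap_create_options(ledger, 0, flags);
 *	pmap_set_nested(subord);                      // must happen before pmap_nest()
 *	kr = pmap_nest(grand, subord, vstart, size);  // then insert into the parent pmap
 */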
9022 
9023 /*
9024  * pmap_trim_range(pmap, start, end)
9025  *
9026  * pmap  = pmap to operate on
9027  * start = start of the range
9028  * end   = end of the range
9029  *
9030  * Attempts to deallocate TTEs for the given range within the nested region.
9031  */
9032 MARK_AS_PMAP_TEXT static void
9033 pmap_trim_range(
9034 	pmap_t pmap,
9035 	addr64_t start,
9036 	addr64_t end)
9037 {
9038 	addr64_t cur;
9039 	addr64_t nested_region_start;
9040 	addr64_t nested_region_end;
9041 	addr64_t adjusted_start;
9042 	addr64_t adjusted_end;
9043 	addr64_t adjust_offmask;
9044 	tt_entry_t * tte_p;
9045 	pt_entry_t * pte_p;
9046 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9047 
9048 	if (__improbable(end < start)) {
9049 		panic("%s: invalid address range, "
9050 		    "pmap=%p, start=%p, end=%p",
9051 		    __func__,
9052 		    pmap, (void*)start, (void*)end);
9053 	}
9054 
9055 	nested_region_start = pmap->nested_region_addr;
9056 	nested_region_end = nested_region_start + pmap->nested_region_size;
9057 
9058 	if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
9059 		panic("%s: range outside nested region %p-%p, "
9060 		    "pmap=%p, start=%p, end=%p",
9061 		    __func__, (void *)nested_region_start, (void *)nested_region_end,
9062 		    pmap, (void*)start, (void*)end);
9063 	}
9064 
9065 	/* Contract the range to TT page boundaries. */
9066 	adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
9067 	adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
9068 	adjusted_end = end & ~adjust_offmask;
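
	/*
	 * Worked example (illustrative, assuming a 4KB-page geometry where a
	 * leaf table spans 2MB, so adjust_offmask == 0x1FFFFF): start
	 * 0x180100000 rounds up to adjusted_start 0x180200000 and end
	 * 0x1805F0000 rounds down to adjusted_end 0x180400000, so only TTEs
	 * whose entire span lies inside [start, end) are candidates for removal.
	 */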
9069 
9070 	/* Iterate over the range, trying to remove TTEs. */
9071 	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
9072 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
9073 
9074 		tte_p = pmap_tte(pmap, cur);
9075 
9076 		if (tte_p == (tt_entry_t *) NULL) {
9077 			goto done;
9078 		}
9079 
9080 		if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
9081 			pte_p = (pt_entry_t *) ttetokv(*tte_p);
9082 
9083 			if ((pmap->type == PMAP_TYPE_NESTED) && (ptep_get_info(pte_p)->refcnt == 0)) {
9084 				/* Deallocate for the nested map. */
9085 				pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
9086 			} else if (pmap->type == PMAP_TYPE_USER) {
9087 				/**
9088 				 * Just remove for the parent map. If the leaf table pointed
9089 				 * to by the TTE being removed (owned by the nested pmap)
9090 				 * has any mappings, then this call will panic. This
9091 				 * enforces the policy that tables being trimmed must be
9092 				 * empty to prevent possible use-after-free attacks.
9093 				 */
9094 				pmap_tte_remove(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
9095 			} else {
9096 				panic("%s: Unsupported pmap type for nesting %p %d", __func__, pmap, pmap->type);
9097 			}
9098 		}
9099 
9100 done:
9101 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
9102 	}
9103 
9104 #if (__ARM_VMSA__ > 7)
9105 	/* Remove empty L2 TTs. */
9106 	adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
9107 	adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);
9108 
9109 	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
9110 		/* For each L1 entry in our range... */
9111 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
9112 
9113 		bool remove_tt1e = true;
9114 		tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
9115 		tt_entry_t * tt2e_start;
9116 		tt_entry_t * tt2e_end;
9117 		tt_entry_t * tt2e_p;
9118 		tt_entry_t tt1e;
9119 
9120 		if (tt1e_p == NULL) {
9121 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
9122 			continue;
9123 		}
9124 
9125 		tt1e = *tt1e_p;
9126 
9127 		if (tt1e == ARM_TTE_TYPE_FAULT) {
9128 			pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
9129 			continue;
9130 		}
9131 
9132 		tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
9133 		tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];
9134 
9135 		for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
9136 			if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
9137 				/*
9138 				 * If any TTEs are populated, don't remove the
9139 				 * L1 TT.
9140 				 */
9141 				remove_tt1e = false;
9142 			}
9143 		}
9144 
9145 		if (remove_tt1e) {
9146 			pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tt1e_p, PMAP_TT_L1_LEVEL);
9147 		}
9148 
9149 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
9150 	}
9151 #endif /* (__ARM_VMSA__ > 7) */
9152 }
9153 
9154 /*
9155  * pmap_trim_internal(grand, subord, vstart, size)
9156  *
9157  * grand  = pmap subord is nested in
9158  * subord = nested pmap
9159  * vstart = start of the used range in grand
9160  * size   = size of the used range
9161  *
9162  * Attempts to trim the shared region page tables down to only cover the given
9163  * range in subord and grand.
9164  */
9165 MARK_AS_PMAP_TEXT void
9166 pmap_trim_internal(
9167 	pmap_t grand,
9168 	pmap_t subord,
9169 	addr64_t vstart,
9170 	uint64_t size)
9171 {
9172 	addr64_t vend;
9173 	addr64_t adjust_offmask;
9174 
9175 	if (__improbable(os_add_overflow(vstart, size, &vend))) {
9176 		panic("%s: grand addr wraps around, "
9177 		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9178 		    __func__, grand, subord, (void*)vstart, size);
9179 	}
9180 
9181 	validate_pmap_mutable(grand);
9182 	validate_pmap(subord);
9183 
9184 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9185 
9186 	pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9187 
9188 	if (__improbable(subord->type != PMAP_TYPE_NESTED)) {
9189 		panic("%s: subord is of non-nestable type 0x%hhx, "
9190 		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9191 		    __func__, subord->type, grand, subord, (void*)vstart, size);
9192 	}
9193 
9194 	if (__improbable(grand->type != PMAP_TYPE_USER)) {
9195 		panic("%s: grand is of unsupported type 0x%hhx for nesting, "
9196 		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9197 		    __func__, grand->type, grand, subord, (void*)vstart, size);
9198 	}
9199 
9200 	if (__improbable(grand->nested_pmap != subord)) {
9201 		panic("%s: grand->nested != subord, "
9202 		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9203 		    __func__, grand, subord, (void*)vstart, size);
9204 	}
9205 
9206 	if (__improbable((size != 0) &&
9207 	    ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
9208 		panic("%s: grand range not in nested region, "
9209 		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9210 		    __func__, grand, subord, (void*)vstart, size);
9211 	}
9212 
9213 
9214 	if (!grand->nested_has_no_bounds_ref) {
9215 		assert(subord->nested_bounds_set);
9216 
9217 		if (!grand->nested_bounds_set) {
9218 			/* Inherit the bounds from subord. */
9219 			grand->nested_region_true_start = subord->nested_region_true_start;
9220 			grand->nested_region_true_end = subord->nested_region_true_end;
9221 			grand->nested_bounds_set = true;
9222 		}
9223 
9224 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9225 		return;
9226 	}
9227 
9228 	if ((!subord->nested_bounds_set) && size) {
9229 		adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
9230 
9231 		subord->nested_region_true_start = vstart;
9232 		subord->nested_region_true_end = vend;
9233 		subord->nested_region_true_start &= ~adjust_offmask;
9234 
9235 		if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
9236 			panic("%s: padded true end wraps around, "
9237 			    "grand=%p, subord=%p, vstart=%p, size=%#llx",
9238 			    __func__, grand, subord, (void*)vstart, size);
9239 		}
9240 
9241 		subord->nested_region_true_end &= ~adjust_offmask;
9242 		subord->nested_bounds_set = true;
9243 	}
9244 
9245 	if (subord->nested_bounds_set) {
9246 		/* Inherit the bounds from subord. */
9247 		grand->nested_region_true_start = subord->nested_region_true_start;
9248 		grand->nested_region_true_end = subord->nested_region_true_end;
9249 		grand->nested_bounds_set = true;
9250 
9251 		/* If we know the bounds, we can trim the pmap. */
9252 		grand->nested_has_no_bounds_ref = false;
9253 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9254 	} else {
9255 		/* Don't trim if we don't know the bounds. */
9256 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9257 		return;
9258 	}
9259 
9260 	/* Trim grand to only cover the given range. */
9261 	pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
9262 	pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));
9263 
9264 	/* Try to trim subord. */
9265 	pmap_trim_subord(subord);
9266 }
9267 
9268 MARK_AS_PMAP_TEXT static void
9269 pmap_trim_self(pmap_t pmap)
9270 {
9271 	if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
9272 		/* If we have a no bounds ref, we need to drop it. */
9273 		pmap_lock(pmap->nested_pmap, PMAP_LOCK_SHARED);
9274 		pmap->nested_has_no_bounds_ref = false;
9275 		boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
9276 		vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
9277 		vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
9278 		pmap_unlock(pmap->nested_pmap, PMAP_LOCK_SHARED);
9279 
9280 		if (nested_bounds_set) {
9281 			pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
9282 			pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
9283 		}
9284 		/*
9285 		 * Try trimming the nested pmap, in case we had the
9286 		 * last reference.
9287 		 */
9288 		pmap_trim_subord(pmap->nested_pmap);
9289 	}
9290 }
9291 
9292 /*
9293  * pmap_trim_subord(subord)
9294  *
9295  * subord = nested pmap we are attempting to trim
9296  *
9297  * Drops a no-bounds reference on subord and trims it to its true bounds
9298  * if that was the last such reference and the bounds are known.
9299  */
9300 MARK_AS_PMAP_TEXT static void
9301 pmap_trim_subord(pmap_t subord)
9302 {
9303 	bool contract_subord = false;
9304 
9305 	pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9306 
9307 	subord->nested_no_bounds_refcnt--;
9308 
9309 	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
9310 		/* If this was the last no bounds reference, trim subord. */
9311 		contract_subord = true;
9312 	}
9313 
9314 	pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9315 
9316 	if (contract_subord) {
9317 		pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
9318 		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
9319 	}
9320 }
9321 
9322 void
9323 pmap_trim(
9324 	pmap_t grand,
9325 	pmap_t subord,
9326 	addr64_t vstart,
9327 	uint64_t size)
9328 {
9329 #if XNU_MONITOR
9330 	pmap_trim_ppl(grand, subord, vstart, size);
9331 
9332 	pmap_ledger_check_balance(grand);
9333 	pmap_ledger_check_balance(subord);
9334 #else
9335 	pmap_trim_internal(grand, subord, vstart, size);
9336 #endif
9337 }
9338 
9339 #if HAS_APPLE_PAC
9340 void *
9341 pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9342 {
9343 	void *res = NULL;
9344 	uint64_t current_intr_state = pmap_interrupts_disable();
9345 
9346 	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
9347 	switch (key) {
9348 	case ptrauth_key_asia:
9349 		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
9350 		break;
9351 	case ptrauth_key_asda:
9352 		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
9353 		break;
9354 	default:
9355 		panic("attempt to sign user pointer without process independent key");
9356 	}
9357 	ml_disable_user_jop_key(jop_key, saved_jop_state);
9358 
9359 	pmap_interrupts_restore(current_intr_state);
9360 
9361 	return res;
9362 }
9363 
9364 void *
9365 pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9366 {
9367 	return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
9368 }
9369 
9370 void *
9371 pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9372 {
9373 	if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
9374 		panic("attempt to auth user pointer without process independent key");
9375 	}
9376 
9377 	void *res = NULL;
9378 	uint64_t current_intr_state = pmap_interrupts_disable();
9379 
9380 	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
9381 	res = ml_auth_ptr_unchecked(value, key, discriminator);
9382 	ml_disable_user_jop_key(jop_key, saved_jop_state);
9383 
9384 	pmap_interrupts_restore(current_intr_state);
9385 
9386 	return res;
9387 }
9388 
9389 void *
9390 pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
9391 {
9392 	return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
9393 }
9394 #endif /* HAS_APPLE_PAC */
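
/*
 * Hedged pairing sketch for the PAC helpers above (the key, discriminator,
 * and jop_key values are illustrative; the real users are the thread-state
 * signing paths):
 *
 *	void *signed_ptr = pmap_sign_user_ptr(raw_ptr, ptrauth_key_asia, discriminator, jop_key);
 *	...
 *	void *raw_again  = pmap_auth_user_ptr(signed_ptr, ptrauth_key_asia, discriminator, jop_key);
 */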
9395 
9396 /*
9397  *	kern_return_t pmap_nest(grand, subord, vstart, size)
9398  *
9399  *	grand  = the pmap that we will nest subord into
9400  *	subord = the pmap that goes into the grand
9401  *	vstart  = start of range in pmap to be inserted
9402  *	size   = Size of nest area (up to 16TB)
9403  *
9404  *	Inserts a pmap into another.  This is used to implement shared segments.
9405  *
9406  */
9407 
9408 MARK_AS_PMAP_TEXT kern_return_t
9409 pmap_nest_internal(
9410 	pmap_t grand,
9411 	pmap_t subord,
9412 	addr64_t vstart,
9413 	uint64_t size)
9414 {
9415 	kern_return_t kr = KERN_FAILURE;
9416 	vm_map_offset_t vaddr;
9417 	tt_entry_t     *stte_p;
9418 	tt_entry_t     *gtte_p;
9419 	unsigned int    i;
9420 	unsigned int    num_tte;
9421 	unsigned int    nested_region_asid_bitmap_size;
9422 	unsigned int*   nested_region_asid_bitmap;
9423 	int             expand_options = 0;
9424 	bool            deref_subord = true;
9425 
9426 	addr64_t vend;
9427 	if (__improbable(os_add_overflow(vstart, size, &vend))) {
9428 		panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
9429 	}
9430 
9431 	validate_pmap_mutable(grand);
9432 	validate_pmap(subord);
9433 #if XNU_MONITOR
9434 	/*
9435 	 * Ordering is important here.  validate_pmap() has already ensured subord is a
9436 	 * PPL-controlled pmap pointer, but it could have already been destroyed or could
9437 	 * be in the process of being destroyed.  If destruction is already committed,
9438 	 * then the check of ref_count below will cover us.  If destruction is initiated
9439 	 * during or after this call, then pmap_destroy() will catch the non-zero
9440 	 * nested_count.
9441 	 */
9442 	os_atomic_inc(&subord->nested_count, relaxed);
9443 	os_atomic_thread_fence(seq_cst);
9444 #endif
9445 	if (__improbable(os_atomic_inc_orig(&subord->ref_count, relaxed) <= 0)) {
9446 		panic("%s: invalid subordinate pmap %p", __func__, subord);
9447 	}
9448 
9449 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9450 	if (__improbable(pmap_get_pt_attr(subord) != pt_attr)) {
9451 		panic("%s: attempt to nest pmap %p into pmap %p with mismatched attributes", __func__, subord, grand);
9452 	}
9453 
9454 #if XNU_MONITOR
9455 	expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
9456 #endif
9457 
9458 	if (__improbable(((size | vstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
9459 		panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx", grand, vstart, size);
9460 	}
9461 
9462 	if (__improbable(subord->type != PMAP_TYPE_NESTED)) {
9463 		panic("%s: subordinate pmap %p is of non-nestable type 0x%hhx", __func__, subord, subord->type);
9464 	}
9465 
9466 	if (__improbable(grand->type != PMAP_TYPE_USER)) {
9467 		panic("%s: grand pmap %p is of unsupported type 0x%hhx for nesting", __func__, grand, grand->type);
9468 	}
9469 
9470 	if (subord->nested_region_asid_bitmap == NULL) {
9471 		nested_region_asid_bitmap_size  = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
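
		/*
		 * Worked example (illustrative, assuming a 16KB-page geometry where
		 * each twig entry spans 32MB): a 4GB nested region has
		 * 0x100000000 >> 25 == 128 twig entries, and 128 / (32 bits per
		 * unsigned int) == 4 unsigned ints of bitmap.
		 */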
9472 
9473 #if XNU_MONITOR
9474 		pmap_paddr_t pa = 0;
9475 
9476 		if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
9477 			panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
9478 			    "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
9479 			    __FUNCTION__, nested_region_asid_bitmap_size,
9480 			    grand, subord, vstart, size);
9481 		}
9482 
9483 		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
9484 
9485 		if (kr != KERN_SUCCESS) {
9486 			goto nest_cleanup;
9487 		}
9488 
9489 		assert(pa);
9490 
9491 		nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
9492 #else
9493 		nested_region_asid_bitmap = kalloc_data(
9494 			nested_region_asid_bitmap_size * sizeof(unsigned int),
9495 			Z_WAITOK | Z_ZERO);
9496 #endif
9497 
9498 		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9499 		if (subord->nested_region_asid_bitmap == NULL) {
9500 			subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
9501 			subord->nested_region_addr = vstart;
9502 			subord->nested_region_size = (mach_vm_offset_t) size;
9503 
9504 			/**
9505 			 * Ensure that the rest of the subord->nested_region_* fields are
9506 			 * initialized and visible before setting the nested_region_asid_bitmap
9507 			 * field (which is used as the flag to say that the rest are initialized).
9508 			 */
9509 			__builtin_arm_dmb(DMB_ISHST);
9510 			subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
9511 			nested_region_asid_bitmap = NULL;
9512 		}
9513 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9514 		if (nested_region_asid_bitmap != NULL) {
9515 #if XNU_MONITOR
9516 			pmap_pages_free(kvtophys_nofail((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
9517 #else
9518 			kfree_data(nested_region_asid_bitmap,
9519 			    nested_region_asid_bitmap_size * sizeof(unsigned int));
9520 #endif
9521 		}
9522 	}
9523 
9524 	/**
9525 	 * Ensure subsequent reads of the subord->nested_region_* fields don't get
9526 	 * speculated before their initialization.
9527 	 */
9528 	__builtin_arm_dmb(DMB_ISHLD);
9529 
9530 	if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
9531 		uint64_t        new_size;
9532 		unsigned int    new_nested_region_asid_bitmap_size;
9533 		unsigned int*   new_nested_region_asid_bitmap;
9534 
9535 		nested_region_asid_bitmap = NULL;
9536 		nested_region_asid_bitmap_size = 0;
9537 		new_size =  vend - subord->nested_region_addr;
9538 
9539 		/* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
9540 		new_nested_region_asid_bitmap_size  = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
9541 
9542 #if XNU_MONITOR
9543 		pmap_paddr_t pa = 0;
9544 
9545 		if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
9546 			panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
9547 			    "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
9548 			    __FUNCTION__, new_nested_region_asid_bitmap_size,
9549 			    grand, subord, vstart, new_size);
9550 		}
9551 
9552 		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
9553 
9554 		if (kr != KERN_SUCCESS) {
9555 			goto nest_cleanup;
9556 		}
9557 
9558 		assert(pa);
9559 
9560 		new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
9561 #else
9562 		new_nested_region_asid_bitmap = kalloc_data(
9563 			new_nested_region_asid_bitmap_size * sizeof(unsigned int),
9564 			Z_WAITOK | Z_ZERO);
9565 #endif
9566 		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9567 		if (subord->nested_region_size < new_size) {
9568 			bcopy(subord->nested_region_asid_bitmap,
9569 			    new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
9570 			nested_region_asid_bitmap_size  = subord->nested_region_asid_bitmap_size;
9571 			nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
9572 			subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
9573 			subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
9574 			subord->nested_region_size = new_size;
9575 			new_nested_region_asid_bitmap = NULL;
9576 		}
9577 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9578 		if (nested_region_asid_bitmap != NULL) {
9579 #if XNU_MONITOR
9580 			pmap_pages_free(kvtophys_nofail((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
9581 #else
9582 			kfree_data(nested_region_asid_bitmap,
9583 			    nested_region_asid_bitmap_size * sizeof(unsigned int));
9584 #endif
9585 		}
9586 		if (new_nested_region_asid_bitmap != NULL) {
9587 #if XNU_MONITOR
9588 			pmap_pages_free(kvtophys_nofail((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
9589 #else
9590 			kfree_data(new_nested_region_asid_bitmap,
9591 			    new_nested_region_asid_bitmap_size * sizeof(unsigned int));
9592 #endif
9593 		}
9594 	}
9595 
9596 	pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9597 
9598 	if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord, relaxed)) {
9599 		/*
9600 		 * If this is grand's first nesting operation, keep the reference on subord.
9601 		 * It will be released by pmap_destroy_internal() when grand is destroyed.
9602 		 */
9603 		deref_subord = false;
9604 
9605 		if (!subord->nested_bounds_set) {
9606 			/*
9607 			 * We are nesting without the shared region's bounds
9608 			 * being known.  We'll have to trim the pmap later.
9609 			 */
9610 			grand->nested_has_no_bounds_ref = true;
9611 			subord->nested_no_bounds_refcnt++;
9612 		}
9613 
9614 		grand->nested_region_addr = vstart;
9615 		grand->nested_region_size = (mach_vm_offset_t) size;
9616 	} else {
9617 		if (__improbable(grand->nested_pmap != subord)) {
9618 			panic("pmap_nest() pmap %p has a nested pmap", grand);
9619 		} else if (__improbable(grand->nested_region_addr > vstart)) {
9620 			panic("pmap_nest() pmap %p : attempt to nest outside the nested region", grand);
9621 		} else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
9622 			grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
9623 		}
9624 	}
9625 
9626 #if     (__ARM_VMSA__ == 7)
9627 	vaddr = (vm_map_offset_t) vstart;
9628 	num_tte = size >> ARM_TT_L1_SHIFT;
9629 
9630 	for (i = 0; i < num_tte; i++) {
9631 		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
9632 			goto expand_next;
9633 		}
9634 
9635 		stte_p = pmap_tte(subord, vaddr);
9636 		if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
9637 			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9638 			kr = pmap_expand(subord, vaddr, expand_options, PMAP_TT_L2_LEVEL);
9639 
9640 			if (kr != KERN_SUCCESS) {
9641 				pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9642 				goto done;
9643 			}
9644 
9645 			pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9646 		}
9647 		pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9648 		pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9649 		stte_p = pmap_tte(grand, vaddr);
9650 		if (stte_p == (tt_entry_t *)NULL) {
9651 			pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9652 			kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
9653 
9654 			if (kr != KERN_SUCCESS) {
9655 				pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9656 				goto done;
9657 			}
9658 		} else {
9659 			pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9660 			kr = KERN_SUCCESS;
9661 		}
9662 		pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9663 
9664 expand_next:
9665 		vaddr += ARM_TT_L1_SIZE;
9666 	}
9667 
9668 #else
9669 	vaddr = (vm_map_offset_t) vstart;
9670 	num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9671 
9672 	for (i = 0; i < num_tte; i++) {
9673 		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
9674 			goto expand_next;
9675 		}
9676 
9677 		stte_p = pmap_tte(subord, vaddr);
9678 		if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
9679 			pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9680 			kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));
9681 
9682 			if (kr != KERN_SUCCESS) {
9683 				pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9684 				goto done;
9685 			}
9686 
9687 			pmap_lock(subord, PMAP_LOCK_EXCLUSIVE);
9688 		}
9689 expand_next:
9690 		vaddr += pt_attr_twig_size(pt_attr);
9691 	}
9692 #endif
9693 	pmap_unlock(subord, PMAP_LOCK_EXCLUSIVE);
9694 
9695 	/*
9696 	 * copy tte's from subord pmap into grand pmap
9697 	 */
9698 
9699 	pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9700 	vaddr = (vm_map_offset_t) vstart;
9701 
9702 
9703 #if     (__ARM_VMSA__ == 7)
9704 	for (i = 0; i < num_tte; i++) {
9705 		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
9706 			goto nest_next;
9707 		}
9708 
9709 		stte_p = pmap_tte(subord, vaddr);
9710 		gtte_p = pmap_tte(grand, vaddr);
9711 		*gtte_p = *stte_p;
9712 
9713 nest_next:
9714 		vaddr += ARM_TT_L1_SIZE;
9715 	}
9716 #else
9717 	for (i = 0; i < num_tte; i++) {
9718 		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
9719 			goto nest_next;
9720 		}
9721 
9722 		stte_p = pmap_tte(subord, vaddr);
9723 		gtte_p = pmap_tte(grand, vaddr);
9724 		if (gtte_p == PT_ENTRY_NULL) {
9725 			pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9726 			kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
9727 			pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9728 
9729 			if (kr != KERN_SUCCESS) {
9730 				goto done;
9731 			}
9732 
9733 			gtte_p = pmap_tt2e(grand, vaddr);
9734 		}
9735 		*gtte_p = *stte_p;
9736 
9737 nest_next:
9738 		vaddr += pt_attr_twig_size(pt_attr);
9739 	}
9740 #endif
9741 
9742 	kr = KERN_SUCCESS;
9743 done:
9744 
9745 	stte_p = pmap_tte(grand, vstart);
9746 	FLUSH_PTE_STRONG();
9747 	PMAP_UPDATE_TLBS(grand, vstart, vend, false, false);
9748 
9749 	pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9750 #if XNU_MONITOR
9751 nest_cleanup:
9752 #endif
9753 	if (deref_subord) {
9754 #if XNU_MONITOR
9755 		os_atomic_dec(&subord->nested_count, relaxed);
9756 #endif
9757 		pmap_destroy_internal(subord);
9758 	}
9759 	return kr;
9760 }
9761 
9762 kern_return_t
9763 pmap_nest(
9764 	pmap_t grand,
9765 	pmap_t subord,
9766 	addr64_t vstart,
9767 	uint64_t size)
9768 {
9769 	kern_return_t kr = KERN_FAILURE;
9770 
9771 	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
9772 	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
9773 	    VM_KERNEL_ADDRHIDE(vstart));
9774 
9775 #if XNU_MONITOR
9776 	while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
9777 		pmap_alloc_page_for_ppl(0);
9778 	}
9779 
9780 	pmap_ledger_check_balance(grand);
9781 	pmap_ledger_check_balance(subord);
9782 #else
9783 	kr = pmap_nest_internal(grand, subord, vstart, size);
9784 #endif
9785 
9786 	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
9787 
9788 	return kr;
9789 }
9790 
9791 /*
9792  *	kern_return_t pmap_unnest(grand, vaddr, size)
9793  *
9794  *	grand  = the pmap that will have the virtual range unnested
9795  *	vaddr  = start of range in pmap to be unnested
9796  *	size   = size of range in pmap to be unnested
9797  *
9798  */
9799 
9800 kern_return_t
9801 pmap_unnest(
9802 	pmap_t grand,
9803 	addr64_t vaddr,
9804 	uint64_t size)
9805 {
9806 	return pmap_unnest_options(grand, vaddr, size, 0);
9807 }
9808 
9809 MARK_AS_PMAP_TEXT kern_return_t
9810 pmap_unnest_options_internal(
9811 	pmap_t grand,
9812 	addr64_t vaddr,
9813 	uint64_t size,
9814 	unsigned int option)
9815 {
9816 	vm_map_offset_t start;
9817 	vm_map_offset_t addr;
9818 	tt_entry_t     *tte_p;
9819 	unsigned int    current_index;
9820 	unsigned int    start_index;
9821 	unsigned int    max_index;
9822 	unsigned int    num_tte;
9823 	unsigned int    i;
9824 
9825 	addr64_t vend;
9826 	if (__improbable(os_add_overflow(vaddr, size, &vend))) {
9827 		panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
9828 	}
9829 
9830 	validate_pmap_mutable(grand);
9831 
9832 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9833 
9834 	if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
9835 		panic("pmap_unnest(): unaligned request");
9836 	}
9837 
9838 	if ((option & PMAP_UNNEST_CLEAN) == 0) {
9839 		if (grand->nested_pmap == NULL) {
9840 			panic("%s: %p has no nested pmap", __func__, grand);
9841 		}
9842 
9843 		if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
9844 			panic("%s: %p: unnest request for not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
9845 		}
9846 
9847 		pmap_lock(grand->nested_pmap, PMAP_LOCK_EXCLUSIVE);
9848 
9849 		start = vaddr;
9850 		start_index = (unsigned int)((vaddr - grand->nested_region_addr)  >> pt_attr_twig_shift(pt_attr));
9851 		max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
9852 		num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9853 
9854 		for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
9855 			pt_entry_t  *bpte, *epte, *cpte;
9856 
9857 			if (addr < grand->nested_pmap->nested_region_true_start) {
9858 				/* We haven't reached the interesting range. */
9859 				continue;
9860 			}
9861 
9862 			if (addr >= grand->nested_pmap->nested_region_true_end) {
9863 				/* We're done with the interesting range. */
9864 				break;
9865 			}
9866 
9867 			bpte = pmap_pte(grand->nested_pmap, addr);
9868 			epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
9869 
9870 			if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
9871 				setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
9872 
9873 				for (cpte = bpte; cpte <= epte; cpte++) {
9874 					pmap_paddr_t    pa;
9875 					unsigned int    pai = 0;
9876 					boolean_t               managed = FALSE;
9877 					pt_entry_t  spte;
9878 
9879 					if ((*cpte != ARM_PTE_TYPE_FAULT)
9880 					    && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
9881 						spte = *((volatile pt_entry_t*)cpte);
9882 						while (!managed) {
9883 							pa = pte_to_pa(spte);
9884 							if (!pa_valid(pa)) {
9885 								break;
9886 							}
9887 							pai = pa_index(pa);
9888 							pvh_lock(pai);
9889 							spte = *((volatile pt_entry_t*)cpte);
9890 							pa = pte_to_pa(spte);
9891 							if (pai == pa_index(pa)) {
9892 								managed = TRUE;
9893 								break; // Leave the PVH locked as we'll unlock it after we update the PTE
9894 							}
9895 							pvh_unlock(pai);
9896 						}
9897 
9898 						if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
9899 							write_pte_fast(cpte, (spte | ARM_PTE_NG));
9900 						}
9901 
9902 						if (managed) {
9903 							pvh_assert_locked(pai);
9904 							pvh_unlock(pai);
9905 						}
9906 					}
9907 				}
9908 			}
9909 
9910 			FLUSH_PTE_STRONG();
9911 		}
9912 
9913 		flush_mmu_tlb_region_asid_async(vaddr, (unsigned)size, grand->nested_pmap, true);
9914 		sync_tlb_flush();
9915 
9916 		pmap_unlock(grand->nested_pmap, PMAP_LOCK_EXCLUSIVE);
9917 	}
9918 
9919 	pmap_lock(grand, PMAP_LOCK_EXCLUSIVE);
9920 
9921 	/*
9922 	 * invalidate all pdes for segment at vaddr in pmap grand
9923 	 */
9924 	start = vaddr;
9925 	addr = vaddr;
9926 
9927 	num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9928 
9929 	for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
9930 		if (addr < grand->nested_pmap->nested_region_true_start) {
9931 			/* We haven't reached the interesting range. */
9932 			continue;
9933 		}
9934 
9935 		if (addr >= grand->nested_pmap->nested_region_true_end) {
9936 			/* We're done with the interesting range. */
9937 			break;
9938 		}
9939 
9940 		tte_p = pmap_tte(grand, addr);
9941 		*tte_p = ARM_TTE_TYPE_FAULT;
9942 	}
9943 
9944 	tte_p = pmap_tte(grand, start);
9945 	FLUSH_PTE_STRONG();
9946 	PMAP_UPDATE_TLBS(grand, start, vend, false, false);
9947 
9948 	pmap_unlock(grand, PMAP_LOCK_EXCLUSIVE);
9949 
9950 	return KERN_SUCCESS;
9951 }
9952 
9953 kern_return_t
9954 pmap_unnest_options(
9955 	pmap_t grand,
9956 	addr64_t vaddr,
9957 	uint64_t size,
9958 	unsigned int option)
9959 {
9960 	kern_return_t kr = KERN_FAILURE;
9961 
9962 	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
9963 	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
9964 
9965 #if XNU_MONITOR
9966 	kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
9967 #else
9968 	kr = pmap_unnest_options_internal(grand, vaddr, size, option);
9969 #endif
9970 
9971 	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
9972 
9973 	return kr;
9974 }
9975 
9976 boolean_t
9977 pmap_adjust_unnest_parameters(
9978 	__unused pmap_t p,
9979 	__unused vm_map_offset_t *s,
9980 	__unused vm_map_offset_t *e)
9981 {
9982 	return TRUE; /* to get to log_unnest_badness()... */
9983 }
9984 
9985 /*
9986  * disable no-execute capability on
9987  * the specified pmap
9988  */
9989 #if DEVELOPMENT || DEBUG
9990 void
9991 pmap_disable_NX(
9992 	pmap_t pmap)
9993 {
9994 	pmap->nx_enabled = FALSE;
9995 }
9996 #else
9997 void
9998 pmap_disable_NX(
9999 	__unused pmap_t pmap)
10000 {
10001 }
10002 #endif
10003 
10004 /*
10005  * flush a range of hardware TLB entries.
10006  * NOTE: assumes the smallest TLB entry in use will be for
10007  * an ARM small page (4K).
10008  */
10009 
10010 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
10011 
10012 #if __ARM_RANGE_TLBI__
10013 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
10014 #define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_TLB_RANGE_PAGES
10015 #else
10016 #define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
10017 #endif // __ARM_RANGE_TLBI__
10018 
10019 static void
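/*
 * The thresholds above select the flush strategy used by
 * flush_mmu_tlb_region_asid_async() below: once a request covers more pages
 * than the full-flush threshold, it is assumed to be cheaper to invalidate the
 * entire TLB (or the whole ASID) than to issue per-page invalidates, and on
 * hardware with range-TLBI support, requests above the range threshold are
 * handled with a single ranged invalidate.
 */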
10020 flush_mmu_tlb_region_asid_async(
10021 	vm_offset_t va,
10022 	size_t length,
10023 	pmap_t pmap,
10024 	bool last_level_only __unused)
10025 {
10026 #if     (__ARM_VMSA__ == 7)
10027 	vm_offset_t     end = va + length;
10028 	uint32_t        asid;
10029 
10030 	asid = pmap->hw_asid;
10031 
10032 	if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
10033 		boolean_t       flush_all = FALSE;
10034 
10035 		if ((asid == 0) || (pmap->type == PMAP_TYPE_NESTED)) {
10036 			flush_all = TRUE;
10037 		}
10038 		if (flush_all) {
10039 			flush_mmu_tlb_async();
10040 		} else {
10041 			flush_mmu_tlb_asid_async(asid);
10042 		}
10043 
10044 		return;
10045 	}
10046 	if (pmap->type == PMAP_TYPE_NESTED) {
10047 #if     !__ARM_MP_EXT__
10048 		flush_mmu_tlb();
10049 #else
10050 		va = arm_trunc_page(va);
10051 		while (va < end) {
10052 			flush_mmu_tlb_mva_entries_async(va);
10053 			va += ARM_SMALL_PAGE_SIZE;
10054 		}
10055 #endif
10056 		return;
10057 	}
10058 	va = arm_trunc_page(va) | (asid & 0xff);
10059 	flush_mmu_tlb_entries_async(va, end);
10060 
10061 #else
10062 	unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
10063 	const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
10064 	ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
10065 	uint32_t    asid;
10066 
10067 	asid = pmap->hw_asid;
10068 
10069 	if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
10070 		boolean_t       flush_all = FALSE;
10071 
10072 		if ((asid == 0) || (pmap->type == PMAP_TYPE_NESTED)) {
10073 			flush_all = TRUE;
10074 		}
10075 		if (flush_all) {
10076 			flush_mmu_tlb_async();
10077 		} else {
10078 			flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
10079 		}
10080 		return;
10081 	}
10082 #if __ARM_RANGE_TLBI__
10083 	if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
10084 		va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
10085 		if (pmap->type == PMAP_TYPE_NESTED) {
10086 			flush_mmu_tlb_allrange_async(va, last_level_only);
10087 		} else {
10088 			flush_mmu_tlb_range_async(va, last_level_only);
10089 		}
10090 		return;
10091 	}
10092 #endif
10093 	vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
10094 	va = tlbi_asid(asid) | tlbi_addr(va);
10095 
10096 	if (pmap->type == PMAP_TYPE_NESTED) {
10097 		flush_mmu_tlb_allentries_async(va, end, pmap_page_size, last_level_only);
10098 	} else {
10099 		flush_mmu_tlb_entries_async(va, end, pmap_page_size, last_level_only);
10100 	}
10101 
10102 #endif
10103 }
10104 
10105 MARK_AS_PMAP_TEXT static void
10106 flush_mmu_tlb_full_asid_async(pmap_t pmap)
10107 {
10108 #if (__ARM_VMSA__ == 7)
10109 	flush_mmu_tlb_asid_async(pmap->hw_asid);
10110 #else /* (__ARM_VMSA__ == 7) */
10111 	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
10112 #endif /* (__ARM_VMSA__ == 7) */
10113 }
10114 
10115 void
10116 flush_mmu_tlb_region(
10117 	vm_offset_t va,
10118 	unsigned length)
10119 {
10120 	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap, true);
10121 	sync_tlb_flush();
10122 }
10123 
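/*
 * Return the VM_WIMG cache attributes for the given physical page.  Managed
 * (pmap-tracked) pages report the attributes recorded in pp_attr_table, or
 * VM_WIMG_DEFAULT if none have been set; pages outside the managed range
 * report the attributes of the matching I/O range, or VM_WIMG_IO if no range
 * matches.
 */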
10124 unsigned int
10125 pmap_cache_attributes(
10126 	ppnum_t pn)
10127 {
10128 	pmap_paddr_t    paddr;
10129 	unsigned int    pai;
10130 	unsigned int    result;
10131 	pp_attr_t       pp_attr_current;
10132 
10133 	paddr = ptoa(pn);
10134 
10135 	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
10136 
10137 	if (!pa_valid(paddr)) {
10138 		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
10139 		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
10140 	}
10141 
10142 	result = VM_WIMG_DEFAULT;
10143 
10144 	pai = pa_index(paddr);
10145 
10146 	pp_attr_current = pp_attr_table[pai];
10147 	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
10148 		result = pp_attr_current & PP_ATTR_WIMG_MASK;
10149 	}
10150 	return result;
10151 }
10152 
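/*
 * Perform any cache maintenance required when a page's WIMG attributes change:
 * if the previous attribute was a cacheable type that may have left lines in
 * the caches (copyback, inner-writeback, or write-through), flush the page,
 * and force a data-cache clean when the page becomes VM_WIMG_RT.
 */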
10153 MARK_AS_PMAP_TEXT static void
10154 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
10155 {
10156 	if ((wimg_bits_prev != wimg_bits_new)
10157 	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
10158 	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
10159 	    && (wimg_bits_new != VM_WIMG_COPYBACK))
10160 	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
10161 	    && ((wimg_bits_new != VM_WIMG_COPYBACK) && (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
10162 		pmap_sync_page_attributes_phys(pn);
10163 	}
10164 
10165 	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
10166 		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
10167 	}
10168 }
10169 
10170 MARK_AS_PMAP_TEXT __unused void
10171 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
10172 {
10173 	pmap_paddr_t paddr = ptoa(pn);
10174 	const unsigned int pai = pa_index(paddr);
10175 
10176 	if (__improbable(!pa_valid(paddr))) {
10177 		panic("%s called on non-managed page 0x%08x", __func__, pn);
10178 	}
10179 
10180 	pvh_lock(pai);
10181 
10182 #if XNU_MONITOR
10183 	if (__improbable(ppattr_pa_test_monitor(paddr))) {
10184 		panic("%s invoked on PPL page 0x%08x", __func__, pn);
10185 	}
10186 #endif
10187 
10188 	pmap_update_cache_attributes_locked(pn, new_cacheattr);
10189 
10190 	pvh_unlock(pai);
10191 
10192 	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
10193 }
10194 
10195 void *
10196 pmap_map_compressor_page(ppnum_t pn)
10197 {
10198 #if __ARM_PTE_PHYSMAP__
10199 	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
10200 	if (cacheattr != VM_WIMG_DEFAULT) {
10201 #if XNU_MONITOR
10202 		pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
10203 #else
10204 		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
10205 #endif
10206 	}
10207 #endif
10208 	return (void*)phystokv(ptoa(pn));
10209 }
10210 
10211 void
10212 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
10213 {
10214 #if __ARM_PTE_PHYSMAP__
10215 	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
10216 	if (cacheattr != VM_WIMG_DEFAULT) {
10217 #if XNU_MONITOR
10218 		pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
10219 #else
10220 		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
10221 #endif
10222 	}
10223 #endif
10224 }
10225 
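/*
 * Batched variant of pmap_set_cache_attributes(), invoked once per page of a
 * batch.  When doit == FALSE this is a pre-flight pass: *res tracks how many
 * pages may still require cache maintenance, and the return value indicates
 * whether a single batched cache flush can cover the set.  When doit == TRUE
 * the page's WIMG attributes are actually updated, and the deferred cache
 * flush is issued once the last page of the batch (page_index == page_cnt - 1)
 * has been processed.
 */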
10226 MARK_AS_PMAP_TEXT boolean_t
10227 pmap_batch_set_cache_attributes_internal(
10228 	ppnum_t pn,
10229 	unsigned int cacheattr,
10230 	unsigned int page_cnt,
10231 	unsigned int page_index,
10232 	boolean_t doit,
10233 	unsigned int *res)
10234 {
10235 	pmap_paddr_t    paddr;
10236 	unsigned int    pai;
10237 	pp_attr_t       pp_attr_current;
10238 	pp_attr_t       pp_attr_template;
10239 	unsigned int    wimg_bits_prev, wimg_bits_new;
10240 
10241 	if (cacheattr & VM_WIMG_USE_DEFAULT) {
10242 		cacheattr = VM_WIMG_DEFAULT;
10243 	}
10244 
10245 	if ((doit == FALSE) && (*res == 0)) {
10246 		pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
10247 		*res = page_cnt;
10248 		pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
10249 		if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
10250 			return FALSE;
10251 		}
10252 	}
10253 
10254 	paddr = ptoa(pn);
10255 
10256 	if (!pa_valid(paddr)) {
10257 		panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
10258 	}
10259 
10260 	pai = pa_index(paddr);
10261 
10262 	if (doit) {
10263 		pvh_lock(pai);
10264 #if XNU_MONITOR
10265 		if (ppattr_pa_test_monitor(paddr)) {
10266 			panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
10267 		}
10268 #endif
10269 	}
10270 
10271 	do {
10272 		pp_attr_current = pp_attr_table[pai];
10273 		wimg_bits_prev = VM_WIMG_DEFAULT;
10274 		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
10275 			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
10276 		}
10277 
10278 		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
10279 
10280 		if (!doit) {
10281 			break;
10282 		}
10283 
10284 		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
10285 		 * to avoid losing simultaneous updates to other bits like refmod. */
10286 	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
10287 
10288 	wimg_bits_new = VM_WIMG_DEFAULT;
10289 	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
10290 		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
10291 	}
10292 
10293 	if (doit) {
10294 		if (wimg_bits_new != wimg_bits_prev) {
10295 			pmap_update_cache_attributes_locked(pn, cacheattr);
10296 		}
10297 		pvh_unlock(pai);
10298 		if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
10299 			pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
10300 		}
10301 	} else {
10302 		if (wimg_bits_new == VM_WIMG_COPYBACK) {
10303 			return FALSE;
10304 		}
10305 		if (wimg_bits_prev == wimg_bits_new) {
10306 			pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
10307 			*res = *res - 1;
10308 			pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
10309 			if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
10310 				return FALSE;
10311 			}
10312 		}
10313 		return TRUE;
10314 	}
10315 
10316 	if (page_cnt == (page_index + 1)) {
10317 		wimg_bits_prev = VM_WIMG_COPYBACK;
10318 		if (((wimg_bits_prev != wimg_bits_new))
10319 		    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
10320 		    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
10321 		    && (wimg_bits_new != VM_WIMG_COPYBACK))
10322 		    || ((wimg_bits_prev == VM_WIMG_WTHRU)
10323 		    && ((wimg_bits_new != VM_WIMG_COPYBACK) && (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
10324 			platform_cache_flush_wimg(wimg_bits_new);
10325 		}
10326 	}
10327 
10328 	return TRUE;
10329 }
10330 
10331 boolean_t
10332 pmap_batch_set_cache_attributes(
10333 	ppnum_t pn,
10334 	unsigned int cacheattr,
10335 	unsigned int page_cnt,
10336 	unsigned int page_index,
10337 	boolean_t doit,
10338 	unsigned int *res)
10339 {
10340 #if XNU_MONITOR
10341 	return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
10342 #else
10343 	return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
10344 #endif
10345 }
10346 
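/*
 * Common helper for pmap_set_cache_attributes{,_internal}(): update the page's
 * recorded WIMG attributes with a CAS loop under the PVH lock, rewrite the
 * existing mappings if the attributes changed, and perform any required cache
 * maintenance.  On XNU_MONITOR configurations, 'external' distinguishes
 * kernel-originated requests from PPL-internal ones for the ownership checks.
 */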
10347 MARK_AS_PMAP_TEXT static void
10348 pmap_set_cache_attributes_priv(
10349 	ppnum_t pn,
10350 	unsigned int cacheattr,
10351 	boolean_t external __unused)
10352 {
10353 	pmap_paddr_t    paddr;
10354 	unsigned int    pai;
10355 	pp_attr_t       pp_attr_current;
10356 	pp_attr_t       pp_attr_template;
10357 	unsigned int    wimg_bits_prev, wimg_bits_new;
10358 
10359 	paddr = ptoa(pn);
10360 
10361 	if (!pa_valid(paddr)) {
10362 		return;                         /* Not a managed page. */
10363 	}
10364 
10365 	if (cacheattr & VM_WIMG_USE_DEFAULT) {
10366 		cacheattr = VM_WIMG_DEFAULT;
10367 	}
10368 
10369 	pai = pa_index(paddr);
10370 
10371 	pvh_lock(pai);
10372 
10373 #if XNU_MONITOR
10374 	if (external && ppattr_pa_test_monitor(paddr)) {
10375 		panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
10376 	} else if (!external && !ppattr_pa_test_monitor(paddr)) {
10377 		panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
10378 	}
10379 #endif
10380 
10381 	do {
10382 		pp_attr_current = pp_attr_table[pai];
10383 		wimg_bits_prev = VM_WIMG_DEFAULT;
10384 		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
10385 			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
10386 		}
10387 
10388 		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
10389 
10390 		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
10391 		 * to avoid losing simultaneous updates to other bits like refmod. */
10392 	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
10393 
10394 	wimg_bits_new = VM_WIMG_DEFAULT;
10395 	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
10396 		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
10397 	}
10398 
10399 	if (wimg_bits_new != wimg_bits_prev) {
10400 		pmap_update_cache_attributes_locked(pn, cacheattr);
10401 	}
10402 
10403 	pvh_unlock(pai);
10404 
10405 	pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
10406 }
10407 
10408 MARK_AS_PMAP_TEXT void
10409 pmap_set_cache_attributes_internal(
10410 	ppnum_t pn,
10411 	unsigned int cacheattr)
10412 {
10413 	pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
10414 }
10415 
10416 void
10417 pmap_set_cache_attributes(
10418 	ppnum_t pn,
10419 	unsigned int cacheattr)
10420 {
10421 #if XNU_MONITOR
10422 	pmap_set_cache_attributes_ppl(pn, cacheattr);
10423 #else
10424 	pmap_set_cache_attributes_internal(pn, cacheattr);
10425 #endif
10426 }
10427 
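/*
 * Rewrite every existing mapping of a physical page (including the physical
 * aperture mapping, where present) with PTE attribute bits matching the new
 * WIMG attributes, flushing the TLB for each affected mapping.  Expected to be
 * called with the PVH lock for the page held.
 */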
10428 MARK_AS_PMAP_TEXT void
10429 pmap_update_cache_attributes_locked(
10430 	ppnum_t ppnum,
10431 	unsigned attributes)
10432 {
10433 	pmap_paddr_t    phys = ptoa(ppnum);
10434 	pv_entry_t      *pve_p;
10435 	pt_entry_t      *pte_p;
10436 	pv_entry_t      **pv_h;
10437 	pt_entry_t      tmplate;
10438 	unsigned int    pai;
10439 	boolean_t       tlb_flush_needed = FALSE;
10440 
10441 	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
10442 
10443 	if (pmap_panic_dev_wimg_on_managed) {
10444 		switch (attributes & VM_WIMG_MASK) {
10445 		case VM_WIMG_IO:                        // nGnRnE
10446 		case VM_WIMG_POSTED:                    // nGnRE
10447 		/* supported on DRAM, but slow, so we disallow */
10448 
10449 		case VM_WIMG_POSTED_REORDERED:          // nGRE
10450 		case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
10451 			/* unsupported on DRAM */
10452 
10453 			panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
10454 			    __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
10455 			break;
10456 
10457 		default:
10458 			/* not device type memory, all good */
10459 
10460 			break;
10461 		}
10462 	}
10463 
10464 #if __ARM_PTE_PHYSMAP__
10465 	vm_offset_t kva = phystokv(phys);
10466 	pte_p = pmap_pte(kernel_pmap, kva);
10467 
10468 	tmplate = *pte_p;
10469 	tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
10470 #if XNU_MONITOR
10471 	tmplate |= (wimg_to_pte(attributes, phys) & ~ARM_PTE_XPRR_MASK);
10472 #else
10473 	tmplate |= wimg_to_pte(attributes, phys);
10474 #endif
10475 #if (__ARM_VMSA__ > 7)
10476 	if (tmplate & ARM_PTE_HINT_MASK) {
10477 		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
10478 		    __FUNCTION__, pte_p, (void *)kva, tmplate);
10479 	}
10480 #endif
10481 	write_pte_strong(pte_p, tmplate);
10482 	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap, true);
10483 	tlb_flush_needed = TRUE;
10484 #endif
10485 
10486 	pai = pa_index(phys);
10487 
10488 	pv_h = pai_to_pvh(pai);
10489 
10490 	pte_p = PT_ENTRY_NULL;
10491 	pve_p = PV_ENTRY_NULL;
10492 	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
10493 		pte_p = pvh_ptep(pv_h);
10494 	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10495 		pve_p = pvh_pve_list(pv_h);
10496 		pte_p = PT_ENTRY_NULL;
10497 	}
10498 
10499 	int pve_ptep_idx = 0;
10500 	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
10501 		vm_map_address_t va;
10502 		pmap_t          pmap;
10503 
10504 		if (pve_p != PV_ENTRY_NULL) {
10505 			pte_p = pve_get_ptep(pve_p, pve_ptep_idx);
10506 			if (pte_p == PT_ENTRY_NULL) {
10507 				goto cache_skip_pve;
10508 			}
10509 		}
10510 
10511 #ifdef PVH_FLAG_IOMMU
10512 		if (pvh_ptep_is_iommu(pte_p)) {
10513 			goto cache_skip_pve;
10514 		}
10515 #endif
10516 		pmap = ptep_get_pmap(pte_p);
10517 		va = ptep_get_va(pte_p);
10518 
10519 		tmplate = *pte_p;
10520 		tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
10521 		tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes, phys);
10522 
10523 		write_pte_strong(pte_p, tmplate);
10524 		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap, true);
10525 		tlb_flush_needed = TRUE;
10526 
10527 cache_skip_pve:
10528 		pte_p = PT_ENTRY_NULL;
10529 		if ((pve_p != PV_ENTRY_NULL) && (++pve_ptep_idx == PTE_PER_PVE)) {
10530 			pve_ptep_idx = 0;
10531 			pve_p = pve_next(pve_p);
10532 		}
10533 	}
10534 	if (tlb_flush_needed) {
10535 		pmap_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
10536 	}
10537 
10538 	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
10539 }
10540 
10541 #if (__ARM_VMSA__ == 7)
10542 void
10543 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
10544     vm_map_address_t *user_commpage_addr)
10545 {
10546 	pmap_paddr_t    pa;
10547 	kern_return_t   kr;
10548 
10549 	assert(kernel_data_addr != NULL);
10550 	assert(kernel_text_addr != NULL);
10551 	assert(user_commpage_addr != NULL);
10552 
10553 	(void) pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
10554 
10555 	kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10556 	assert(kr == KERN_SUCCESS);
10557 
10558 	*kernel_data_addr = phystokv(pa);
10559 	// We don't have a PFZ for 32-bit ARM, so these are always 0
10560 	*kernel_text_addr = 0;
10561 	*user_commpage_addr = 0;
10562 }
10563 
10564 #else /* __ARM_VMSA__ == 7 */
10565 
10566 /**
10567  * Mark a pmap as being dedicated to use for a commpage mapping.
10568  * The pmap itself will never be activated on a CPU; its mappings will
10569  * only be embedded in userspace pmaps at a fixed virtual address.
10570  *
10571  * @param pmap the pmap to mark as belonging to a commpage.
10572  */
10573 static void
10574 pmap_set_commpage(pmap_t pmap)
10575 {
10576 #if XNU_MONITOR
10577 	assert(!pmap_ppl_locked_down);
10578 #endif
10579 	assert(pmap->type == PMAP_TYPE_USER);
10580 	pmap->type = PMAP_TYPE_COMMPAGE;
10581 	/*
10582 	 * Free the pmap's ASID.  This pmap should not ever be directly
10583 	 * activated in a CPU's TTBR.  Freeing the ASID will not only reduce
10584 	 * ASID space contention but will also cause pmap_switch() to panic
10585 	 * if an attacker tries to activate this pmap.  Disable preemption to
10586 	 * accommodate the *_nopreempt spinlock in free_asid().
10587 	 */
10588 	mp_disable_preemption();
10589 	pmap_get_pt_ops(pmap)->free_id(pmap);
10590 	mp_enable_preemption();
10591 }
10592 
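/*
 * Replace the attribute bits of the leaf (level 3) PTE mapping 'address' in
 * 'pmap', preserving only the existing output address and substituting
 * 'template' for everything else.
 */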
10593 static void
10594 pmap_update_tt3e(
10595 	pmap_t pmap,
10596 	vm_address_t address,
10597 	tt_entry_t template)
10598 {
10599 	tt_entry_t *ptep, pte;
10600 
10601 	ptep = pmap_tt3e(pmap, address);
10602 	if (ptep == NULL) {
10603 		panic("%s: no ptep?", __FUNCTION__);
10604 	}
10605 
10606 	pte = *ptep;
10607 	pte = tte_to_pa(pte) | template;
10608 	write_pte_strong(ptep, pte);
10609 }
10610 
10611 /* Note absence of non-global bit */
10612 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
10613 	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
10614 	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
10615 	        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
10616 
10617 /* Note absence of non-global bit and no-execute bit.  */
10618 #define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
10619 	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
10620 	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
10621 	        | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
10622 
10623 void
10624 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
10625     vm_map_address_t *user_text_addr)
10626 {
10627 	kern_return_t kr;
10628 	pmap_paddr_t data_pa = 0; // data address
10629 	pmap_paddr_t text_pa = 0; // text address
10630 
10631 	*kernel_data_addr = 0;
10632 	*kernel_text_addr = 0;
10633 	*user_text_addr = 0;
10634 
10635 #if XNU_MONITOR
10636 	data_pa = pmap_alloc_page_for_kern(0);
10637 	assert(data_pa);
10638 	memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
10639 #if CONFIG_ARM_PFZ
10640 	text_pa = pmap_alloc_page_for_kern(0);
10641 	assert(text_pa);
10642 	memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
10643 #endif
10644 
10645 #else /* XNU_MONITOR */
10646 	(void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
10647 #if CONFIG_ARM_PFZ
10648 	(void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
10649 #endif
10650 
10651 #endif /* XNU_MONITOR */
10652 
10653 	/*
10654 	 * In order to avoid burning extra pages on mapping the shared page, we
10655 	 * create a dedicated pmap for the shared page.  We forcibly nest the
10656 	 * translation tables from this pmap into other pmaps.  The level we
10657 	 * will nest at depends on the MMU configuration (page size, TTBR range,
10658 	 * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
10659 	 *
10660 	 * Note that this is NOT "the nested pmap" (which is used to nest the
10661 	 * shared cache).
10662 	 *
10663 	 * Note that we update parameters of the entry for our unique needs (NG
10664 	 * entry, etc.).
10665 	 */
10666 	sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
10667 	assert(sharedpage_pmap_default != NULL);
10668 	pmap_set_commpage(sharedpage_pmap_default);
10669 
10670 	/* The user 64-bit mapping... */
10671 	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10672 	assert(kr == KERN_SUCCESS);
10673 	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10674 #if CONFIG_ARM_PFZ
10675 	/* User mapping of the comm page text section, for the 64-bit mapping only.
10676 	 *
10677 	 * We don't insert it into the 32-bit mapping because we don't want 32-bit
10678 	 * user processes to get this page mapped in; they should never call into
10679 	 * this page.
10680 	 *
10681 	 * The data comm page is in a pre-reserved L3 VA range and the text commpage
10682 	 * is slid within the same L3 as the data commpage.  It is either outside the
10683 	 * maximum user VA or is pre-reserved in vm_map_exec(), so either way it is
10684 	 * reserved and unavailable to mach VM for future mappings.
10685 	 */
10686 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
10687 	int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;
10688 
10689 	vm_map_address_t commpage_text_va = 0;
10690 
10691 	do {
10692 		int text_leaf_index = random() % num_ptes;
10693 
10694 		// Generate a VA for the commpage text with the same root and twig index as the
10695 		// data comm page, but with the new leaf index we've just generated.
10696 		commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
10697 		commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
10698 	} while (commpage_text_va == _COMM_PAGE64_BASE_ADDRESS); // Try again if we collide (should be unlikely)
10699 
10700 	// Assert that this is empty
10701 	__assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
10702 	assert(ptep != PT_ENTRY_NULL);
10703 	assert(*ptep == ARM_TTE_EMPTY);
10704 
10705 	// At this point, we've found the address we want to insert our comm page at
10706 	kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10707 	assert(kr == KERN_SUCCESS);
10708 	// Mark it as global page R/X so that it doesn't get thrown out on tlb flush
10709 	pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);
10710 
10711 	*user_text_addr = commpage_text_va;
10712 #endif
10713 
10714 	/* ...and the user 32-bit mapping. */
10715 	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10716 	assert(kr == KERN_SUCCESS);
10717 	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10718 
10719 #if __ARM_MIXED_PAGE_SIZE__
10720 	/**
10721 	 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
10722 	 * new set of page tables that point to the exact same 16K shared page as
10723 	 * before. Only the first 4K of the 16K shared page is mapped since that's
10724 	 * the only part that contains relevant data.
10725 	 */
10726 	sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
10727 	assert(sharedpage_pmap_4k != NULL);
10728 	pmap_set_commpage(sharedpage_pmap_4k);
10729 
10730 	/* The user 64-bit mapping... */
10731 	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10732 	assert(kr == KERN_SUCCESS);
10733 	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10734 
10735 	/* ...and the user 32-bit mapping. */
10736 	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10737 	assert(kr == KERN_SUCCESS);
10738 	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10739 
10740 #endif
10741 
10742 	/* For manipulation in kernel, go straight to physical page */
10743 	*kernel_data_addr = phystokv(data_pa);
10744 	*kernel_text_addr = (text_pa) ? phystokv(text_pa) : 0;
10745 }
10746 
10747 
10748 /*
10749  * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
10750  * with user controlled TTEs for regions that aren't explicitly reserved by the
10751  * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
10752  */
10753 #if (ARM_PGSHIFT == 14)
10754 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
10755 #elif (ARM_PGSHIFT == 12)
10756 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
10757 #else
10758 #error Nested shared page mapping is unsupported on this config
10759 #endif
10760 
10761 MARK_AS_PMAP_TEXT kern_return_t
10762 pmap_insert_sharedpage_internal(
10763 	pmap_t pmap)
10764 {
10765 	kern_return_t kr = KERN_SUCCESS;
10766 	vm_offset_t sharedpage_vaddr;
10767 	pt_entry_t *ttep, *src_ttep;
10768 	int options = 0;
10769 	pmap_t sharedpage_pmap = sharedpage_pmap_default;
10770 
10771 	/* Validate the pmap input before accessing its data. */
10772 	validate_pmap_mutable(pmap);
10773 
10774 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10775 	const unsigned int sharedpage_level = pt_attr_commpage_level(pt_attr);
10776 
10777 #if __ARM_MIXED_PAGE_SIZE__
10778 #if !__ARM_16K_PG__
10779 	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
10780 	#error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
10781 #endif /* !__ARM_16K_PG__ */
10782 
10783 	/* Choose the correct shared page pmap to use. */
10784 	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
10785 	if (pmap_page_size == 16384) {
10786 		sharedpage_pmap = sharedpage_pmap_default;
10787 	} else if (pmap_page_size == 4096) {
10788 		sharedpage_pmap = sharedpage_pmap_4k;
10789 	} else {
10790 		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
10791 	}
10792 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10793 
10794 #if XNU_MONITOR
10795 	options |= PMAP_OPTIONS_NOWAIT;
10796 #endif /* XNU_MONITOR */
10797 
10798 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
10799 #error We assume a single page.
10800 #endif
10801 
10802 	if (pmap_is_64bit(pmap)) {
10803 		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
10804 	} else {
10805 		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
10806 	}
10807 
10808 
10809 	pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
10810 
10811 	/*
10812 	 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
10813 	 * two (2MB) depending on the address space layout. For 16KB pages, each level
10814 	 * one entry is 64GB, so we must go to the second level entry (32MB) in order
10815 	 * to "nest".
10816 	 *
10817 	 * Note: This is not "nesting" in the shared cache sense. This definition of
10818 	 * nesting just means inserting pointers to pre-allocated tables into
10819 	 * the passed-in pmap to allow us to share page tables (which map the shared
10820 	 * page) for every task. This saves at least one page of memory per process
10821 	 * compared to creating new page tables in every process for mapping the
10822 	 * shared page.
10823 	 */
10824 
10825 	/**
10826 	 * Allocate the twig page tables if needed, and slam a pointer to the shared
10827 	 * page's tables into place.
10828 	 */
10829 	while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
10830 		pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
10831 
10832 		kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);
10833 
10834 		if (kr != KERN_SUCCESS) {
10835 #if XNU_MONITOR
10836 			if (kr == KERN_RESOURCE_SHORTAGE) {
10837 				return kr;
10838 			} else
10839 #endif
10840 			{
10841 				panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
10842 			}
10843 		}
10844 
10845 		pmap_lock(pmap, PMAP_LOCK_EXCLUSIVE);
10846 	}
10847 
10848 	if (*ttep != ARM_PTE_EMPTY) {
10849 		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
10850 	}
10851 
10852 	src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);
10853 
10854 	*ttep = *src_ttep;
10855 	FLUSH_PTE_STRONG();
10856 
10857 	pmap_unlock(pmap, PMAP_LOCK_EXCLUSIVE);
10858 
10859 	return kr;
10860 }
10861 
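/*
 * Undo pmap_insert_sharedpage_internal(): clear the twig-level TTE that points
 * at the shared commpage tables (panicking if something else is mapped there)
 * and flush the TLB for the commpage address.
 */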
10862 static void
10863 pmap_unmap_sharedpage(
10864 	pmap_t pmap)
10865 {
10866 	pt_entry_t *ttep;
10867 	vm_offset_t sharedpage_vaddr;
10868 	pmap_t sharedpage_pmap = sharedpage_pmap_default;
10869 
10870 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10871 	const unsigned int sharedpage_level = pt_attr_commpage_level(pt_attr);
10872 
10873 #if __ARM_MIXED_PAGE_SIZE__
10874 #if !__ARM_16K_PG__
10875 	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
10876 	#error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
10877 #endif /* !__ARM_16K_PG__ */
10878 
10879 	/* Choose the correct shared page pmap to use. */
10880 	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
10881 	if (pmap_page_size == 16384) {
10882 		sharedpage_pmap = sharedpage_pmap_default;
10883 	} else if (pmap_page_size == 4096) {
10884 		sharedpage_pmap = sharedpage_pmap_4k;
10885 	} else {
10886 		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
10887 	}
10888 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10889 
10890 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
10891 #error We assume a single page.
10892 #endif
10893 
10894 	if (pmap_is_64bit(pmap)) {
10895 		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
10896 	} else {
10897 		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
10898 	}
10899 
10900 
10901 	ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);
10902 
10903 	if (ttep == NULL) {
10904 		return;
10905 	}
10906 
10907 	/* It had better be mapped to the shared page. */
10908 	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
10909 		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
10910 	}
10911 
10912 	*ttep = ARM_TTE_EMPTY;
10913 	FLUSH_PTE_STRONG();
10914 
10915 	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap, false);
10916 	sync_tlb_flush();
10917 }
10918 
10919 void
10920 pmap_insert_sharedpage(
10921 	pmap_t pmap)
10922 {
10923 #if XNU_MONITOR
10924 	kern_return_t kr = KERN_FAILURE;
10925 
10926 	while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
10927 		pmap_alloc_page_for_ppl(0);
10928 	}
10929 
10930 	pmap_ledger_check_balance(pmap);
10931 
10932 	if (kr != KERN_SUCCESS) {
10933 		panic("%s: failed to insert the shared page, kr=%d, "
10934 		    "pmap=%p",
10935 		    __FUNCTION__, kr,
10936 		    pmap);
10937 	}
10938 #else
10939 	pmap_insert_sharedpage_internal(pmap);
10940 #endif
10941 }
10942 
10943 static boolean_t
10944 pmap_is_64bit(
10945 	pmap_t pmap)
10946 {
10947 	return pmap->is_64bit;
10948 }
10949 
10950 bool
10951 pmap_is_exotic(
10952 	pmap_t pmap __unused)
10953 {
10954 	return false;
10955 }
10956 
10957 #endif
10958 
10959 /* ARMTODO -- an implementation that accounts for
10960  * holes in the physical map, if any.
10961  */
10962 boolean_t
10963 pmap_valid_page(
10964 	ppnum_t pn)
10965 {
10966 	return pa_valid(ptoa(pn));
10967 }
10968 
10969 boolean_t
10970 pmap_bootloader_page(
10971 	ppnum_t pn)
10972 {
10973 	pmap_paddr_t paddr = ptoa(pn);
10974 
10975 	if (pa_valid(paddr)) {
10976 		return FALSE;
10977 	}
10978 	pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
10979 	return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
10980 }
10981 
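/*
 * Return TRUE if the pmap contains no PTEs other than ARM_PTE_EMPTY anywhere
 * in [va_start, va_end).  Note that compressed-page markers count as non-empty
 * for this purpose.
 */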
10982 MARK_AS_PMAP_TEXT boolean_t
10983 pmap_is_empty_internal(
10984 	pmap_t pmap,
10985 	vm_map_offset_t va_start,
10986 	vm_map_offset_t va_end)
10987 {
10988 	vm_map_offset_t block_start, block_end;
10989 	tt_entry_t *tte_p;
10990 
10991 	if (pmap == NULL) {
10992 		return TRUE;
10993 	}
10994 
10995 	validate_pmap(pmap);
10996 
10997 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10998 	unsigned int initial_not_in_kdp = not_in_kdp;
10999 
11000 	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11001 		pmap_lock(pmap, PMAP_LOCK_SHARED);
11002 	}
11003 
11004 #if     (__ARM_VMSA__ == 7)
11005 	if (tte_index(pt_attr, va_end) >= pmap->tte_index_max) {
11006 		if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11007 			pmap_unlock(pmap, PMAP_LOCK_SHARED);
11008 		}
11009 		return TRUE;
11010 	}
11011 #endif
11012 
11013 	/* TODO: This will be faster if we increment ttep at each level. */
11014 	block_start = va_start;
11015 
11016 	while (block_start < va_end) {
11017 		pt_entry_t     *bpte_p, *epte_p;
11018 		pt_entry_t     *pte_p;
11019 
11020 		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
11021 		if (block_end > va_end) {
11022 			block_end = va_end;
11023 		}
11024 
11025 		tte_p = pmap_tte(pmap, block_start);
11026 		if ((tte_p != PT_ENTRY_NULL)
11027 		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
11028 			pte_p = (pt_entry_t *) ttetokv(*tte_p);
11029 			bpte_p = &pte_p[pte_index(pt_attr, block_start)];
11030 			epte_p = &pte_p[pte_index(pt_attr, block_end)];
11031 
11032 			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
11033 				if (*pte_p != ARM_PTE_EMPTY) {
11034 					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11035 						pmap_unlock(pmap, PMAP_LOCK_SHARED);
11036 					}
11037 					return FALSE;
11038 				}
11039 			}
11040 		}
11041 		block_start = block_end;
11042 	}
11043 
11044 	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
11045 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
11046 	}
11047 
11048 	return TRUE;
11049 }
11050 
11051 boolean_t
11052 pmap_is_empty(
11053 	pmap_t pmap,
11054 	vm_map_offset_t va_start,
11055 	vm_map_offset_t va_end)
11056 {
11057 #if XNU_MONITOR
11058 	return pmap_is_empty_ppl(pmap, va_start, va_end);
11059 #else
11060 	return pmap_is_empty_internal(pmap, va_start, va_end);
11061 #endif
11062 }
11063 
11064 vm_map_offset_t
11065 pmap_max_offset(
11066 	boolean_t               is64,
11067 	unsigned int    option)
11068 {
11069 	return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
11070 }
11071 
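/*
 * Return the maximum user VA for a 64-bit address space, selected by 'option'
 * (ARM_PMAP_MAX_OFFSET_DEFAULT/MIN/MAX/DEVICE/JUMBO).  A non-zero
 * arm64_pmap_max_offset_default (typically set via boot-arg) overrides the
 * DEVICE and JUMBO computations.
 */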
11072 vm_map_offset_t
11073 pmap_max_64bit_offset(
11074 	__unused unsigned int option)
11075 {
11076 	vm_map_offset_t max_offset_ret = 0;
11077 
11078 #if defined(__arm64__)
11079 	#define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
11080 	_Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
11081 	    "Minimum address space size outside allowable range");
11082 	const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
11083 	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
11084 		max_offset_ret = arm64_pmap_max_offset_default;
11085 	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
11086 		max_offset_ret = min_max_offset;
11087 	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
11088 		max_offset_ret = MACH_VM_MAX_ADDRESS;
11089 	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
11090 		if (arm64_pmap_max_offset_default) {
11091 			max_offset_ret = arm64_pmap_max_offset_default;
11092 		} else if (max_mem > 0xC0000000) {
11093 			max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
11094 		} else if (max_mem > 0x40000000) {
11095 			max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
11096 		} else {
11097 			max_offset_ret = min_max_offset;
11098 		}
11099 	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
11100 		if (arm64_pmap_max_offset_default) {
11101 			// Allow the boot-arg to override jumbo size
11102 			max_offset_ret = arm64_pmap_max_offset_default;
11103 		} else {
11104 			max_offset_ret = MACH_VM_MAX_ADDRESS;     // Max offset is 64GB for pmaps with special "jumbo" blessing
11105 		}
11106 	} else {
11107 		panic("pmap_max_64bit_offset illegal option 0x%x", option);
11108 	}
11109 
11110 	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
11111 	assert(max_offset_ret >= min_max_offset);
11112 #else
11113 	panic("Can't run pmap_max_64bit_offset on non-64bit architectures");
11114 #endif
11115 
11116 	return max_offset_ret;
11117 }
11118 
11119 vm_map_offset_t
11120 pmap_max_32bit_offset(
11121 	unsigned int option)
11122 {
11123 	vm_map_offset_t max_offset_ret = 0;
11124 
11125 	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
11126 		max_offset_ret = arm_pmap_max_offset_default;
11127 	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
11128 		max_offset_ret = 0x80000000;
11129 	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
11130 		max_offset_ret = VM_MAX_ADDRESS;
11131 	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
11132 		if (arm_pmap_max_offset_default) {
11133 			max_offset_ret = arm_pmap_max_offset_default;
11134 		} else if (max_mem > 0x20000000) {
11135 			max_offset_ret = 0x80000000;
11136 		} else {
11137 			max_offset_ret = 0x80000000;
11138 		}
11139 	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
11140 		max_offset_ret = 0x80000000;
11141 	} else {
11142 		panic("pmap_max_32bit_offset illegal option 0x%x", option);
11143 	}
11144 
11145 	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
11146 	return max_offset_ret;
11147 }
11148 
11149 #if CONFIG_DTRACE
11150 /*
11151  * Constrain DTrace copyin/copyout actions
11152  */
11153 extern kern_return_t dtrace_copyio_preflight(addr64_t);
11154 extern kern_return_t dtrace_copyio_postflight(addr64_t);
11155 
11156 kern_return_t
11157 dtrace_copyio_preflight(
11158 	__unused addr64_t va)
11159 {
11160 	if (current_map() == kernel_map) {
11161 		return KERN_FAILURE;
11162 	} else {
11163 		return KERN_SUCCESS;
11164 	}
11165 }
11166 
11167 kern_return_t
11168 dtrace_copyio_postflight(
11169 	__unused addr64_t va)
11170 {
11171 	return KERN_SUCCESS;
11172 }
11173 #endif /* CONFIG_DTRACE */
11174 
11175 
11176 void
11177 pmap_flush_context_init(__unused pmap_flush_context *pfc)
11178 {
11179 }
11180 
11181 
11182 void
11183 pmap_flush(
11184 	__unused pmap_flush_context *cpus_to_flush)
11185 {
11186 	/* not implemented yet */
11187 	return;
11188 }
11189 
11190 #if XNU_MONITOR
11191 
11192 /*
11193  * Enforce that the address range described by kva and nbytes is not currently
11194  * PPL-owned, and won't become PPL-owned while pinned.  This is to prevent
11195  * unintentionally writing to PPL-owned memory.
11196  */
11197 static void
11198 pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
11199 {
11200 	vm_offset_t end;
11201 	if (os_add_overflow(kva, nbytes, &end)) {
11202 		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
11203 	}
11204 	for (vm_offset_t ckva = trunc_page(kva); ckva < end; ckva = round_page(ckva + 1)) {
11205 		pmap_paddr_t pa = kvtophys_nofail(ckva);
11206 		pp_attr_t attr;
11207 		unsigned int pai = pa_index(pa);
11208 		if (ckva == phystokv(pa)) {
11209 			panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
11210 		}
11211 		do {
11212 			attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
11213 			if (attr & PP_ATTR_MONITOR) {
11214 				panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
11215 			}
11216 		} while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
11217 	}
11218 }
11219 
11220 static void
11221 pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
11222 {
11223 	vm_offset_t end;
11224 	if (os_add_overflow(kva, nbytes, &end)) {
11225 		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
11226 	}
11227 	for (vm_offset_t ckva = trunc_page(kva); ckva < end; ckva = round_page(ckva + 1)) {
11228 		pmap_paddr_t pa = kvtophys_nofail(ckva);
11229 
11230 		if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
11231 			panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
11232 		}
11233 		assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
11234 		ppattr_pa_clear_no_monitor(pa);
11235 	}
11236 }
11237 
11238 /**
11239  * Lock down a page, making all mappings read-only, and preventing further
11240  * mappings or removal of this particular kva's mapping. Effectively, it makes
11241  * the physical page at kva immutable (see the ppl_writable parameter for an
11242  * exception to this).
11243  *
11244  * @param kva Valid address to any mapping of the physical page to lockdown.
11245  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11246  * @param ppl_writable True if the PPL should still be able to write to the page
11247  *                     using the physical aperture mapping. False will make the
11248  *                     page read-only for both the kernel and PPL in the
11249  *                     physical aperture.
11250  */
11251 MARK_AS_PMAP_TEXT static void
11252 pmap_ppl_lockdown_page(vm_address_t kva, uint64_t lockdown_flag, bool ppl_writable)
11253 {
11254 	const pmap_paddr_t pa = kvtophys_nofail(kva);
11255 	const unsigned int pai = pa_index(pa);
11256 
11257 	assert(lockdown_flag & PVH_FLAG_LOCKDOWN_MASK);
11258 	pvh_lock(pai);
11259 	pv_entry_t **pvh = pai_to_pvh(pai);
11260 	const vm_offset_t pvh_flags = pvh_get_flags(pvh);
11261 
11262 	if (__improbable(ppattr_pa_test_monitor(pa))) {
11263 		panic("%s: %#lx (page %llx) belongs to PPL", __func__, kva, pa);
11264 	}
11265 
11266 	if (__improbable(pvh_flags & (PVH_FLAG_LOCKDOWN_MASK | PVH_FLAG_EXEC))) {
11267 		panic("%s: %#lx already locked down/executable (%#llx)",
11268 		    __func__, kva, (uint64_t)pvh_flags);
11269 	}
11270 
11271 	pvh_set_flags(pvh, pvh_flags | lockdown_flag);
11272 
11273 	/* Update the physical aperture mapping to prevent kernel write access. */
11274 	const unsigned int new_xprr_perm =
11275 	    (ppl_writable) ? XPRR_PPL_RW_PERM : XPRR_KERN_RO_PERM;
11276 	pmap_set_xprr_perm(pai, XPRR_KERN_RW_PERM, new_xprr_perm);
11277 
11278 	pvh_unlock(pai);
11279 
11280 	pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0, NULL);
11281 
11282 	/**
11283 	 * Double-check that the mapping didn't change physical addresses before the
11284 	 * LOCKDOWN flag was set (there is a brief window between the above
11285 	 * kvtophys() and pvh_lock() calls where the mapping could have changed).
11286 	 *
11287 	 * This doesn't solve the ABA problem, but this doesn't have to since once
11288 	 * the pvh_lock() is grabbed no new mappings can be created on this physical
11289 	 * page without the LOCKDOWN flag already set (so any future mappings can
11290 	 * only be RO, and no existing mappings can be removed).
11291 	 */
11292 	if (kvtophys_nofail(kva) != pa) {
11293 		panic("%s: Physical address of mapping changed while setting LOCKDOWN "
11294 		    "flag %#lx %#llx", __func__, kva, (uint64_t)pa);
11295 	}
11296 }
11297 
11298 /**
11299  * Helper for releasing a page from being locked down to the PPL, making it writable to the
11300  * kernel once again.
11301  *
11302  * @note This must be paired with a pmap_ppl_lockdown_page() call. Any attempts
11303  *       to unlockdown a page that was never locked down, will panic.
11304  *
11305  * @param pai physical page index to release from lockdown.  PVH lock for this page must be held.
11306  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11307  * @param ppl_writable This must match whatever `ppl_writable` parameter was
11308  *                     passed to the paired pmap_ppl_lockdown_page() call. Any
11309  *                     deviation will result in a panic.
11310  */
11311 MARK_AS_PMAP_TEXT static void
11312 pmap_ppl_unlockdown_page_locked(unsigned int pai, uint64_t lockdown_flag, bool ppl_writable)
11313 {
11314 	pvh_assert_locked(pai);
11315 	pv_entry_t **pvh = pai_to_pvh(pai);
11316 	const vm_offset_t pvh_flags = pvh_get_flags(pvh);
11317 
11318 	if (__improbable(!(pvh_flags & lockdown_flag))) {
11319 		panic("%s: unlockdown attempt on not locked down pai %d, type=0x%llx, PVH flags=0x%llx",
11320 		    __func__, pai, (unsigned long long)lockdown_flag, (unsigned long long)pvh_flags);
11321 	}
11322 
11323 	pvh_set_flags(pvh, pvh_flags & ~lockdown_flag);
11324 
11325 	/* Restore the pre-lockdown physical aperture mapping permissions. */
11326 	const unsigned int old_xprr_perm =
11327 	    (ppl_writable) ? XPRR_PPL_RW_PERM : XPRR_KERN_RO_PERM;
11328 	pmap_set_xprr_perm(pai, old_xprr_perm, XPRR_KERN_RW_PERM);
11329 }
11330 
11331 /**
11332  * Release a page from being locked down to the PPL, making it writable to the
11333  * kernel once again.
11334  *
11335  * @note This must be paired with a pmap_ppl_lockdown_page() call. Any attempts
11336  *       to unlockdown a page that was never locked down, will panic.
11337  *
11338  * @param kva Valid address to any mapping of the physical page to unlockdown.
11339  * @param lockdown_flag Bit within PVH_FLAG_LOCKDOWN_MASK specifying the lockdown reason
11340  * @param ppl_writable This must match whatever `ppl_writable` parameter was
11341  *                     passed to the paired pmap_ppl_lockdown_page() call. Any
11342  *                     deviation will result in a panic.
11343  */
11344 MARK_AS_PMAP_TEXT static void
11345 pmap_ppl_unlockdown_page(vm_address_t kva, uint64_t lockdown_flag, bool ppl_writable)
11346 {
11347 	const pmap_paddr_t pa = kvtophys_nofail(kva);
11348 	const unsigned int pai = pa_index(pa);
11349 
11350 	assert(lockdown_flag & PVH_FLAG_LOCKDOWN_MASK);
11351 	pvh_lock(pai);
11352 	pmap_ppl_unlockdown_page_locked(pai, lockdown_flag, ppl_writable);
11353 	pvh_unlock(pai);
11354 }
11355 
11356 #else /* XNU_MONITOR */
11357 
11358 static void __unused
11359 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
11360 {
11361 }
11362 
11363 static void __unused
11364 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
11365 {
11366 }
11367 
11368 #endif /* !XNU_MONITOR */
11369 
11370 
11371 MARK_AS_PMAP_TEXT static inline void
11372 pmap_cs_lockdown_pages(vm_address_t kva, vm_size_t size, bool ppl_writable)
11373 {
11374 #if XNU_MONITOR
11375 	pmap_ppl_lockdown_pages(kva, size, PVH_FLAG_LOCKDOWN_CS, ppl_writable);
11376 #else
11377 	pmap_ppl_lockdown_pages(kva, size, 0, ppl_writable);
11378 #endif
11379 }
11380 
11381 MARK_AS_PMAP_TEXT static inline void
11382 pmap_cs_unlockdown_pages(vm_address_t kva, vm_size_t size, bool ppl_writable)
11383 {
11384 #if XNU_MONITOR
11385 	pmap_ppl_unlockdown_pages(kva, size, PVH_FLAG_LOCKDOWN_CS, ppl_writable);
11386 #else
11387 	pmap_ppl_unlockdown_pages(kva, size, 0, ppl_writable);
11388 #endif
11389 }
11390 
11391 /**
11392  * Perform basic validation checks on the source, destination and
11393  * corresponding offset/sizes prior to writing to a read only allocation.
11394  *
11395  * @note Should be called before writing to an allocation from the read
11396  * only allocator.
11397  *
11398  * @param zid The ID of the zone the allocation belongs to.
11399  * @param va VA of element being modified (destination).
11400  * @param offset Offset being written to, in the element.
11401  * @param new_data Pointer to new data (source).
11402  * @param new_data_size Size of modification.
11403  *
11404  */
11405 
11406 MARK_AS_PMAP_TEXT static void
11407 pmap_ro_zone_validate_element(
11408 	zone_id_t           zid,
11409 	vm_offset_t         va,
11410 	vm_offset_t         offset,
11411 	const vm_offset_t   new_data,
11412 	vm_size_t           new_data_size)
11413 {
11414 	vm_size_t elem_size = zone_elem_size_ro(zid);
11415 	vm_offset_t sum = 0, page = trunc_page(va);
11416 	if (__improbable(new_data_size > (elem_size - offset))) {
11417 		panic("%s: New data size %lu too large for elem size %lu at addr %p",
11418 		    __func__, (uintptr_t)new_data_size, (uintptr_t)elem_size, (void*)va);
11419 	}
11420 	if (__improbable(offset >= elem_size)) {
11421 		panic("%s: Offset %lu too large for elem size %lu at addr %p",
11422 		    __func__, (uintptr_t)offset, (uintptr_t)elem_size, (void*)va);
11423 	}
11424 	if (__improbable(os_add3_overflow(va, offset, new_data_size, &sum))) {
11425 		panic("%s: Integer addition overflow %p + %lu + %lu = %lu",
11426 		    __func__, (void*)va, (uintptr_t)offset, (uintptr_t) new_data_size,
11427 		    (uintptr_t)sum);
11428 	}
11429 	if (__improbable(os_add_overflow(new_data, new_data_size, &sum))) {
11430 		panic("%s: Integer addition overflow %p + %lu = %lu",
11431 		    __func__, (void*)new_data, (uintptr_t)new_data_size, (uintptr_t)sum);
11432 	}
11433 	if (__improbable((va - page) % elem_size)) {
11434 		panic("%s: Start of element %p is not aligned to element size %lu",
11435 		    __func__, (void *)va, (uintptr_t)elem_size);
11436 	}
11437 
11438 	/* Check element is from correct zone */
11439 	zone_require_ro(zid, elem_size, (void*)va);
11440 }
11441 
11442 /**
11443  * Ensure that physical page is locked down and pinned, before writing to it.
11444  *
11445  * @note Should be called before writing to an allocation from the read
11446  * only allocator. This function pairs with pmap_ro_zone_unlock_phy_page,
11447  * ensure that it is called after the modification.
11448  *
11449  *
11450  * @param pa Physical address of the element being modified.
11451  * @param va Virtual address of element being modified.
11452  * @param size Size of the modification.
11453  *
11454  */
11455 
11456 MARK_AS_PMAP_TEXT static void
11457 pmap_ro_zone_lock_phy_page(
11458 	const pmap_paddr_t  pa,
11459 	vm_offset_t         va,
11460 	vm_size_t           size)
11461 {
11462 	const unsigned int pai = pa_index(pa);
11463 	pvh_lock(pai);
11464 
11465 	/* Ensure that the physical page is locked down */
11466 #if XNU_MONITOR
11467 	pv_entry_t **pvh = pai_to_pvh(pai);
11468 	if (!(pvh_get_flags(pvh) & PVH_FLAG_LOCKDOWN_RO)) {
11469 		panic("%s: Physical page not locked down %llx", __func__, pa);
11470 	}
11471 #endif /* XNU_MONITOR */
11472 
11473 	/* Ensure page can't become PPL-owned memory before the memcpy occurs */
11474 	pmap_pin_kernel_pages(va, size);
11475 }
11476 
11477 /**
11478  * Unlock and unpin physical page after writing to it.
11479  *
11480  * @note Should be called after writing to an allocation from the read
11481  * only allocator. This function pairs with pmap_ro_zone_lock_phy_page;
11482  * ensure that the lock was taken prior to the modification.
11483  *
11484  * @param pa Physical address of the element that was modified.
11485  * @param va Virtual address of element that was modified.
11486  * @param size Size of the modification.
11487  *
11488  */
11489 
11490 MARK_AS_PMAP_TEXT static void
11491 pmap_ro_zone_unlock_phy_page(
11492 	const pmap_paddr_t  pa,
11493 	vm_offset_t         va,
11494 	vm_size_t           size)
11495 {
11496 	const unsigned int pai = pa_index(pa);
11497 	pmap_unpin_kernel_pages(va, size);
11498 	pvh_unlock(pai);
11499 }
11500 
11501 /**
11502  * Function to copy kauth_cred from new_data to kv.
11503  * Function defined in "kern_prot.c"
11504  *
11505  * @note Will be removed upon completion of
11506  * <rdar://problem/72635194> Compiler PAC support for memcpy.
11507  *
11508  * @param kv Address to copy new data to.
11509  * @param new_data Pointer to new data.
11510  *
11511  */
11512 
11513 extern void
11514 kauth_cred_copy(const uintptr_t kv, const uintptr_t new_data);
11515 
11516 /**
11517  * Zalloc-specific memcpy that writes through the physical aperture
11518  * and ensures the element being modified is from a read-only zone.
11519  *
11520  * @note Designed to work only with the zone allocator's read-only submap.
11521  *
11522  * @param zid The ID of the zone the element belongs to.
11523  * @param va VA of element to be modified.
11524  * @param offset Offset from element.
11525  * @param new_data Pointer to new data.
11526  * @param new_data_size	Size of modification.
11527  *
11528  */
11529 
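/*
 * Usage sketch (illustrative only; 'struct ro_elem', its field, and 'zid' are
 * hypothetical):
 *
 *     struct ro_elem *elem = ...;   // element previously allocated from a read-only zone
 *     uint32_t newval = 42;
 *     pmap_ro_zone_memcpy(zid, (vm_offset_t)elem,
 *         offsetof(struct ro_elem, field),
 *         (vm_offset_t)&newval, sizeof(newval));
 */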
11530 void
11531 pmap_ro_zone_memcpy(
11532 	zone_id_t           zid,
11533 	vm_offset_t         va,
11534 	vm_offset_t         offset,
11535 	const vm_offset_t   new_data,
11536 	vm_size_t           new_data_size)
11537 {
11538 #if XNU_MONITOR
11539 	pmap_ro_zone_memcpy_ppl(zid, va, offset, new_data, new_data_size);
11540 #else /* XNU_MONITOR */
11541 	pmap_ro_zone_memcpy_internal(zid, va, offset, new_data, new_data_size);
11542 #endif /* XNU_MONITOR */
11543 }
11544 
11545 MARK_AS_PMAP_TEXT void
11546 pmap_ro_zone_memcpy_internal(
11547 	zone_id_t             zid,
11548 	vm_offset_t           va,
11549 	vm_offset_t           offset,
11550 	const vm_offset_t     new_data,
11551 	vm_size_t             new_data_size)
11552 {
11553 	const pmap_paddr_t pa = kvtophys_nofail(va + offset);
11554 
11555 	if (!new_data || new_data_size == 0) {
11556 		return;
11557 	}
11558 
11559 	pmap_ro_zone_validate_element(zid, va, offset, new_data, new_data_size);
11560 	pmap_ro_zone_lock_phy_page(pa, va, new_data_size);
11561 	memcpy((void*)phystokv(pa), (void*)new_data, new_data_size);
11562 	pmap_ro_zone_unlock_phy_page(pa, va, new_data_size);
11563 }
11564 
11565 /**
11566  * bzero for allocations from read only zones, that writes through the
11567  * physical aperture.
11568  *
11569  * @note This is called by the zfree path of all allocations from read
11570  * only zones.
11571  *
11572  * @param zid The ID of the zone the allocation belongs to.
11573  * @param va VA of element to be zeroed.
11574  * @param offset Offset in the element.
11575  * @param size	Size of allocation.
11576  *
11577  */
11578 
11579 void
11580 pmap_ro_zone_bzero(
11581 	zone_id_t       zid,
11582 	vm_offset_t     va,
11583 	vm_offset_t     offset,
11584 	vm_size_t       size)
11585 {
11586 #if XNU_MONITOR
11587 	pmap_ro_zone_bzero_ppl(zid, va, offset, size);
11588 #else /* XNU_MONITOR */
11589 	pmap_ro_zone_bzero_internal(zid, va, offset, size);
11590 #endif /* XNU_MONITOR */
11591 }
11592 
11593 MARK_AS_PMAP_TEXT void
11594 pmap_ro_zone_bzero_internal(
11595 	zone_id_t       zid,
11596 	vm_offset_t     va,
11597 	vm_offset_t     offset,
11598 	vm_size_t       size)
11599 {
11600 	const pmap_paddr_t pa = kvtophys_nofail(va + offset);
11601 	pmap_ro_zone_validate_element(zid, va, offset, 0, size);
11602 	pmap_ro_zone_lock_phy_page(pa, va, size);
11603 	bzero((void*)phystokv(pa), size);
11604 	pmap_ro_zone_unlock_phy_page(pa, va, size);
11605 }
11606 
11607 /**
11608  * Removes write access from the Physical Aperture.
11609  *
11610  * @note For non-PPL devices, it simply makes all virtual mappings RO.
11611  * @note Designed to work only with the zone allocator's read-only submap.
11612  *
11613  * @param va VA of the page to remove write access from.
11614  *
11615  */
11616 MARK_AS_PMAP_TEXT static void
11617 pmap_phys_write_disable(vm_address_t va)
11618 {
11619 #if XNU_MONITOR
11620 	pmap_ppl_lockdown_page(va, PVH_FLAG_LOCKDOWN_RO, true);
11621 #else /* XNU_MONITOR */
11622 	pmap_page_protect(atop_kernel(kvtophys(va)), VM_PROT_READ);
11623 #endif /* XNU_MONITOR */
11624 }
11625 
11626 #define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)
11627 
11628 MARK_AS_PMAP_TEXT mach_vm_size_t
11629 pmap_query_resident_internal(
11630 	pmap_t                  pmap,
11631 	vm_map_address_t        start,
11632 	vm_map_address_t        end,
11633 	mach_vm_size_t          *compressed_bytes_p)
11634 {
11635 	mach_vm_size_t  resident_bytes = 0;
11636 	mach_vm_size_t  compressed_bytes = 0;
11637 
11638 	pt_entry_t     *bpte, *epte;
11639 	pt_entry_t     *pte_p;
11640 	tt_entry_t     *tte_p;
11641 
11642 	if (pmap == NULL) {
11643 		return PMAP_RESIDENT_INVALID;
11644 	}
11645 
11646 	validate_pmap(pmap);
11647 
11648 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11649 
11650 	/* Ensure that this request is valid, and addresses exactly one TTE. */
11651 	if (__improbable((start % pt_attr_page_size(pt_attr)) ||
11652 	    (end % pt_attr_page_size(pt_attr)))) {
11653 		panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
11654 	}
11655 
11656 	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
11657 		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
11658 	}
11659 
11660 	pmap_lock(pmap, PMAP_LOCK_SHARED);
11661 	tte_p = pmap_tte(pmap, start);
11662 	if (tte_p == (tt_entry_t *) NULL) {
11663 		pmap_unlock(pmap, PMAP_LOCK_SHARED);
11664 		return PMAP_RESIDENT_INVALID;
11665 	}
11666 	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
11667 		pte_p = (pt_entry_t *) ttetokv(*tte_p);
11668 		bpte = &pte_p[pte_index(pt_attr, start)];
11669 		epte = &pte_p[pte_index(pt_attr, end)];
11670 
11671 		for (; bpte < epte; bpte++) {
11672 			if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
11673 				compressed_bytes += pt_attr_page_size(pt_attr);
11674 			} else if (pa_valid(pte_to_pa(*bpte))) {
11675 				resident_bytes += pt_attr_page_size(pt_attr);
11676 			}
11677 		}
11678 	}
11679 	pmap_unlock(pmap, PMAP_LOCK_SHARED);
11680 
11681 	if (compressed_bytes_p) {
11682 		pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
11683 		*compressed_bytes_p += compressed_bytes;
11684 		pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
11685 	}
11686 
11687 	return resident_bytes;
11688 }
11689 
11690 mach_vm_size_t
11691 pmap_query_resident(
11692 	pmap_t                  pmap,
11693 	vm_map_address_t        start,
11694 	vm_map_address_t        end,
11695 	mach_vm_size_t          *compressed_bytes_p)
11696 {
11697 	mach_vm_size_t          total_resident_bytes;
11698 	mach_vm_size_t          compressed_bytes;
11699 	vm_map_address_t        va;
11700 
11701 
11702 	if (pmap == PMAP_NULL) {
11703 		if (compressed_bytes_p) {
11704 			*compressed_bytes_p = 0;
11705 		}
11706 		return 0;
11707 	}
11708 
11709 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11710 
11711 	total_resident_bytes = 0;
11712 	compressed_bytes = 0;
11713 
11714 	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
11715 	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
11716 	    VM_KERNEL_ADDRHIDE(end));
11717 
11718 	va = start;
11719 	while (va < end) {
11720 		vm_map_address_t l;
11721 		mach_vm_size_t resident_bytes;
11722 
11723 		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
11724 
11725 		if (l > end) {
11726 			l = end;
11727 		}
11728 #if XNU_MONITOR
11729 		resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
11730 #else
11731 		resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
11732 #endif
11733 		if (resident_bytes == PMAP_RESIDENT_INVALID) {
11734 			break;
11735 		}
11736 
11737 		total_resident_bytes += resident_bytes;
11738 
11739 		va = l;
11740 	}
11741 
11742 	if (compressed_bytes_p) {
11743 		*compressed_bytes_p = compressed_bytes;
11744 	}
11745 
11746 	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
11747 	    total_resident_bytes);
11748 
11749 	return total_resident_bytes;
11750 }
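
/*
 * Illustrative sketch (editorial): summing resident and compressed footprint
 * over a user VA range.  The reporting helper and variable names are
 * assumptions for illustration; pmap_query_resident() internally splits the
 * range on twig boundaries as shown above.
 */
#if 0 /* example only, not compiled */
static void
example_report_range(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	mach_vm_size_t compressed = 0;
	mach_vm_size_t resident = pmap_query_resident(pmap, start, end, &compressed);

	printf("range [%p, %p): %llu bytes resident, %llu bytes compressed\n",
	    (void *)start, (void *)end, (unsigned long long)resident,
	    (unsigned long long)compressed);
}
#endif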
11751 
11752 #if MACH_ASSERT
11753 static void
11754 pmap_check_ledgers(
11755 	pmap_t pmap)
11756 {
11757 	int     pid;
11758 	char    *procname;
11759 
11760 	if (pmap->pmap_pid == 0) {
11761 		/*
11762 		 * This pmap was not or is no longer fully associated
11763 		 * with a task (e.g. the old pmap after a fork()/exec() or
11764 		 * spawn()).  Its "ledger" still points at a task that is
11765 		 * now using a different (and active) address space, so
11766 		 * we can't check that all the pmap ledgers are balanced here.
11767 		 *
11768 		 * If the "pid" is set, that means that we went through
11769 		 * pmap_set_process() in task_terminate_internal(), so
11770 		 * this task's ledger should not have been re-used and
11771 		 * all the pmap ledgers should be back to 0.
11772 		 */
11773 		return;
11774 	}
11775 
11776 	pid = pmap->pmap_pid;
11777 	procname = pmap->pmap_procname;
11778 
11779 	vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
11780 }
11781 #endif /* MACH_ASSERT */
11782 
11783 void
11784 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
11785 {
11786 }
11787 
11788 /**
11789  * The minimum shared region nesting size is used by the VM to determine when to
11790  * break up large mappings to nested regions. The smallest size that these
11791  * mappings can be broken into is determined by the page table level at which
11792  * those regions are nested and by the size of the page tables.
11793  *
11794  * For instance, if a nested region is nesting at L2 for a process utilizing
11795  * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
11796  * block entry).
11797  *
11798  * @param pmap The target pmap; the block size depends on whether it is using
11799  *             16KB or 4KB page tables.
11800  */
11801 uint64_t
11802 pmap_shared_region_size_min(__unused pmap_t pmap)
11803 {
11804 #if (__ARM_VMSA__ > 7)
11805 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11806 
11807 	/**
11808 	 * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
11809 	 * 4KB pages). This means that a target pmap will contain L2 entries that
11810 	 * point to shared L3 page tables in the shared region pmap.
11811 	 */
11812 	return pt_attr_twig_size(pt_attr);
11813 
11814 #else
11815 	return ARM_NESTING_SIZE_MIN;
11816 #endif
11817 }
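
/*
 * Illustrative sketch (editorial): rounding a nesting request to the minimum
 * shared-region nesting granule returned above (32MB with 16KB pages, 2MB
 * with 4KB pages).  The helper name and rounding policy are assumptions for
 * illustration, not the VM's actual logic.
 */
#if 0 /* example only, not compiled */
static void
example_align_nesting(pmap_t pmap, vm_map_offset_t *start, vm_map_size_t *size)
{
	const uint64_t granule = pmap_shared_region_size_min(pmap);
	const vm_map_offset_t end = (*start + *size + granule - 1) & ~(granule - 1);

	*start &= ~(granule - 1);       /* round the start down to a granule boundary */
	*size = end - *start;           /* grow the size so the range covers whole granules */
}
#endif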
11818 
11819 boolean_t
11820 pmap_enforces_execute_only(
11821 #if (__ARM_VMSA__ == 7)
11822 	__unused
11823 #endif
11824 	pmap_t pmap)
11825 {
11826 #if (__ARM_VMSA__ > 7)
11827 	return pmap != kernel_pmap;
11828 #else
11829 	return FALSE;
11830 #endif
11831 }
11832 
11833 MARK_AS_PMAP_TEXT void
11834 pmap_set_vm_map_cs_enforced_internal(
11835 	pmap_t pmap,
11836 	bool new_value)
11837 {
11838 	validate_pmap_mutable(pmap);
11839 	pmap->pmap_vm_map_cs_enforced = new_value;
11840 }
11841 
11842 void
11843 pmap_set_vm_map_cs_enforced(
11844 	pmap_t pmap,
11845 	bool new_value)
11846 {
11847 #if XNU_MONITOR
11848 	pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
11849 #else
11850 	pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
11851 #endif
11852 }
11853 
11854 extern int cs_process_enforcement_enable;
11855 bool
11856 pmap_get_vm_map_cs_enforced(
11857 	pmap_t pmap)
11858 {
11859 	if (cs_process_enforcement_enable) {
11860 		return true;
11861 	}
11862 	return pmap->pmap_vm_map_cs_enforced;
11863 }
11864 
11865 MARK_AS_PMAP_TEXT void
11866 pmap_set_jit_entitled_internal(
11867 	__unused pmap_t pmap)
11868 {
11869 	return;
11870 }
11871 
11872 void
11873 pmap_set_jit_entitled(
11874 	pmap_t pmap)
11875 {
11876 #if XNU_MONITOR
11877 	pmap_set_jit_entitled_ppl(pmap);
11878 #else
11879 	pmap_set_jit_entitled_internal(pmap);
11880 #endif
11881 }
11882 
11883 bool
11884 pmap_get_jit_entitled(
11885 	__unused pmap_t pmap)
11886 {
11887 	return false;
11888 }
11889 
11890 MARK_AS_PMAP_TEXT kern_return_t
11891 pmap_query_page_info_internal(
11892 	pmap_t          pmap,
11893 	vm_map_offset_t va,
11894 	int             *disp_p)
11895 {
11896 	pmap_paddr_t    pa;
11897 	int             disp;
11898 	unsigned int    pai;
11899 	pt_entry_t      *pte;
11900 	pv_entry_t      **pv_h, *pve_p;
11901 
11902 	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
11903 		pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11904 		*disp_p = 0;
11905 		pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11906 		return KERN_INVALID_ARGUMENT;
11907 	}
11908 
11909 	disp = 0;
11910 
11911 	validate_pmap(pmap);
11912 	pmap_lock(pmap, PMAP_LOCK_SHARED);
11913 
11914 	pte = pmap_pte(pmap, va);
11915 	if (pte == PT_ENTRY_NULL) {
11916 		goto done;
11917 	}
11918 
11919 	pa = pte_to_pa(*((volatile pt_entry_t*)pte));
11920 	if (pa == 0) {
11921 		if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
11922 			disp |= PMAP_QUERY_PAGE_COMPRESSED;
11923 			if (*pte & ARM_PTE_COMPRESSED_ALT) {
11924 				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
11925 			}
11926 		}
11927 	} else {
11928 		disp |= PMAP_QUERY_PAGE_PRESENT;
11929 		pai = pa_index(pa);
11930 		if (!pa_valid(pa)) {
11931 			goto done;
11932 		}
11933 		pvh_lock(pai);
11934 		pv_h = pai_to_pvh(pai);
11935 		pve_p = PV_ENTRY_NULL;
11936 		int pve_ptep_idx = 0;
11937 		if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11938 			pve_p = pvh_pve_list(pv_h);
11939 			while (pve_p != PV_ENTRY_NULL &&
11940 			    (pve_ptep_idx = pve_find_ptep_index(pve_p, pte)) == -1) {
11941 				pve_p = pve_next(pve_p);
11942 			}
11943 		}
11944 
11945 		if (ppattr_pve_is_altacct(pai, pve_p, pve_ptep_idx)) {
11946 			disp |= PMAP_QUERY_PAGE_ALTACCT;
11947 		} else if (ppattr_test_reusable(pai)) {
11948 			disp |= PMAP_QUERY_PAGE_REUSABLE;
11949 		} else if (ppattr_test_internal(pai)) {
11950 			disp |= PMAP_QUERY_PAGE_INTERNAL;
11951 		}
11952 		pvh_unlock(pai);
11953 	}
11954 
11955 done:
11956 	pmap_unlock(pmap, PMAP_LOCK_SHARED);
11957 	pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11958 	*disp_p = disp;
11959 	pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11960 	return KERN_SUCCESS;
11961 }
11962 
11963 kern_return_t
11964 pmap_query_page_info(
11965 	pmap_t          pmap,
11966 	vm_map_offset_t va,
11967 	int             *disp_p)
11968 {
11969 #if XNU_MONITOR
11970 	return pmap_query_page_info_ppl(pmap, va, disp_p);
11971 #else
11972 	return pmap_query_page_info_internal(pmap, va, disp_p);
11973 #endif
11974 }
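
/*
 * Illustrative sketch (editorial): querying the disposition of a single user
 * VA.  The helper name is an assumption for illustration; in this
 * implementation a page reports as either present or compressed, never both.
 */
#if 0 /* example only, not compiled */
static bool
example_page_is_compressed(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return false;
	}
	return (disp & PMAP_QUERY_PAGE_COMPRESSED) != 0;
}
#endif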
11975 
11976 
11977 
11978 static vm_map_size_t
11979 pmap_user_va_size(pmap_t pmap __unused)
11980 {
11981 #if (__ARM_VMSA__ == 7)
11982 	return VM_MAX_ADDRESS;
11983 #else
11984 #if __ARM_MIXED_PAGE_SIZE__
11985 	uint64_t tcr_value = pmap_get_pt_attr(pmap)->pta_tcr_value;
11986 	return 1ULL << (64 - ((tcr_value >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK));
11987 #else
11988 	return 1ULL << (64 - T0SZ_BOOT);
11989 #endif
11990 #endif /* __ARM_VMSA > 7 */
11991 }
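
/*
 * Worked example (editorial): with an assumed TCR.T0SZ value of 25, the
 * computation above yields 1ULL << (64 - 25) = 1ULL << 39, i.e. a 512GB user
 * VA span.  The actual T0SZ_BOOT value is configuration-dependent; 25 here is
 * purely illustrative.
 */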
11992 
11993 
11994 
11995 kern_return_t
11996 pmap_load_legacy_trust_cache(struct pmap_legacy_trust_cache __unused *trust_cache,
11997     const vm_size_t __unused trust_cache_len)
11998 {
11999 	// Unsupported
12000 	return KERN_NOT_SUPPORTED;
12001 }
12002 
12003 pmap_tc_ret_t
12004 pmap_load_image4_trust_cache(struct pmap_image4_trust_cache __unused *trust_cache,
12005     const vm_size_t __unused trust_cache_len,
12006     uint8_t const * __unused img4_manifest,
12007     const vm_size_t __unused img4_manifest_buffer_len,
12008     const vm_size_t __unused img4_manifest_actual_len,
12009     bool __unused dry_run)
12010 {
12011 	// Unsupported
12012 	return PMAP_TC_UNKNOWN_FORMAT;
12013 }
12014 
12015 bool
12016 pmap_in_ppl(void)
12017 {
12018 	// Unsupported
12019 	return false;
12020 }
12021 
12022 bool
12023 pmap_has_ppl(void)
12024 {
12025 	// Unsupported
12026 	return false;
12027 }
12028 
12029 void
12030 pmap_lockdown_image4_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
12031 {
12032 	// Unsupported
12033 }
12034 
12035 void
12036 pmap_lockdown_image4_late_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
12037 {
12038 	// Unsupported
12039 }
12040 
12041 void *
12042 pmap_claim_reserved_ppl_page(void)
12043 {
12044 	// Unsupported
12045 	return NULL;
12046 }
12047 
12048 void
12049 pmap_free_reserved_ppl_page(void __unused *kva)
12050 {
12051 	// Unsupported
12052 }
12053 
12054 
12055 MARK_AS_PMAP_TEXT bool
12056 pmap_is_trust_cache_loaded_internal(const uuid_t uuid)
12057 {
12058 	bool found = false;
12059 
12060 	pmap_simple_lock(&pmap_loaded_trust_caches_lock);
12061 
12062 	for (struct pmap_image4_trust_cache const *c = pmap_image4_trust_caches; c != NULL; c = c->next) {
12063 		if (bcmp(uuid, c->module->uuid, sizeof(uuid_t)) == 0) {
12064 			found = true;
12065 			goto done;
12066 		}
12067 	}
12068 
12069 #ifdef PLATFORM_BridgeOS
12070 	for (struct pmap_legacy_trust_cache const *c = pmap_legacy_trust_caches; c != NULL; c = c->next) {
12071 		if (bcmp(uuid, c->uuid, sizeof(uuid_t)) == 0) {
12072 			found = true;
12073 			goto done;
12074 		}
12075 	}
12076 #endif
12077 
12078 done:
12079 	pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
12080 	return found;
12081 }
12082 
12083 bool
12084 pmap_is_trust_cache_loaded(const uuid_t uuid)
12085 {
12086 #if XNU_MONITOR
12087 	return pmap_is_trust_cache_loaded_ppl(uuid);
12088 #else
12089 	return pmap_is_trust_cache_loaded_internal(uuid);
12090 #endif
12091 }
12092 
12093 MARK_AS_PMAP_TEXT bool
12094 pmap_lookup_in_loaded_trust_caches_internal(const uint8_t cdhash[CS_CDHASH_LEN])
12095 {
12096 	struct pmap_image4_trust_cache const *cache = NULL;
12097 #ifdef PLATFORM_BridgeOS
12098 	struct pmap_legacy_trust_cache const *legacy = NULL;
12099 #endif
12100 
12101 	pmap_simple_lock(&pmap_loaded_trust_caches_lock);
12102 
12103 	for (cache = pmap_image4_trust_caches; cache != NULL; cache = cache->next) {
12104 		uint8_t hash_type = 0, flags = 0;
12105 
12106 		if (lookup_in_trust_cache_module(cache->module, cdhash, &hash_type, &flags)) {
12107 			goto done;
12108 		}
12109 	}
12110 
12111 #ifdef PLATFORM_BridgeOS
12112 	for (legacy = pmap_legacy_trust_caches; legacy != NULL; legacy = legacy->next) {
12113 		for (uint32_t i = 0; i < legacy->num_hashes; i++) {
12114 			if (bcmp(legacy->hashes[i], cdhash, CS_CDHASH_LEN) == 0) {
12115 				goto done;
12116 			}
12117 		}
12118 	}
12119 #endif
12120 
12121 done:
12122 	pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
12123 
12124 	if (cache != NULL) {
12125 		return true;
12126 #ifdef PLATFORM_BridgeOS
12127 	} else if (legacy != NULL) {
12128 		return true;
12129 #endif
12130 	}
12131 
12132 	return false;
12133 }
12134 
12135 bool
12136 pmap_lookup_in_loaded_trust_caches(const uint8_t cdhash[CS_CDHASH_LEN])
12137 {
12138 #if XNU_MONITOR
12139 	return pmap_lookup_in_loaded_trust_caches_ppl(cdhash);
12140 #else
12141 	return pmap_lookup_in_loaded_trust_caches_internal(cdhash);
12142 #endif
12143 }
12144 
12145 MARK_AS_PMAP_TEXT uint32_t
12146 pmap_lookup_in_static_trust_cache_internal(const uint8_t cdhash[CS_CDHASH_LEN])
12147 {
12148 	// Awkward indirection, because the PPL macros currently force their functions to be static.
12149 	return lookup_in_static_trust_cache(cdhash);
12150 }
12151 
12152 uint32_t
12153 pmap_lookup_in_static_trust_cache(const uint8_t cdhash[CS_CDHASH_LEN])
12154 {
12155 #if XNU_MONITOR
12156 	return pmap_lookup_in_static_trust_cache_ppl(cdhash);
12157 #else
12158 	return pmap_lookup_in_static_trust_cache_internal(cdhash);
12159 #endif
12160 }
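
/*
 * Illustrative sketch (editorial): a simplified check that a cdhash is
 * covered by either a loaded trust cache or the static trust cache.  This is
 * not AMFI's actual policy; in particular, treating any nonzero static-cache
 * lookup result as "found" is an assumption for illustration.
 */
#if 0 /* example only, not compiled */
static bool
example_cdhash_is_trusted(const uint8_t cdhash[CS_CDHASH_LEN])
{
	if (pmap_lookup_in_loaded_trust_caches(cdhash)) {
		return true;
	}
	return pmap_lookup_in_static_trust_cache(cdhash) != 0;
}
#endif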
12161 
12162 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_compilation_service_cdhash_lock, 0);
12163 MARK_AS_PMAP_DATA uint8_t pmap_compilation_service_cdhash[CS_CDHASH_LEN] = { 0 };
12164 
12165 MARK_AS_PMAP_TEXT void
12166 pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
12167 {
12168 
12169 	pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
12170 	memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
12171 	pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
12172 
12173 	pmap_cs_log_info("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash[0], cdhash[1], cdhash[2], cdhash[3]);
12174 }
12175 
12176 MARK_AS_PMAP_TEXT bool
12177 pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
12178 {
12179 	bool match = false;
12180 
12181 	pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
12182 	if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
12183 		match = true;
12184 	}
12185 	pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
12186 
12187 	if (match) {
12188 		pmap_cs_log_info("Matched Compilation Service CDHash through the PPL");
12189 	}
12190 
12191 	return match;
12192 }
12193 
12194 void
12195 pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
12196 {
12197 #if XNU_MONITOR
12198 	pmap_set_compilation_service_cdhash_ppl(cdhash);
12199 #else
12200 	pmap_set_compilation_service_cdhash_internal(cdhash);
12201 #endif
12202 }
12203 
12204 bool
12205 pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
12206 {
12207 #if XNU_MONITOR
12208 	return pmap_match_compilation_service_cdhash_ppl(cdhash);
12209 #else
12210 	return pmap_match_compilation_service_cdhash_internal(cdhash);
12211 #endif
12212 }
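
/*
 * Illustrative sketch (editorial): registering the compilation service cdhash
 * once and later matching a candidate against it.  The caller, helper name,
 * and cdhash sources are assumptions for illustration.
 */
#if 0 /* example only, not compiled */
static bool
example_is_compilation_service(const uint8_t registered[CS_CDHASH_LEN],
    const uint8_t candidate[CS_CDHASH_LEN])
{
	/* Typically done once, when the compilation service signature is learned. */
	pmap_set_compilation_service_cdhash(registered);

	/* Later, e.g. at code-signature validation time. */
	return pmap_match_compilation_service_cdhash(candidate);
}
#endif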
12213 
12214 /*
12215  * As part of supporting local signing on the device, we need the PMAP layer
12216  * to store the local signing key so that PMAP_CS can validate with it. We
12217  * store it at the PMAP layer such that it is accessible to both AMFI and
12218  * PMAP_CS should they need it.
12219  */
12220 MARK_AS_PMAP_DATA static bool pmap_local_signing_public_key_set = false;
12221 MARK_AS_PMAP_DATA static uint8_t pmap_local_signing_public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE] = { 0 };
12222 
12223 MARK_AS_PMAP_TEXT void
12224 pmap_set_local_signing_public_key_internal(const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE])
12225 {
12226 	bool key_set = false;
12227 
12228 	/*
12229 	 * os_atomic_cmpxchg returns true if the exchange was successful. For us, a
12230 	 * successful exchange means that the local signing public key had _not_ yet
12231 	 * been set. If the key has already been set, we panic, since the kernel should
12232 	 * never attempt to set the key more than once.
12233 	 */
12234 	key_set = !os_atomic_cmpxchg(&pmap_local_signing_public_key_set, false, true, relaxed);
12235 
12236 	if (key_set) {
12237 		panic("attempted to set the local signing public key multiple times");
12238 	}
12239 
12240 	memcpy(pmap_local_signing_public_key, public_key, PMAP_ECC_P384_PUBLIC_KEY_SIZE);
12241 	pmap_cs_log_info("set local signing public key");
12242 }
12243 
12244 void
12245 pmap_set_local_signing_public_key(const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE])
12246 {
12247 #if XNU_MONITOR
12248 	return pmap_set_local_signing_public_key_ppl(public_key);
12249 #else
12250 	return pmap_set_local_signing_public_key_internal(public_key);
12251 #endif
12252 }
12253 
12254 uint8_t*
12255 pmap_get_local_signing_public_key(void)
12256 {
12257 	bool key_set = os_atomic_load(&pmap_local_signing_public_key_set, relaxed);
12258 
12259 	if (key_set) {
12260 		return pmap_local_signing_public_key;
12261 	}
12262 
12263 	return NULL;
12264 }
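
/*
 * Illustrative sketch (editorial): consuming the local signing public key.
 * The verifier step is left hypothetical; only the NULL check and the key
 * size reflect the API in this file.
 */
#if 0 /* example only, not compiled */
static bool
example_have_local_signing_key(void)
{
	const uint8_t *key = pmap_get_local_signing_public_key();

	if (key == NULL) {
		/* No key has been provided yet; locally signed code cannot be verified. */
		return false;
	}
	/* key points at PMAP_ECC_P384_PUBLIC_KEY_SIZE bytes of ECC P-384 public key. */
	return true;
}
#endif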
12265 
12266 /*
12267  * Locally signed applications need to be explicitly authorized by an entitled application
12268  * before we allow them to run.
12269  */
12270 MARK_AS_PMAP_DATA static uint8_t pmap_local_signing_cdhash[CS_CDHASH_LEN] = {0};
12271 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_local_signing_cdhash_lock, 0);
12272 
12273 MARK_AS_PMAP_TEXT void
12274 pmap_unrestrict_local_signing_internal(
12275 	const uint8_t cdhash[CS_CDHASH_LEN])
12276 {
12277 
12278 	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
12279 	memcpy(pmap_local_signing_cdhash, cdhash, sizeof(pmap_local_signing_cdhash));
12280 	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);
12281 
12282 	pmap_cs_log_debug("unrestricted local signing for CDHash: 0x%02X%02X%02X%02X%02X...",
12283 	    cdhash[0], cdhash[1], cdhash[2], cdhash[3], cdhash[4]);
12284 }
12285 
12286 void
12287 pmap_unrestrict_local_signing(
12288 	const uint8_t cdhash[CS_CDHASH_LEN])
12289 {
12290 #if XNU_MONITOR
12291 	return pmap_unrestrict_local_signing_ppl(cdhash);
12292 #else
12293 	return pmap_unrestrict_local_signing_internal(cdhash);
12294 #endif
12295 }
12296 
12297 #if PMAP_CS
12298 MARK_AS_PMAP_TEXT static void
12299 pmap_restrict_local_signing(void)
12300 {
12301 	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
12302 	memset(pmap_local_signing_cdhash, 0, sizeof(pmap_local_signing_cdhash));
12303 	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);
12304 }
12305 
12306 MARK_AS_PMAP_TEXT static bool
12307 pmap_local_signing_restricted(
12308 	const uint8_t cdhash[CS_CDHASH_LEN])
12309 {
12310 	pmap_simple_lock(&pmap_local_signing_cdhash_lock);
12311 	int ret = memcmp(pmap_local_signing_cdhash, cdhash, sizeof(pmap_local_signing_cdhash));
12312 	pmap_simple_unlock(&pmap_local_signing_cdhash_lock);
12313 
12314 	return ret != 0;
12315 }
12316 
12317 MARK_AS_PMAP_TEXT bool
12318 pmap_cs_query_entitlements_internal(
12319 	pmap_t pmap,
12320 	CEQuery_t query,
12321 	size_t queryLength,
12322 	CEQueryContext_t finalContext)
12323 {
12324 	struct pmap_cs_code_directory *cd_entry = NULL;
12325 	bool ret = false;
12326 
12327 	if (!pmap_cs) {
12328 		panic("PMAP_CS: cannot query for entitlements as pmap_cs is turned off");
12329 	}
12330 
12331 	/*
12332 	 * When a pmap has not been passed in, we assume the caller wants to check the
12333 	 * entitlements on the current user space process.
12334 	 */
12335 	if (pmap == NULL) {
12336 		pmap = current_pmap();
12337 	}
12338 
12339 	if (pmap == kernel_pmap) {
12340 		/*
12341 		 * Instead of panicking we will just return false.
12342 		 */
12343 		return false;
12344 	}
12345 
12346 	if (query == NULL || queryLength > 64) {
12347 		panic("PMAP_CS: bogus entitlements query");
12348 	} else {
12349 		pmap_cs_assert_addr((vm_address_t)query, sizeof(CEQueryOperation_t) * queryLength, false, true);
12350 	}
12351 
12352 	if (finalContext != NULL) {
12353 		pmap_cs_assert_addr((vm_address_t)finalContext, sizeof(*finalContext), false, false);
12354 	}
12355 
12356 	validate_pmap(pmap);
12357 	pmap_lock(pmap, PMAP_LOCK_SHARED);
12358 
12359 	cd_entry = pmap_cs_code_directory_from_region(pmap->pmap_cs_main);
12360 	if (cd_entry == NULL) {
12361 		pmap_cs_log_error("attempted to query entitlements from an invalid pmap or a retired code directory");
12362 		goto out;
12363 	}
12364 
12365 	if (cd_entry->ce_ctx == NULL) {
12366 		pmap_cs_log_debug("%s: code signature doesn't have any entitlements", cd_entry->identifier);
12367 		goto out;
12368 	}
12369 
12370 	der_vm_context_t executionContext = cd_entry->ce_ctx->der_context;
12371 
12372 	for (size_t op = 0; op < queryLength; op++) {
12373 		executionContext = amfi->CoreEntitlements.der_vm_execute(executionContext, query[op]);
12374 	}
12375 
12376 	if (amfi->CoreEntitlements.der_vm_context_is_valid(executionContext)) {
12377 		ret = true;
12378 		if (finalContext != NULL) {
12379 			pmap_pin_kernel_pages((vm_offset_t)finalContext, sizeof(*finalContext));
12380 			finalContext->der_context = executionContext;
12381 			pmap_unpin_kernel_pages((vm_offset_t)finalContext, sizeof(*finalContext));
12382 		}
12383 	} else {
12384 		ret = false;
12385 	}
12386 
12387 out:
12388 	if (cd_entry) {
12389 		lck_rw_unlock_shared(&cd_entry->rwlock);
12390 		cd_entry = NULL;
12391 	}
12392 	pmap_unlock(pmap, PMAP_LOCK_SHARED);
12393 
12394 	return ret;
12395 }
12396 #endif
12397 
12398 bool
12399 pmap_query_entitlements(
12400 	__unused pmap_t pmap,
12401 	__unused CEQuery_t query,
12402 	__unused size_t queryLength,
12403 	__unused CEQueryContext_t finalContext)
12404 {
12405 #if !PMAP_SUPPORTS_ENTITLEMENT_CHECKS
12406 	panic("PMAP_CS: do not use this API without checking for \'#if PMAP_SUPPORTS_ENTITLEMENT_CHECKS\'");
12407 #else
12408 
12409 #if XNU_MONITOR
12410 	return pmap_cs_query_entitlements_ppl(pmap, query, queryLength, finalContext);
12411 #else
12412 	return pmap_cs_query_entitlements_internal(pmap, query, queryLength, finalContext);
12413 #endif
12414 
12415 #endif /* !PMAP_SUPPORTS_ENTITLEMENT_CHECKS */
12416 }
12417 
12418 MARK_AS_PMAP_TEXT void
12419 pmap_footprint_suspend_internal(
12420 	vm_map_t        map,
12421 	boolean_t       suspend)
12422 {
12423 #if DEVELOPMENT || DEBUG
12424 	if (suspend) {
12425 		current_thread()->pmap_footprint_suspended = TRUE;
12426 		map->pmap->footprint_was_suspended = TRUE;
12427 	} else {
12428 		current_thread()->pmap_footprint_suspended = FALSE;
12429 	}
12430 #else /* DEVELOPMENT || DEBUG */
12431 	(void) map;
12432 	(void) suspend;
12433 #endif /* DEVELOPMENT || DEBUG */
12434 }
12435 
12436 void
12437 pmap_footprint_suspend(
12438 	vm_map_t map,
12439 	boolean_t suspend)
12440 {
12441 #if XNU_MONITOR
12442 	pmap_footprint_suspend_ppl(map, suspend);
12443 #else
12444 	pmap_footprint_suspend_internal(map, suspend);
12445 #endif
12446 }
12447 
12448 MARK_AS_PMAP_TEXT void
12449 pmap_nop_internal(pmap_t pmap __unused)
12450 {
12451 	validate_pmap_mutable(pmap);
12452 }
12453 
12454 void
12455 pmap_nop(pmap_t pmap)
12456 {
12457 #if XNU_MONITOR
12458 	pmap_nop_ppl(pmap);
12459 #else
12460 	pmap_nop_internal(pmap);
12461 #endif
12462 }
12463 
12464 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
12465 
12466 struct page_table_dump_header {
12467 	uint64_t pa;
12468 	uint64_t num_entries;
12469 	uint64_t start_va;
12470 	uint64_t end_va;
12471 };
12472 
12473 static kern_return_t
12474 pmap_dump_page_tables_recurse(pmap_t pmap,
12475     const tt_entry_t *ttp,
12476     unsigned int cur_level,
12477     unsigned int level_mask,
12478     uint64_t start_va,
12479     void *buf_start,
12480     void *buf_end,
12481     size_t *bytes_copied)
12482 {
12483 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12484 	uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);
12485 
12486 	uint64_t size = pt_attr->pta_level_info[cur_level].size;
12487 	uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
12488 	uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
12489 	uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
12490 
12491 	void *bufp = (uint8_t*)buf_start + *bytes_copied;
12492 
12493 	if (cur_level == pt_attr_root_level(pt_attr)) {
12494 		num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
12495 	}
12496 
12497 	uint64_t tt_size = num_entries * sizeof(tt_entry_t);
12498 	const tt_entry_t *tt_end = &ttp[num_entries];
12499 
12500 	if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
12501 		return KERN_INSUFFICIENT_BUFFER_SIZE;
12502 	}
12503 
12504 	if (level_mask & (1U << cur_level)) {
12505 		struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
12506 		header->pa = ml_static_vtop((vm_offset_t)ttp);
12507 		header->num_entries = num_entries;
12508 		header->start_va = start_va;
12509 		header->end_va = start_va + (num_entries * size);
12510 
12511 		bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
12512 		*bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
12513 	}
12514 	uint64_t current_va = start_va;
12515 
12516 	for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
12517 		tt_entry_t tte = *ttep;
12518 
12519 		if (!(tte & valid_mask)) {
12520 			continue;
12521 		}
12522 
12523 		if ((tte & type_mask) == type_block) {
12524 			continue;
12525 		} else {
12526 			if (cur_level >= pt_attr_leaf_level(pt_attr)) {
12527 				panic("%s: corrupt entry %#llx at %p, "
12528 				    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
12529 				    __FUNCTION__, tte, ttep,
12530 				    ttp, cur_level, bufp, buf_end);
12531 			}
12532 
12533 			const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
12534 
12535 			kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
12536 			    level_mask, current_va, buf_start, buf_end, bytes_copied);
12537 
12538 			if (recurse_result != KERN_SUCCESS) {
12539 				return recurse_result;
12540 			}
12541 		}
12542 	}
12543 
12544 	return KERN_SUCCESS;
12545 }
12546 
12547 kern_return_t
12548 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
12549 {
12550 	if (not_in_kdp) {
12551 		panic("pmap_dump_page_tables must only be called from kernel debugger context");
12552 	}
12553 	return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
12554 	           level_mask, pmap->min, bufp, buf_end, bytes_copied);
12555 }
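
/*
 * Illustrative sketch (editorial): walking a buffer produced by
 * pmap_dump_page_tables() from a debugger-side consumer.  Each selected table
 * is emitted as a page_table_dump_header immediately followed by its raw
 * entries; the bounds handling below is simplified and the helper name is an
 * assumption for illustration.
 */
#if 0 /* example only, not compiled */
static void
example_walk_dump(const void *buf, size_t bytes_copied)
{
	const uint8_t *cursor = buf;
	const uint8_t *end = cursor + bytes_copied;

	while ((size_t)(end - cursor) >= sizeof(struct page_table_dump_header)) {
		const struct page_table_dump_header *hdr = (const struct page_table_dump_header *)cursor;
		const tt_entry_t *entries = (const tt_entry_t *)(hdr + 1);

		printf("table pa=0x%llx va=[0x%llx, 0x%llx) entries=%llu first=0x%llx\n",
		    hdr->pa, hdr->start_va, hdr->end_va, hdr->num_entries,
		    hdr->num_entries ? (uint64_t)entries[0] : 0);

		cursor += sizeof(*hdr) + hdr->num_entries * sizeof(tt_entry_t);
	}
}
#endif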
12556 
12557 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
12558 
12559 kern_return_t
12560 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
12561     unsigned int level_mask __unused, size_t *bytes_copied __unused)
12562 {
12563 	return KERN_NOT_SUPPORTED;
12564 }
12565 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
12566 
12567 
12568 #ifdef CONFIG_XNUPOST
12569 #ifdef __arm64__
12570 static volatile bool pmap_test_took_fault = false;
12571 
12572 static bool
12573 pmap_test_fault_handler(arm_saved_state_t * state)
12574 {
12575 	bool retval                 = false;
12576 	uint32_t esr                = get_saved_state_esr(state);
12577 	esr_exception_class_t class = ESR_EC(esr);
12578 	fault_status_t fsc          = ISS_IA_FSC(ESR_ISS(esr));
12579 
12580 	if ((class == ESR_EC_DABORT_EL1) &&
12581 	    ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
12582 		pmap_test_took_fault = true;
12583 		/* return to the instruction immediately after the call to NX page */
12584 		set_saved_state_pc(state, get_saved_state_pc(state) + 4);
12585 		retval = true;
12586 	}
12587 
12588 	return retval;
12589 }
12590 
12591 // Disable KASAN instrumentation, as the test pmap's TTBR0 space will not be in the shadow map
12592 static NOKASAN bool
12593 pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
12594 {
12595 	pmap_t old_pmap = NULL;
12596 
12597 	pmap_test_took_fault = false;
12598 
12599 	/*
12600 	 * We're potentially switching pmaps without using the normal thread
12601 	 * mechanism; disable interrupts and preemption to avoid any unexpected
12602 	 * memory accesses.
12603 	 */
12604 	uint64_t old_int_state = pmap_interrupts_disable();
12605 	mp_disable_preemption();
12606 
12607 	if (pmap != NULL) {
12608 		old_pmap = current_pmap();
12609 		pmap_switch(pmap);
12610 
12611 		/* Disable PAN; pmap shouldn't be the kernel pmap. */
12612 #if __ARM_PAN_AVAILABLE__
12613 		__builtin_arm_wsr("pan", 0);
12614 #endif /* __ARM_PAN_AVAILABLE__ */
12615 	}
12616 
12617 	ml_expect_fault_begin(pmap_test_fault_handler, va);
12618 
12619 	if (is_write) {
12620 		*((volatile uint64_t*)(va)) = 0xdec0de;
12621 	} else {
12622 		volatile uint64_t tmp = *((volatile uint64_t*)(va));
12623 		(void)tmp;
12624 	}
12625 
12626 	/* Save the fault bool, and undo the gross stuff we did. */
12627 	bool took_fault = pmap_test_took_fault;
12628 	ml_expect_fault_end();
12629 
12630 	if (pmap != NULL) {
12631 #if __ARM_PAN_AVAILABLE__
12632 		__builtin_arm_wsr("pan", 1);
12633 #endif /* __ARM_PAN_AVAILABLE__ */
12634 
12635 		pmap_switch(old_pmap);
12636 	}
12637 
12638 	mp_enable_preemption();
12639 	pmap_interrupts_restore(old_int_state);
12640 	bool retval = (took_fault == should_fault);
12641 	return retval;
12642 }
12643 
12644 static bool
12645 pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
12646 {
12647 	bool retval = pmap_test_access(pmap, va, should_fault, false);
12648 
12649 	if (!retval) {
12650 		T_FAIL("%s: %s, "
12651 		    "pmap=%p, va=%p, should_fault=%u",
12652 		    __func__, should_fault ? "did not fault" : "faulted",
12653 		    pmap, (void*)va, (unsigned)should_fault);
12654 	}
12655 
12656 	return retval;
12657 }
12658 
12659 static bool
12660 pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
12661 {
12662 	bool retval = pmap_test_access(pmap, va, should_fault, true);
12663 
12664 	if (!retval) {
12665 		T_FAIL("%s: %s, "
12666 		    "pmap=%p, va=%p, should_fault=%u",
12667 		    __func__, should_fault ? "did not fault" : "faulted",
12668 		    pmap, (void*)va, (unsigned)should_fault);
12669 	}
12670 
12671 	return retval;
12672 }
12673 
12674 static bool
12675 pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
12676 {
12677 	unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
12678 	unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));
12679 
12680 	bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));
12681 
12682 	if (!retval) {
12683 		T_FAIL("%s: bits=%u, "
12684 		    "pa=%p, should_be_set=%u",
12685 		    __func__, bits,
12686 		    (void*)pa, should_be_set);
12687 	}
12688 
12689 	return retval;
12690 }
12691 
12692 static __attribute__((noinline)) bool
12693 pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
12694 {
12695 	bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
12696 	return retval;
12697 }
12698 
12699 static int
12700 pmap_test_test_config(unsigned int flags)
12701 {
12702 	T_LOG("running pmap_test_test_config flags=0x%X", flags);
12703 	unsigned int map_count = 0;
12704 	unsigned long page_ratio = 0;
12705 	pmap_t pmap = pmap_create_options(NULL, 0, flags);
12706 
12707 	if (!pmap) {
12708 		panic("Failed to allocate pmap");
12709 	}
12710 
12711 	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12712 	uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
12713 	uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
12714 	uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);
12715 
12716 	if (pmap_page_size <= native_page_size) {
12717 		page_ratio = native_page_size / pmap_page_size;
12718 	} else {
12719 		/*
12720 		 * A page_ratio of less than 1 (pmap page size larger than the native
12721 		 * page size) is not currently supported by the pmap layer; panic.
12722 		 */
12723 		panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu, "
12724 		    "flags=%u",
12725 		    __func__, native_page_size, pmap_page_size,
12726 		    flags);
12727 	}
12728 
12729 	if (PAGE_RATIO > 1) {
12730 		/*
12731 		 * The kernel is deliberately pretending to have 16KB pages.
12732 		 * The pmap layer has code that supports this, so pretend the
12733 		 * page size is larger than it is.
12734 		 */
12735 		pmap_page_size = PAGE_SIZE;
12736 		native_page_size = PAGE_SIZE;
12737 	}
12738 
12739 	/*
12740 	 * Get two pages from the VM; one to be mapped wired, and one to be
12741 	 * mapped nonwired.
12742 	 */
12743 	vm_page_t unwired_vm_page = vm_page_grab();
12744 	vm_page_t wired_vm_page = vm_page_grab();
12745 
12746 	if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
12747 		panic("Failed to grab VM pages");
12748 	}
12749 
12750 	ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
12751 	ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);
12752 
12753 	pmap_paddr_t pa = ptoa(pn);
12754 	pmap_paddr_t wired_pa = ptoa(wired_pn);
12755 
12756 	/*
12757 	 * We'll start mappings at the second twig TT.  This keeps us from only
12758 	 * using the first entry in each TT, which would trivially be address
12759 	 * 0; one of the things we will need to test is retrieving the VA for
12760 	 * a given PTE.
12761 	 */
12762 	vm_map_address_t va_base = pmap_twig_size;
12763 	vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);
12764 
12765 	if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
12766 		/*
12767 		 * Not exactly a functional failure, but this test relies on
12768 		 * there being a spare PTE slot we can use to pin the TT.
12769 		 */
12770 		panic("Cannot pin translation table");
12771 	}
12772 
12773 	/*
12774 	 * Create the wired mapping; this will prevent the pmap layer from
12775 	 * reclaiming our test TTs, which would interfere with this test
12776 	 * ("interfere" -> "make it panic").
12777 	 */
12778 	pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);
12779 
12780 #if XNU_MONITOR
12781 	/*
12782 	 * If the PPL is enabled, make sure that the kernel cannot write
12783 	 * to PPL memory.
12784 	 */
12785 	if (!pmap_ppl_disable) {
12786 		T_LOG("Validate that kernel cannot write to PPL memory.");
12787 		pt_entry_t * ptep = pmap_pte(pmap, va_base);
12788 		pmap_test_write(NULL, (vm_map_address_t)ptep, true);
12789 	}
12790 #endif
12791 
12792 	/*
12793 	 * Create read-only mappings of the nonwired page; if the pmap does
12794 	 * not use the same page size as the kernel, create multiple mappings
12795 	 * so that the kernel page is fully mapped.
12796 	 */
12797 	for (map_count = 0; map_count < page_ratio; map_count++) {
12798 		pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
12799 	}
12800 
12801 	/* Validate that all the PTEs have the expected PA and VA. */
12802 	for (map_count = 0; map_count < page_ratio; map_count++) {
12803 		pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));
12804 
12805 		if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
12806 			T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
12807 			    (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
12808 		}
12809 
12810 		if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
12811 			T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
12812 			    (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
12813 		}
12814 	}
12815 
12816 	T_LOG("Validate that reads to our mapping do not fault.");
12817 	pmap_test_read(pmap, va_base, false);
12818 
12819 	T_LOG("Validate that writes to our mapping fault.");
12820 	pmap_test_write(pmap, va_base, true);
12821 
12822 	T_LOG("Make the first mapping writable.");
12823 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
12824 
12825 	T_LOG("Validate that writes to our mapping do not fault.");
12826 	pmap_test_write(pmap, va_base, false);
12827 
12828 
12829 	T_LOG("Make the first mapping XO.");
12830 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);
12831 
12832 	T_LOG("Validate that reads to our mapping do not fault.");
12833 	pmap_test_read(pmap, va_base, false);
12834 
12835 	T_LOG("Validate that writes to our mapping fault.");
12836 	pmap_test_write(pmap, va_base, true);
12837 
12838 
12839 	/*
12840 	 * For page ratios of greater than 1: validate that writes to the other
12841 	 * mappings still fault.  Remove the mappings afterwards (we're done
12842 	 * with page ratio testing).
12843 	 */
12844 	for (map_count = 1; map_count < page_ratio; map_count++) {
12845 		pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
12846 		pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
12847 	}
12848 
12849 	T_LOG("Mark the page unreferenced and unmodified.");
12850 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12851 	pmap_test_check_refmod(pa, 0);
12852 
12853 	/*
12854 	 * Begin testing the ref/mod state machine.  Re-enter the mapping with
12855 	 * different protection/fault_type settings, and confirm that the
12856 	 * ref/mod state matches our expectations at each step.
12857 	 */
12858 	T_LOG("!ref/!mod: read, no fault.  Expect ref/!mod");
12859 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
12860 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
12861 
12862 	T_LOG("!ref/!mod: read, read fault.  Expect ref/!mod");
12863 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12864 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
12865 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
12866 
12867 	T_LOG("!ref/!mod: rw, read fault.  Expect ref/!mod");
12868 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12869 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
12870 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
12871 
12872 	T_LOG("ref/!mod: rw, read fault.  Expect ref/!mod");
12873 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
12874 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
12875 
12876 	T_LOG("!ref/!mod: rw, rw fault.  Expect ref/mod");
12877 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12878 	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
12879 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
12880 
12881 	/*
12882 	 * Shared memory testing; we'll have two mappings; one read-only,
12883 	 * one read-write.
12884 	 */
12885 	vm_map_address_t rw_base = va_base;
12886 	vm_map_address_t ro_base = va_base + pmap_page_size;
12887 
12888 	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
12889 	pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
12890 
12891 	/*
12892 	 * Test that we take faults as expected for unreferenced/unmodified
12893 	 * pages.  Also test the arm_fast_fault interface, to ensure that
12894 	 * mapping permissions change as expected.
12895 	 */
12896 	T_LOG("!ref/!mod: expect no access");
12897 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12898 	pmap_test_read_write(pmap, ro_base, false, false);
12899 	pmap_test_read_write(pmap, rw_base, false, false);
12900 
12901 	T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
12902 	arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
12903 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
12904 	pmap_test_read_write(pmap, ro_base, true, false);
12905 	pmap_test_read_write(pmap, rw_base, true, false);
12906 
12907 	T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
12908 	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
12909 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
12910 	pmap_test_read_write(pmap, ro_base, true, false);
12911 	pmap_test_read_write(pmap, rw_base, true, true);
12912 
12913 	T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
12914 	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
12915 	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
12916 	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
12917 	pmap_test_read_write(pmap, ro_base, true, false);
12918 	pmap_test_read_write(pmap, rw_base, true, true);
12919 
12920 	T_LOG("RW protect both mappings; should not change protections.");
12921 	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
12922 	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
12923 	pmap_test_read_write(pmap, ro_base, true, false);
12924 	pmap_test_read_write(pmap, rw_base, true, true);
12925 
12926 	T_LOG("Read protect both mappings; RW mapping should become RO.");
12927 	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
12928 	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
12929 	pmap_test_read_write(pmap, ro_base, true, false);
12930 	pmap_test_read_write(pmap, rw_base, true, false);
12931 
12932 	T_LOG("RW protect the page; mappings should not change protections.");
12933 	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
12934 	pmap_page_protect(pn, VM_PROT_ALL);
12935 	pmap_test_read_write(pmap, ro_base, true, false);
12936 	pmap_test_read_write(pmap, rw_base, true, true);
12937 
12938 	T_LOG("Read protect the page; RW mapping should become RO.");
12939 	pmap_page_protect(pn, VM_PROT_READ);
12940 	pmap_test_read_write(pmap, ro_base, true, false);
12941 	pmap_test_read_write(pmap, rw_base, true, false);
12942 
12943 	T_LOG("Validate that disconnect removes all known mappings of the page.");
12944 	pmap_disconnect(pn);
12945 	if (!pmap_verify_free(pn)) {
12946 		T_FAIL("Page still has mappings");
12947 	}
12948 
12949 	T_LOG("Remove the wired mapping, so we can tear down the test map.");
12950 	pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
12951 	pmap_destroy(pmap);
12952 
12953 	T_LOG("Release the pages back to the VM.");
12954 	vm_page_lock_queues();
12955 	vm_page_free(unwired_vm_page);
12956 	vm_page_free(wired_vm_page);
12957 	vm_page_unlock_queues();
12958 
12959 	T_LOG("Testing successful!");
12960 	return 0;
12961 }
12962 #endif /* __arm64__ */
12963 
12964 kern_return_t
12965 pmap_test(void)
12966 {
12967 	T_LOG("Starting pmap_tests");
12968 #ifdef __arm64__
12969 	int flags = 0;
12970 	flags |= PMAP_CREATE_64BIT;
12971 
12972 #if __ARM_MIXED_PAGE_SIZE__
12973 	T_LOG("Testing VM_PAGE_SIZE_4KB");
12974 	pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
12975 	T_LOG("Testing VM_PAGE_SIZE_16KB");
12976 	pmap_test_test_config(flags);
12977 #else /* __ARM_MIXED_PAGE_SIZE__ */
12978 	pmap_test_test_config(flags);
12979 #endif /* __ARM_MIXED_PAGE_SIZE__ */
12980 
12981 #endif /* __arm64__ */
12982 	T_PASS("completed pmap_test successfully");
12983 	return KERN_SUCCESS;
12984 }
12985 #endif /* CONFIG_XNUPOST */
12986 
12987 /*
12988  * The following function should never make it to RELEASE code, since
12989  * it provides a way to get the PPL to modify text pages.
12990  */
12991 #if DEVELOPMENT || DEBUG
12992 
12993 #define ARM_UNDEFINED_INSN 0xe7f000f0
12994 #define ARM_UNDEFINED_INSN_THUMB 0xde00
12995 
12996 /**
12997  * Forcibly overwrite executable text with an illegal instruction.
12998  *
12999  * @note Only used for xnu unit testing.
13000  *
13001  * @param pa The physical address to corrupt.
13002  *
13003  * @return KERN_SUCCESS on success.
13004  */
13005 kern_return_t
13006 pmap_test_text_corruption(pmap_paddr_t pa)
13007 {
13008 #if XNU_MONITOR
13009 	return pmap_test_text_corruption_ppl(pa);
13010 #else /* XNU_MONITOR */
13011 	return pmap_test_text_corruption_internal(pa);
13012 #endif /* XNU_MONITOR */
13013 }
13014 
13015 MARK_AS_PMAP_TEXT kern_return_t
13016 pmap_test_text_corruption_internal(pmap_paddr_t pa)
13017 {
13018 	vm_offset_t va = phystokv(pa);
13019 	unsigned int pai = pa_index(pa);
13020 
13021 	assert(pa_valid(pa));
13022 
13023 	pvh_lock(pai);
13024 
13025 	pv_entry_t **pv_h  = pai_to_pvh(pai);
13026 	assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
13027 #if defined(PVH_FLAG_EXEC)
13028 	const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;
13029 
13030 	if (need_ap_twiddle) {
13031 		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
13032 	}
13033 #endif /* defined(PVH_FLAG_EXEC) */
13034 
13035 	/*
13036 	 * The low bit in an instruction address indicates a THUMB instruction
13037 	 */
13038 	if (va & 1) {
13039 		va &= ~(vm_offset_t)1;
13040 		*(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
13041 	} else {
13042 		*(uint32_t *)va = ARM_UNDEFINED_INSN;
13043 	}
13044 
13045 #if defined(PVH_FLAG_EXEC)
13046 	if (need_ap_twiddle) {
13047 		pmap_set_ptov_ap(pai, AP_RONA, FALSE);
13048 	}
13049 #endif /* defined(PVH_FLAG_EXEC) */
13050 
13051 	InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));
13052 
13053 	pvh_unlock(pai);
13054 
13055 	return KERN_SUCCESS;
13056 }
13057 
13058 #endif /* DEVELOPMENT || DEBUG */
13059