xref: /xnu-12377.1.9/osfmk/arm/pmap/pmap_data.h (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /**
29  * This header file is used to store the types, prototypes, and inline functions
30  * that define some of the most important data structures used in the pmap. This
31  * header is only meant for sharing types within the pmap; if a type is meant to
32  * be used by the rest of the kernel, then put it into osfmk/arm/pmap.h.
33  */
34 #ifndef _ARM_PMAP_PMAP_DATA_H_
35 #define _ARM_PMAP_PMAP_DATA_H_
36 
37 #include <stdint.h>
38 
39 #include <kern/ledger.h>
40 #include <mach/vm_types.h>
41 #include <mach_assert.h>
42 #include <vm/vm_page.h>
43 
44 #include <arm/cpu_data.h>
45 #include <arm/machine_routines.h>
46 #include <arm64/proc_reg.h>
47 
48 /* Temporary include before moving all ledger functions into pmap_data.c */
49 #include <os/refcnt.h>
50 
51 /**
52  * These headers are safe to be included in this file since they shouldn't rely
53  * on any of the internal pmap header files (so no circular dependencies).
54  */
55 #include <arm/pmap.h>
56 #include <arm/pmap/pmap_pt_geometry.h>
57 
58 /**
59  * These values represent the first and last kernel-managed physical addresses.
60  * We keep track of extra metadata on kernel-managed pages compared to other
61  * pages (usually iBoot carved out memory or I/O).
62  */
63 extern pmap_paddr_t vm_first_phys, vm_last_phys;
64 
65 /**
66  * Return whether the given address represents a kernel-managed physical page.
67  *
68  * Whether a page is considered "kernel-managed" is determined by the BootArgs
69  * passed by the bootloader. Typically memory carved out by the bootloader as
70  * well as I/O memory should return false.
71  *
72  * @param pa The physical address to check.
73  */
74 static inline bool
pa_valid(pmap_paddr_t pa)75 pa_valid(pmap_paddr_t pa)
76 {
77 	return (pa >= vm_first_phys) && (pa < vm_last_phys);
78 }
79 
80 /**
81  * The pmap has a variety of data structures (pv_head_table/pp_attr_table) that
82  * contain an entry for every kernel-managed page in the system. These systems
83  * are indexed with physical address indices ("pai") generated by this function.
84  *
85  * The logic is simple since there should be one entry in each of these data
86  * structures for each kernel-managed physical page in the system. These data
87  * structures are allocated on boot based on the amount of memory available.
88  *
89  * @note PAIs are defined using the VM page size, which might not be identical
90  *       to the underlying hardware page size for an arbitrary address space.
91  *       This means that the data structures relying on PAIs will contain one
92  *       entry for each VM page, not hardware page.
93  *
94  * @note This function is only valid for physical addresses that are
95  *       kernel-managed.
96  */
97 
98 static inline unsigned int
pa_index(pmap_paddr_t pa)99 pa_index(pmap_paddr_t pa)
100 {
101 	return (unsigned int)atop(pa - vm_first_phys);
102 }
103 
104 /* See the definition of pv_head_table for more information. */
105 extern pv_entry_t **pv_head_table;
106 
107 /* Represents a NULL entry in the pv_head_table. */
108 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
109 
110 /**
111  * Given a physical address index, return the corresponding pv_head_table entry.
112  *
113  * @note Despite returning a pointer to a pv_entry_t pointer, the entry might
114  *       actually be a different type of pointer (pt_entry_t or pt_desc_t)
115  *       depending on the type for this entry. Determine the type using
116  *       pvh_test_type().
117  *
118  * @param pai The index returned by pa_index() for the page whose pv_head_table
119  *            entry should be retrieved.
120  */
121 static inline pv_entry_t **
pai_to_pvh(unsigned int pai)122 pai_to_pvh(unsigned int pai)
123 {
124 	return &pv_head_table[pai];
125 }
126 
127 /**
128  * Each pv_head_table entry can be one of four different types:
129  *
130  * - PVH_TYPE_NULL: No mappings to the physical page exist outside of the
131  *                  physical aperture. Physical aperture mappings are not
132  *                  tracked in the pv_head_table.
133  *
134  * - PVH_TYPE_PVEP: There are multiple mappings to the physical page.
135  *                  These entries are linked lists of pv_entry_t objects (which
136  *                  each contain a pointer to the associated PTE and a pointer
137  *                  to the next entry in the list).
138  *
139  * - PVH_TYPE_PTEP: There is a single mapping to the physical page. Once more
140  *                  mappings are created, this entry will get upgraded to an
141  *                  entry of type PVH_TYPE_PVEP. These entries are pointers
142  *                  directly to the page table entry that contain the mapping
143  *                  (pt_entry_t*).
144  *
145  * - PVH_TYPE_PTDP: The physical page is being used as a page table. These
146  *                  entries are pointers to page table descriptor structures
147  *                  (pt_desc_t) which contain metadata related to each page
148  *                  table.
149  *
150  * The type is stored in the bottom two bits of each pv_head_table entry. That
151  * type needs to be checked before dereferencing the pointer to determine which
152  * pointer type to dereference as.
153  */
__enum_closed_decl(pvh_type_t, uint8_t, {
	PVH_TYPE_NULL = 0b00, /* No tracked mappings to this physical page. */
	PVH_TYPE_PVEP = 0b01, /* Entry is a linked list of pv_entry_t objects. */
	PVH_TYPE_PTEP = 0b10, /* Entry is a single pt_entry_t pointer. */
	PVH_TYPE_PTDP = 0b11, /* Entry is a pt_desc_t pointer (page table page). */
});
160 
161 #define PVH_TYPE_MASK (0x3UL)
162 
163 #if defined(__arm64__)
164 
165 /**
166  * PV_HEAD_TABLE Flags.
167  *
168  * All flags listed below are stored in the pv_head_table entry/pointer
169  * (per-physical-page) unless otherwise noted.
170  *
171  * Please update the pv_walk LLDB macro if these flags are changed or added to.
172  */
173 
174 /**
175  * This flag is set for every mapping created by an IOMMU.
176  *
177  * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
178  * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
179  */
180 #define PVH_FLAG_IOMMU 0x4UL
181 
182 /**
183  * This flag is only valid when PVH_FLAG_IOMMU is set. For an IOMMU mapping, if
184  * this bit is set, then the PTE pointer points directly into the IOMMU page
185  * table for this mapping. If this bit is cleared, then the "PTE pointer" is
186  * actually a pointer to the IOMMU descriptor object that owns this mapping.
187  *
188  * There are cases where it's not easy to tie an IOMMU mapping directly to a
189  * specific page table, so this allows us to at least get a pointer to which
190  * IOMMU created this mapping which is useful for debugging purposes.
191  *
192  * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
193  * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
194  */
195 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
196 
197 /**
198  * This flag is set when the first CPU (non-IOMMU) mapping is created. This is
199  * important to keep track of because various accounting statistics are based on
200  * the options specified for the first CPU mapping. This flag, and thus the
201  * accounting statistics, will persist as long as there *any* mappings of the
202  * page (including IOMMU mappings). This works because the accounting for a page
203  * should not need to change until the page is recycled by the VM layer, and we
204  * double-check that there are no mappings (CPU or IOMMU) when a page is
205  * recycled (see: pmap_verify_free()).
206  */
207 #define PVH_FLAG_CPU (1ULL << 62)
208 
209 /* This bit is used as a lock when modifying a pv_head_table entry. */
210 #define PVH_LOCK_BIT 61
211 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
212 
213 /**
214  * This flag is set when there are any executable mappings to this physical
215  * page. This is used to prevent any writable mappings from being created at
216  * the same time an executable mapping exists.
217  */
218 #define PVH_FLAG_EXEC (1ULL << 60)
219 
220 /**
221  * Marking a pv_head_table entry with this flag denotes that this page is a
222  * kernelcache text or data page that shouldn't have dynamically-created
223  * mappings.  See PVH_FLAG_LOCKDOWN_MASK for more details.
224  */
225 #define PVH_FLAG_LOCKDOWN_KC (1ULL << 59)
226 
227 /**
228  * This flag is used to mark that a page has been hashed into the hibernation
229  * image.
230  *
231  * The hibernation driver will use this to ensure that all PPL-owned memory is
232  * correctly included into the hibernation image (a missing PPL page could be
233  * a security concern when coming out of hibernation).
234  */
235 #define PVH_FLAG_HASHED (1ULL << 58)
236 
237 /**
238  * Marking a pv_head_table entry with this flag denotes that this page is a
239  * code signature page that shouldn't have dynamically-created mappings.
240  * See PVH_FLAG_LOCKDOWN_MASK for more details.
241  */
242 #define PVH_FLAG_LOCKDOWN_CS (1ULL << 57)
243 
244 /**
245  * Marking a pv_head_table entry with this flag denotes that this page is a
246  * read-only allocator page that shouldn't have dynamically-created mappings.
247  * See PVH_FLAG_LOCKDOWN_MASK for more details.
248  */
249 #define PVH_FLAG_LOCKDOWN_RO (1ULL << 56)
250 
251 /**
252  * Marking a pv_head_table entry with this flag denotes that this page is
253  * retired without any mappings and never should be mapped again.
254  */
255 #define PVH_FLAG_RETIRED (1ULL << 55)
256 
257 /**
258  * Flags which disallow a new mapping to a page.
259  */
260 #define PVH_FLAG_NOMAP_MASK (PVH_FLAG_RETIRED)
261 
262 /**
263  * Marking a pv_head_table entry with this flag denotes that this page has
264  * been mapped into a non-coherent coprocessor address space and requires a
265  * cache flush operation once all mappings have been removed.
266  */
267 #define PVH_FLAG_FLUSH_NEEDED (1ULL << 54)
268 
269 /**
270  * Marking a pv_head_table entry with any bit in this mask denotes that this page
271  * has been locked down by the PPL.  Locked down pages can't have new mappings
272  * created or existing mappings removed, and all existing mappings will have been
273  * converted to read-only.  This essentially makes the page immutable.
274  */
275 #define PVH_FLAG_LOCKDOWN_MASK (PVH_FLAG_LOCKDOWN_KC | PVH_FLAG_LOCKDOWN_CS | PVH_FLAG_LOCKDOWN_RO)
276 
277 
278 /**
279  * These bits need to be set to safely dereference a pv_head_table
280  * entry/pointer.
281  *
282  * Any change to this #define should also update the copy located in the pmap.py
283  * LLDB macros file.
284  */
285 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN_MASK | \
286     PVH_FLAG_HASHED | PVH_FLAG_FLUSH_NEEDED | PVH_FLAG_RETIRED)
287 
288 #endif /* defined(__arm64__) */
289 
290 /* Mask used to clear out the TYPE bits from a pv_head_table entry/pointer. */
291 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
292 
293 /* Which 32-bit word in each pv_head_table entry/pointer contains the LOCK bit. */
294 #if defined(__arm64__)
295 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
296 #endif /* defined(__arm64__) */
297 
298 /**
299  * Assert that a pv_head_table entry is locked. Will panic if the lock isn't
300  * acquired.
301  *
302  * @param index The physical address index to check.
303  */
304 static inline void
pvh_assert_locked(__assert_only unsigned int index)305 pvh_assert_locked(__assert_only unsigned int index)
306 {
307 	assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);
308 }
309 
310 
311 /**
312  * Lock a pv_head_table entry.
313  *
314  * @param index The physical address index of the pv_head_table entry to lock.
315  */
static inline void
pvh_lock(unsigned int index)
{
	/*
	 * The lock bit (PVH_LOCK_BIT, bit 61 of the 64-bit entry) lives in the
	 * upper 32-bit word of the entry, so point the bit-lock primitive at
	 * that word and give it the bit position relative to the word
	 * (PVH_LOCK_BIT - 32). PVH_LOCK_WORD assumes little-endian layout.
	 */
	pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
	    PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
}
322 
323 /**
324  * Unlock a pv_head_table entry.
325  *
326  * @param index The physical address index of the pv_head_table entry to unlock.
327  */
static inline void
pvh_unlock(unsigned int index)
{
	/* Unlocking an entry that isn't locked is a bug; catch it early. */
	pvh_assert_locked(index);

	/*
	 * Mirror image of pvh_lock(): clear the lock bit in the upper 32-bit
	 * word of the entry (see pvh_lock() for the word/bit arithmetic).
	 */
	pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
	    PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
}
336 
337 /**
338  * Check that a pv_head_table entry/pointer is a specific type.
339  *
340  * @param pvh The pv_head_table entry/pointer to check.
341  * @param type The type to check for.
342  *
343  * @return True if the pv_head_table entry is of the passed in type, false
344  *         otherwise.
345  */
346 static inline bool
pvh_test_type(pv_entry_t ** pvh,pvh_type_t type)347 pvh_test_type(pv_entry_t **pvh, pvh_type_t type)
348 {
349 	return ((*(vm_offset_t *)pvh) & PVH_TYPE_MASK) == type;
350 }
351 
352 /**
353  * Convert a pv_head_table entry/pointer into a page table entry pointer. This
354  * should only be done if the type of this entry is PVH_TYPE_PTEP.
355  *
356  * @param pvh The pv_head_table entry/pointer to convert into a pt_entry_t*.
357  *
358  * @return Return back a safe to derefence pointer to the single mapping of this
359  *         physical page by masking off the TYPE bits and adding any missing
360  *         flags to the upper portion of the pointer.
361  */
362 static inline pt_entry_t*
pvh_ptep(pv_entry_t ** pvh)363 pvh_ptep(pv_entry_t **pvh)
364 {
365 	return (pt_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
366 }
367 
368 /**
369  * Convert a pv_head_table entry/pointer into a PVE list pointer. This
370  * should only be done if the type of this entry is PVH_TYPE_PVEP.
371  *
372  * @param pvh The pv_head_table entry/pointer to convert into a safe to
373  *            dereference pv_entry_t*.
374  *
375  * @return Return back a safe to derefence pointer to the first mapping of this
376  *         physical page by masking off the TYPE bits and adding any missing
377  *         flags to the upper portion of the pointer.
378  */
379 static inline pv_entry_t*
pvh_pve_list(pv_entry_t ** pvh)380 pvh_pve_list(pv_entry_t **pvh)
381 {
382 	return (pv_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
383 }
384 
385 /**
386  * Return the flags associated with a pv_head_table entry/pointer.
387  *
388  * @param pvh The pv_head_table entry whose flags to get.
389  */
390 static inline vm_offset_t
pvh_get_flags(pv_entry_t ** pvh)391 pvh_get_flags(pv_entry_t **pvh)
392 {
393 	return (*(vm_offset_t *)pvh) & PVH_HIGH_FLAGS;
394 }
395 
396 /**
397  * Atomically set the flags associated with a pv_head_table entry/pointer.
398  *
399  * @param pvh The pv_head_table entry whose flags are getting set.
400  */
401 static inline void
pvh_set_flags(pv_entry_t ** pvh,vm_offset_t flags)402 pvh_set_flags(pv_entry_t **pvh, vm_offset_t flags)
403 {
404 	os_atomic_store((vm_offset_t *)pvh, ((*(vm_offset_t *)pvh) & ~PVH_HIGH_FLAGS) | flags, relaxed);
405 }
406 
407 /**
408  * Update a pv_head_table entry/pointer to be a different type and/or point to
409  * a different object.
410  *
411  * @note The pv_head_table entry MUST already be locked.
412  *
413  * @note This function will clobber any existing flags stored in the PVH pointer
414  *       (except PVH_FLAG_LOCK). It's up to the caller to preserve flags if that
415  *       functionality is needed (either by ensuring `pvep` contains those
416  *       flags, or by manually setting the flags after this call).
417  *
418  * @param pvh The pv_head_table entry/pointer to update.
419  * @param pvep The new entry to use. This could be either a pt_entry_t*,
420  *             pv_entry_t*, or pt_desc_t* depending on the type.
421  * @param type The type of the new entry.
422  */
423 static inline void
pvh_update_head(pv_entry_t ** pvh,void * pvep,unsigned int type)424 pvh_update_head(pv_entry_t **pvh, void *pvep, unsigned int type)
425 {
426 	assert((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK);
427 	os_atomic_store((vm_offset_t *)pvh, (vm_offset_t)pvep | type | PVH_FLAG_LOCK, relaxed);
428 }
429 
430 /**
431  * Update a pv_head_table entry/pointer to be a different type and/or point to
432  * a different object.
433  *
434  * @note The pv_head_table entry CAN'T already be locked.
435  *
436  * @note This function will clobber any existing flags stored in the PVH
437  *       pointer. It's up to the caller to preserve flags if that functionality
438  *       is needed (either by ensuring `pvep` contains those flags, or by
439  *       manually setting the flags after this call).
440  *
441  * @param pvh The pv_head_table entry/pointer to update.
442  * @param pvep The new entry to use. This could be either a pt_entry_t*,
443  *             pv_entry_t*, or pt_desc_t* depending on the type.
444  * @param type The type of the new entry.
445  */
446 static inline void
pvh_update_head_unlocked(pv_entry_t ** pvh,void * pvep,unsigned int type)447 pvh_update_head_unlocked(pv_entry_t **pvh, void *pvep, unsigned int type)
448 {
449 	assert(!((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK));
450 	*(vm_offset_t *)pvh = ((vm_offset_t)pvep | type) & ~PVH_FLAG_LOCK;
451 }
452 
453 /**
454  * Given a page table entry pointer retrieved from the pv_head_table (from an
455  * entry of type PVH_TYPE_PTEP or PVH_TYPE_PVEP), return back whether the PTE is
456  * an IOMMU mapping.
457  *
458  * @note The way this function determines whether the passed in pointer is
459  *       pointing to an IOMMU PTE, is by checking for a special flag stored in
460  *       the lower bits of the pointer. This flag is only set on pointers stored
461  *       in the pv_head_table, and as such, this function will only work on
462  *       pointers retrieved from the pv_head_table. If a pointer to a PTE was
463  *       directly retrieved from an IOMMU's page tables, this function would
464  *       always return false despite actually being an IOMMU PTE.
465  *
466  * @param ptep A PTE pointer obtained from the pv_head_table to check.
467  *
468  * @return True if the entry is an IOMMU mapping, false otherwise.
469  */
470 static inline bool
pvh_ptep_is_iommu(const pt_entry_t * ptep)471 pvh_ptep_is_iommu(const pt_entry_t *ptep)
472 {
473 #ifdef PVH_FLAG_IOMMU
474 	return (vm_offset_t)ptep & PVH_FLAG_IOMMU;
475 #else /* PVH_FLAG_IOMMU */
476 	#pragma unused(ptep)
477 	return false;
478 #endif /* PVH_FLAG_IOMMU */
479 }
480 
481 /**
482  * Sometimes the PTE pointers retrieved from the pv_head_table (from an entry of
483  * type PVH_TYPE_PTEP or PVH_TYPE_PVEP) contain flags themselves. This function
484  * strips out those flags and returns back a dereferencable pointer.
485  *
486  * @param ptep The PTE pointer to strip out the unwanted flags.
487  *
488  * @return A valid dereferencable pointer to the page table entry.
489  */
490 static inline const pt_entry_t*
pvh_strip_ptep(const pt_entry_t * ptep)491 pvh_strip_ptep(const pt_entry_t *ptep)
492 {
493 #ifdef PVH_FLAG_IOMMU
494 	const vm_offset_t pte_va = (vm_offset_t)ptep;
495 	return (const pt_entry_t*)((pte_va & ~PVH_FLAG_IOMMU) | PVH_FLAG_IOMMU_TABLE);
496 #else /* PVH_FLAG_IOMMU */
497 	return ptep;
498 #endif /* PVH_FLAG_IOMMU */
499 }
500 
501 /**
502  * PVH_TYPE_PVEP Helper Functions.
503  *
504  * The following are methods used to manipulate PVE lists. This is the type of
505  * pv_head_table entry used when there are multiple mappings to a single
506  * physical page.
507  */
508 
509 /**
510  * Whether a physical page is using "alternate accounting" (ALTACCT) for its
511  * ledger statistics is something that needs to be tracked on a per-mapping
512  * basis, not on a per-physical-page basis. Because of that, it's tracked
513  * differently depending on whether there's a single mapping to a page
514  * (PVH_TYPE_PTEP) or multiple (PVH_TYPE_PVEP). For single mappings, the bit is
515  * tracked in the pp_attr_table. But when there are multiple mappings, the least
516  * significant bit of the corresponding "pve_pte" pointer in each pv_entry object
517  * is used as a marker for pages using alternate accounting.
518  *
519  * @note See the definition for PP_ATTR_ALTACCT for a more detailed description
520  *       of what "alternate accounting" actually means in respect to the
521  *       footprint ledger.
522  *
523  * Since some code (KernelDiskImages, e.g.) might map a phsyical page as
524  * "device" memory (i.e. external) while it's also being used as regular
525  * "anonymous" memory (i.e. internal) in user space, we have to manage the
526  * "internal" attribute per mapping rather than per physical page.
527  * When there are multiple mappings, we use the next least significant bit of
528  * the corresponding "pve_pte" pointer for that.
529  */
530 #define PVE_PTEP_ALTACCT ((uintptr_t) 0x1)
531 #define PVE_PTEP_INTERNAL ((uintptr_t) 0x2)
532 #define PVE_PTEP_FLAGS (PVE_PTEP_ALTACCT | PVE_PTEP_INTERNAL)
533 
534 /**
535  * Set the ALTACCT bit for a specific PTE pointer.
536  *
537  * @param pvep A pointer to the current pv_entry mapping in the linked list of
538  *             mappings.
539  * @param idx Index of the chosen PTE pointer inside the PVE.
540  */
541 static inline void
pve_set_altacct(pv_entry_t * pvep,unsigned idx)542 pve_set_altacct(pv_entry_t *pvep, unsigned idx)
543 {
544 	assert(idx < PTE_PER_PVE);
545 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_ALTACCT);
546 }
547 /**
548  * Set the INTERNAL bit for a specific PTE pointer.
549  *
550  * @param pvep A pointer to the current pv_entry mapping in the linked list of
551  *             mappings.
552  * @param idx Index of the chosen PTE pointer inside the PVE.
553  */
554 static inline void
pve_set_internal(pv_entry_t * pvep,unsigned idx)555 pve_set_internal(pv_entry_t *pvep, unsigned idx)
556 {
557 	assert(idx < PTE_PER_PVE);
558 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_INTERNAL);
559 }
560 
561 /**
562  * Clear the ALTACCT bit for a specific PTE pointer.
563  *
564  * @param pvep A pointer to the current pv_entry mapping in the linked list of
565  *             mappings.
566  * @param idx Index of the chosen PTE pointer inside the PVE.
567  */
568 static inline void
pve_clr_altacct(pv_entry_t * pvep,unsigned idx)569 pve_clr_altacct(pv_entry_t *pvep, unsigned idx)
570 {
571 	assert(idx < PTE_PER_PVE);
572 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_ALTACCT);
573 }
574 /**
575  * Clear the INTERNAL bit for a specific PTE pointer.
576  *
577  * @param pvep A pointer to the current pv_entry mapping in the linked list of
578  *             mappings.
579  * @param idx Index of the chosen PTE pointer inside the PVE.
580  */
581 static inline void
pve_clr_internal(pv_entry_t * pvep,unsigned idx)582 pve_clr_internal(pv_entry_t *pvep, unsigned idx)
583 {
584 	assert(idx < PTE_PER_PVE);
585 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_INTERNAL);
586 }
587 
588 /**
589  * Return the ALTACCT bit for a specific PTE pointer.
590  *
591  * @param pvep A pointer to the current pv_entry mapping in the linked list of
592  *             mappings.
593  * @param idx Index of the chosen PTE pointer inside the PVE.
594  */
595 static inline bool
pve_get_altacct(pv_entry_t * pvep,unsigned idx)596 pve_get_altacct(pv_entry_t *pvep, unsigned idx)
597 {
598 	assert(idx < PTE_PER_PVE);
599 	return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_ALTACCT;
600 }
601 /**
602  * Return the INTERNAL bit for a specific PTE pointer.
603  *
604  * @param pvep A pointer to the current pv_entry mapping in the linked list of
605  *             mappings.
606  * @param idx Index of the chosen PTE pointer inside the PVE.
607  */
608 static inline bool
pve_get_internal(pv_entry_t * pvep,unsigned idx)609 pve_get_internal(pv_entry_t *pvep, unsigned idx)
610 {
611 	assert(idx < PTE_PER_PVE);
612 	return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_INTERNAL;
613 }
614 
615 /**
616  * Return the next mapping (pv_entry) in a linked list of mappings. This applies
617  * to pv_head_table entries of type PVH_TYPE_PVEP.
618  *
619  * @param pvep A pointer to the current pv_entry mapping in the linked list of
620  *             mappings.
621  *
622  * @return The next virtual mapping for a physical page, or PV_ENTRY_NULL if the
623  *         end of the list has been reached.
624  */
625 static inline pv_entry_t *
pve_next(pv_entry_t * pvep)626 pve_next(pv_entry_t *pvep)
627 {
628 	return pvep->pve_next;
629 }
630 
631 /**
632  * Return a pointer to the pve_next field in a pv_entry. This value is used
633  * when adding and removing entries to a PVE list.
634  *
635  * @param pvep The pv_entry whose pve_next field is being accessed.
636  *
637  * @return Pointer to the pve_next field.
638  */
639 static inline pv_entry_t **
pve_next_ptr(pv_entry_t * pvep)640 pve_next_ptr(pv_entry_t *pvep)
641 {
642 	return &pvep->pve_next;
643 }
644 
645 /**
646  * Return a pointer to the page table entry for this mapping.
647  *
648  * @param pvep The pv_entry whose pve_ptep field is to be returned.
649  * @param idx Index of the chosen PTE pointer inside the PVE.
650  *
651  * @return Pointer to the page table entry.
652  */
653 static inline pt_entry_t *
pve_get_ptep(pv_entry_t * pvep,unsigned idx)654 pve_get_ptep(pv_entry_t *pvep, unsigned idx)
655 {
656 	assert(idx < PTE_PER_PVE);
657 	return (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_FLAGS);
658 }
659 
660 /**
661  * Update the page table entry for a specific physical to virtual mapping.
662  *
663  * @param pvep The pv_entry to update.
664  * @param idx Index of the chosen PTE pointer inside the PVE.
665  * @param ptep_new The new page table entry.
666  */
667 static inline void
pve_set_ptep(pv_entry_t * pvep,unsigned idx,pt_entry_t * ptep_new)668 pve_set_ptep(pv_entry_t *pvep, unsigned idx, pt_entry_t *ptep_new)
669 {
670 	assert(idx < PTE_PER_PVE);
671 	pvep->pve_ptep[idx] = ptep_new;
672 }
673 
674 /**
675  * Initialize all fields in a PVE to NULL.
676  *
677  * @param pvep The pv_entry to initialize.
678  */
679 static inline void
pve_init(pv_entry_t * pvep)680 pve_init(pv_entry_t *pvep)
681 {
682 	pvep->pve_next = PV_ENTRY_NULL;
683 	for (int i = 0; i < PTE_PER_PVE; i++) {
684 		pvep->pve_ptep[i] = PT_ENTRY_NULL;
685 	}
686 }
687 
688 /**
689  * Find PTE pointer in PVE and return its index.
690  *
691  * @param pvep The PVE to search.
692  * @param ptep PTE to search for.
693  *
694  * @return Index of the found entry, or -1 if no entry exists.
695  */
696 static inline int
pve_find_ptep_index(pv_entry_t * pvep,pt_entry_t * ptep)697 pve_find_ptep_index(pv_entry_t *pvep, pt_entry_t *ptep)
698 {
699 	for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
700 		if (pve_get_ptep(pvep, i) == ptep) {
701 			return (int)i;
702 		}
703 	}
704 
705 	return -1;
706 }
707 
708 /**
709  * Checks if no PTEs are currently associated with this PVE.
710  *
711  * @param pvep The PVE to search.
712  *
713  * @return True if no PTEs are currently associated with this PVE, or false.
714  */
715 static inline bool
pve_is_empty(pv_entry_t * pvep)716 pve_is_empty(pv_entry_t *pvep)
717 {
718 	for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
719 		if (pve_get_ptep(pvep, i) != PT_ENTRY_NULL) {
720 			return false;
721 		}
722 	}
723 
724 	return true;
725 }
726 
727 /**
728  * Prepend a new pv_entry node to a PVE list.
729  *
730  * @note This function will clobber any existing flags stored in the PVH
731  *       pointer. It's up to the caller to preserve flags if that functionality
732  *       is needed (either by ensuring `pvep` contains those flags, or by
733  *       manually setting the flags after this call).
734  *
735  * @param pvh The linked list of mappings to update.
736  * @param pvep The new mapping to add to the linked list.
737  */
738 static inline void
pve_add(pv_entry_t ** pvh,pv_entry_t * pvep)739 pve_add(pv_entry_t **pvh, pv_entry_t *pvep)
740 {
741 	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
742 
743 	pvep->pve_next = pvh_pve_list(pvh);
744 	pvh_update_head(pvh, pvep, PVH_TYPE_PVEP);
745 }
746 
747 /**
748  * Remove an entry from a PVE list of mappings.
749  *
750  * @note This function will clobber any existing flags stored in the PVH
751  *       pointer. It's up to the caller to preserve flags if that functionality
752  *       is needed.
753  *
754  * @param pvh The pv_head_table entry of the PVE list to remove a mapping from.
755  *            This is the first entry in the list of pv_entry_t mappings.
756  * @param pvepp A pointer to the pv_entry_t* that's being removed. If this entry
757  *              is the first in the linked list of mappings, then this should be
758  *              identical to the pv_head_table entry. If the mapping isn't the
759  *              first, then this is a pointer to the pve_next field in the
760  *              previous mapping.
761  * @param pvep The entry that should be removed. Should be identical to a
762  *             dereference of the pvepp parameter (unless it's the pv_head_table
763  *             entry).
764  */
765 static inline void
pve_remove(pv_entry_t ** pvh,pv_entry_t ** pvepp,pv_entry_t * pvep)766 pve_remove(pv_entry_t **pvh, pv_entry_t **pvepp, pv_entry_t *pvep)
767 {
768 	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
769 
770 	if (pvepp == pvh) {
771 		if (pve_next(pvep) == PV_ENTRY_NULL) {
772 			/* The last mapping to this page is being removed. */
773 			pvh_update_head(pvh, PV_ENTRY_NULL, PVH_TYPE_NULL);
774 		} else {
775 			/**
776 			 * There are still mappings left, make the next one the new head of
777 			 * the list. This effectively removes the first entry from the list.
778 			 */
779 			pvh_update_head(pvh, pve_next(pvep), PVH_TYPE_PVEP);
780 		}
781 	} else {
782 		/**
783 		 * Move the previous entry's next field to the entry after the one being
784 		 * removed. This will clobber the ALTACCT and INTERNAL bits.
785 		 */
786 		*pvepp = pve_next(pvep);
787 	}
788 }
789 
790 /**
791  * PVH_TYPE_PTDP Types and Helper Functions.
792  *
793  * The following are types and methods used to manipulate page table descriptor
794  * (PTD) objects. This is the type of pv_head_table entry used when a page is
795  * being used as a page table.
796  */
797 
798 /**
799  * When the pmap layer allocates memory, it always does so in chunks of the VM
800  * page size (which are represented by the PAGE_SIZE/PAGE_SHIFT macros). The VM
801  * page size might not match up with the hardware page size for a given address
802  * space (this is especially true on systems that support more than one page
803  * size).
804  *
805  * The pv_head_table is allocated to have one entry per VM page, not hardware
806  * page (which can change depending on the address space). Because of that, a
807  * single VM-page-sized region (single pv_head_table entry) can potentially hold
808  * up to four page tables. Only one page table descriptor (PTD) is allocated per
809  * pv_head_table entry (per VM page), so on some systems, one PTD might have to
810  * keep track of up to four different page tables.
811  */
812 
#if __ARM_MIXED_PAGE_SIZE__
/* One page-table slot per 4KB hardware page that fits in a kernel page. */
#define PT_INDEX_MAX (ARM_PGBYTES / 4096)
#elif (ARM_PGSHIFT == 14)
/* 16KB kernel pages: a single page table per PTD. */
#define PT_INDEX_MAX 1
#elif (ARM_PGSHIFT == 12)
/* 4KB kernel pages: a PTD may cover up to four tables (see comment above). */
#define PT_INDEX_MAX 4
#else
#error Unsupported ARM_PGSHIFT
#endif /* __ARM_MIXED_PAGE_SIZE__ || ARM_PGSHIFT == 14 || ARM_PGSHIFT == 12 */
822 
823 
824 /**
825  * Page table descriptor (PTD) info structure.
826  *
827  * Contains information about a page table. These pieces of data are separate
828  * from the PTD itself because in address spaces where the VM page size doesn't
829  * match the underlying hardware page size, one PTD could represent multiple
830  * page tables (and so will need multiple PTD info structures).
831  *
832  * These fields are also in their own struct so that they can be allocated
833  * separately from the associated pt_desc_t object. This allows us to allocate
834  * the counts in this structure in a way that ensures they don't fall within the
835  * same cache line as the main pt_desc_t object. This is important because the
836  * fields in this structure are atomically updated which could cause false
837  * sharing cache performance issues with the "va" field in pt_desc_t if all of
838  * the fields were within the same structure.
839  */
typedef struct {
	/**
	 * Pre-defined sentinel values for ptd_info_t.refcnt. If these refcnt values
	 * change, make sure to update the showpte LLDB macro to reflect the
	 * changes.
	 */
	#define PT_DESC_REFCOUNT                0x4000U
	#define PT_DESC_IOMMU_GRANTED_REFCOUNT  0x8000U
	#define PT_DESC_IOMMU_ACCEPTED_REFCOUNT 0x8001U

	/*
	 * Page table reference count.
	 *
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT.
	 * For leaf pagetables, should reflect the number of non-empty PTEs.
	 * For IOMMU pages, should always be either PT_DESC_IOMMU_GRANTED_REFCOUNT
	 * or PT_DESC_IOMMU_ACCEPTED_REFCOUNT.
	 *
	 * Updated atomically from multiple CPUs (see the struct-level comment
	 * above regarding false sharing).
	 */
	unsigned short refcnt;

	/*
	 * Wired-mapping count.
	 *
	 * For non-leaf pagetables, should be 0.
	 * For leaf pagetables, should reflect the number of wired entries.
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU
	 * operations are implicitly wired).
	 */
	unsigned short wiredcnt;
} ptd_info_t;
866 
867 /**
868  * Page Table Descriptor (PTD).
869  *
870  * Provides a per-table data structure and a way of keeping track of all page
871  * tables in the system.
872  *
873  * This structure is also used as a convenient way of keeping track of IOMMU
874  * pages (which may or may not be used as page tables). In that case the "iommu"
875  * field will point to the owner of the page, ptd_info[0].refcnt will be
876  * PT_DESC_IOMMU_GRANTED_REFCOUNT or PT_DESC_IOMMU_ACCEPTED_REFCOUNT, and
877  * ptd_info[0].wiredcnt can be used as an arbitrary refcnt controlled by the
878  * IOMMU driver.
879  */
typedef struct pt_desc {
	/**
	 * This queue chain provides a mechanism for keeping a list of pages
	 * being used as page tables. This is used to potentially reclaim userspace
	 * page tables as a fast way of "allocating" a page.
	 *
	 * Refer to osfmk/kern/queue.h for more information about queue chains.
	 */
	queue_chain_t pt_page;

	/* Each page table is either owned by a pmap or a specific IOMMU. */
	union {
		/*
		 * Owning pmap when this PTD tracks a CPU page table.
		 *
		 * NOTE(review): the struct-level comment above refers to an "iommu"
		 * member for IOMMU-owned pages; it is not present in this
		 * configuration — confirm against the full source.
		 */
		struct pmap *pmap;
	};

	/**
	 * The following fields contain per-page-table properties, and as such,
	 * might have multiple elements each. This is due to a single PTD
	 * potentially representing multiple page tables (in address spaces where
	 * the VM page size differs from the hardware page size). Use the
	 * ptd_get_index() function to get the correct index for a specific page
	 * table.
	 */

	/**
	 * The first address of the virtual address space this page table is
	 * translating for, or a value set by an IOMMU driver if this PTD is being
	 * used to track an IOMMU page.
	 */
	vm_offset_t va[PT_INDEX_MAX];

	/**
	 * ptd_info_t's are allocated separately so as to reduce false sharing
	 * with the va field. This is desirable because ptd_info_t's are updated
	 * atomically from all CPUs.
	 */
	ptd_info_t *ptd_info;
} pt_desc_t;
918 
919 /**
920  * Convert a pv_head_table entry/pointer into a page table descriptor pointer.
921  * This should only be done if the type of this entry is PVH_TYPE_PTDP.
922  *
923  * @param pvh The pv_head_table entry/pointer to convert into a safe to
924  *            dereference pt_desc_t*.
925  *
926  * @return Return back a safe to derefence pointer to the page table descriptor
927  *         for this physical page by masking off the TYPE bits and adding any
928  *         missing flags to the upper portion of the pointer.
929  */
930 static inline pt_desc_t*
pvh_ptd(pv_entry_t ** pvh)931 pvh_ptd(pv_entry_t **pvh)
932 {
933 	return (pt_desc_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
934 }
935 
936 /**
937  * Given an arbitrary page table entry, return back the page table descriptor
938  * (PTD) object for the page table that contains that entry.
939  *
940  * @param ptep Pointer to a PTE whose page table descriptor object to return.
941  *
942  * @return The PTD object for the passed in page table.
943  */
944 static inline pt_desc_t *
ptep_get_ptd(const pt_entry_t * ptep)945 ptep_get_ptd(const pt_entry_t *ptep)
946 {
947 	assert(ptep != NULL);
948 
949 	const vm_offset_t pt_base_va = (vm_offset_t)ptep;
950 	pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop(pt_base_va)));
951 
952 	if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
953 		panic("%s: invalid PV head 0x%llx for PTE %p", __func__, (uint64_t)(*pvh), ptep);
954 	}
955 
956 	return pvh_ptd(pvh);
957 }
958 
959 /**
960  * Given an arbitrary page table entry, return back the pmap that owns that
961  * page table.
962  *
963  * @note This won't work correctly for page tables owned by IOMMUs, because
964  *       those table aren't owned by any specific pmap.
965  *
966  * @param ptep Pointer to a page table entry whose owner we're trying to return.
967  *
968  * @return The pmap that owns the given page table entry.
969  */
970 static inline struct pmap *
ptep_get_pmap(const pt_entry_t * ptep)971 ptep_get_pmap(const pt_entry_t *ptep)
972 {
973 	return ptep_get_ptd(ptep)->pmap;
974 }
975 
976 
977 /**
978  * Given an arbitrary translation table entry, get the page table descriptor
979  * (PTD) object for the page table pointed to by the TTE.
980  *
981  * @param tte The translation table entry to parse. For instance, if this is an
982  *            L2 TTE, then the PTD for the L3 table this entry points to will be
983  *            returned.
984  *
985  * @return The page table descriptor (PTD) for the page table pointed to by this
986  *         TTE.
987  */
988 static inline pt_desc_t *
tte_get_ptd(const tt_entry_t tte)989 tte_get_ptd(const tt_entry_t tte)
990 {
991 	const vm_offset_t pt_base_va = (vm_offset_t)(tte & ~((tt_entry_t)PAGE_MASK));
992 	pv_entry_t **pvh = pai_to_pvh(pa_index(pt_base_va));
993 
994 	if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
995 		panic("%s: invalid PV head 0x%llx for TTE 0x%llx", __func__, (uint64_t)(*pvh), (uint64_t)tte);
996 	}
997 
998 	return pvh_ptd(pvh);
999 }
1000 
1001 /**
1002  * In address spaces where the VM page size doesn't match the underlying
1003  * hardware page size, one PTD could represent multiple page tables. This
1004  * function returns the correct index value depending on which page table is
1005  * being accessed. That index value can then be used to access the
1006  * per-page-table properties stored within a PTD.
1007  *
1008  * @note See the description above the PT_INDEX_MAX definition for a more
1009  *       detailed explanation of why multiple page tables can be represented
1010  *       by a single PTD object in the pv_head_table.
1011  *
1012  * @param ptd The page table descriptor that's being accessed.
1013  * @param ttep Pointer to the translation table entry that's being accessed.
1014  *
1015  * @return The correct index value for a specific, hardware-sized page
1016  *         table.
1017  */
static inline unsigned
ptd_get_index(__unused const pt_desc_t *ptd, __unused const tt_entry_t *ttep)
{
#if PT_INDEX_MAX == 1
	/* Only one page table per PTD, so the index is trivially zero. */
	return 0;
#else
	assert(ptd != NULL);

	/* Leaf-level page shift for the address space that owns this table. */
	const uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
	const vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;

	/**
	 * Use the difference between the VM page shift and the hardware page shift
	 * to get the index of the correct page table. In practice, this equates to
	 * masking out the bottom two bits of the L3 table index in address spaces
	 * where the VM page size is greater than the hardware page size. In address
	 * spaces where they're identical, the index will always be zero.
	 */
	const unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
	assert(ttep_index < PT_INDEX_MAX);

	return ttep_index;
#endif
}
1042 
1043 /**
1044  * In address spaces where the VM page size doesn't match the underlying
1045  * hardware page size, one PTD could represent multiple page tables. This
1046  * function returns the correct ptd_info_t structure depending on which page
1047  * table is being accessed.
1048  *
1049  * @note See the description above the PT_INDEX_MAX definition for a more
1050  *       detailed explanation of why multiple page tables can be represented
1051  *       by a single PTD object in the pv_head_table.
1052  *
1053  * @param ptd The page table descriptor that's being accessed.
1054  * @param ttep Pointer to the translation table entry that's being accessed.
1055  *
1056  * @return The correct ptd_info_t structure for a specific, hardware-sized page
1057  *         table.
1058  */
1059 static inline ptd_info_t *
ptd_get_info(pt_desc_t * ptd,const tt_entry_t * ttep)1060 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1061 {
1062 	assert((ptd != NULL) && (ptd->ptd_info[0].refcnt < PT_DESC_IOMMU_GRANTED_REFCOUNT));
1063 
1064 	return &ptd->ptd_info[ptd_get_index(ptd, ttep)];
1065 }
1066 
1067 /**
1068  * Given a pointer to a page table entry, return back the ptd_info structure
1069  * for the page table that contains that entry.
1070  *
1071  * @param ptep Pointer to a PTE whose ptd_info object to return.
1072  *
1073  * @return The ptd_info object for the page table that contains the passed in
1074  *         page table entry.
1075  */
1076 static inline ptd_info_t *
ptep_get_info(const pt_entry_t * ptep)1077 ptep_get_info(const pt_entry_t *ptep)
1078 {
1079 	return ptd_get_info(ptep_get_ptd(ptep), ptep);
1080 }
1081 
1082 /**
1083  * Return the virtual address mapped by the passed in leaf page table entry,
1084  * using an already-retrieved pagetable descriptor.
1085  *
1086  * @param ptdp pointer to the descriptor for the pagetable containing ptep
1087  * @param ptep Pointer to a PTE to parse
1088  */
1089 static inline vm_map_address_t
ptd_get_va(const pt_desc_t * ptdp,const pt_entry_t * ptep)1090 ptd_get_va(const pt_desc_t *ptdp, const pt_entry_t *ptep)
1091 {
1092 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptdp->pmap);
1093 
1094 	vm_map_address_t va = ptdp->va[ptd_get_index(ptdp, ptep)];
1095 	vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1096 
1097 	va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1098 
1099 	return va;
1100 }
1101 
1102 /**
1103  * Return the virtual address that is being mapped by the passed in leaf page
1104  * table entry.
1105  *
1106  * @param ptep Pointer to a PTE to parse.
1107  */
1108 static inline vm_map_address_t
ptep_get_va(const pt_entry_t * ptep)1109 ptep_get_va(const pt_entry_t *ptep)
1110 {
1111 	return ptd_get_va(ptep_get_ptd(ptep), ptep);
1112 }
1113 
1114 /**
1115  * Physical Page Attribute Table (pp_attr_table) defines and helper functions.
1116  */
1117 
/*
 * Per-VM-page attribute flags; sixteen bits are available per page. See the
 * PP_ATTR_* definitions below for the meaning of each bit.
 */
typedef uint16_t pp_attr_t;

/*
 * One pp_attr_t entry per VM page, indexed by physical address index (pai).
 * See the definition of pp_attr_table for more information; entries are
 * manipulated atomically via the ppattr_*() helpers below.
 */
extern volatile pp_attr_t* pp_attr_table;
1123 
1124 /**
1125  * Flags stored in the pp_attr_table on a per-physical-page basis.
1126  *
1127  * Please update the pv_walk LLDB macro if these flags are changed or added to.
1128  */
1129 
1130 /**
1131  * The bottom 6-bits are used to store the default WIMG (cacheability and memory
1132  * type) setting for this physical page. This can be changed by calling
1133  * pmap_set_cache_attributes().
1134  *
1135  * If a default WIMG setting isn't set for a page, then the default is Normal,
1136  * Cached memory (VM_WIMG_DEFAULT).
1137  */
1138 #define PP_ATTR_WIMG_MASK 0x003F
1139 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1140 
1141 /**
1142  * The reference and modify bits keep track of whether a page has been accessed
1143  * or modified since the last time the bits were cleared. These bits are used to
1144  * enforce policy decisions in the VM layer.
1145  */
1146 #define PP_ATTR_REFERENCED 0x0040
1147 #define PP_ATTR_MODIFIED   0x0080
1148 
1149 /**
1150  * This physical page is being used as anonymous memory that's internally
1151  * managed by the VM and is not connected to an external pager. This flag is
1152  * only set/cleared on the first CPU mapping of a page (see PVH_FLAG_CPU). Any
1153  * subsequent mappings won't set/clear this flag until all mappings are removed
1154  * and a new CPU mapping is added.
1155  */
1156 #define PP_ATTR_INTERNAL 0x0100
1157 
1158 /**
1159  * This flag is used to keep track of pages that are still resident but are not
1160  * considered dirty and can be reclaimed under memory pressure. These pages do
1161  * not count as a part of the memory footprint, so the footprint ledger does not
1162  * need to be updated for these pages. This is hinted to the VM by the
1163  * `madvise(MADV_FREE_REUSABLE)` system call.
1164  */
1165 #define PP_ATTR_REUSABLE 0x0200
1166 
1167 /**
1168  * This flag denotes that a page is utilizing "alternate accounting". This means
1169  * that the pmap doesn't need to keep track of these pages with regards to the
1170  * footprint ledger because the VM is already accounting for them in a different
1171  * way. These include IOKit mappings (VM adds their entire virtual size to the
1172  * footprint), and purgeable pages (VM counts them only when non-volatile and
1173  * only for one "owner"), among others.
1174  *
1175  * Note that alternate accounting status is tracked on a per-mapping basis (not
1176  * per-page). Because of that the ALTACCT flag in the pp_attr_table is only used
1177  * when there's a single mapping to a page. When there are multiple mappings,
1178  * the status of this flag is tracked in the pv_head_table (see PVE_PTEP_ALTACCT
1179  * above).
1180  */
1181 #define PP_ATTR_ALTACCT 0x0400
1182 
1183 /**
1184  * This bit was originally used on x86 to keep track of what pages to not
1185  * encrypt during the hibernation process as a performance optimization when
1186  * encryption was done in software. This doesn't apply to the ARM
1187  * hibernation process because all pages are automatically encrypted using
1188  * hardware acceleration. Despite that, the pmap still keeps track of this flag
1189  * as a debugging aid on internal builds.
1190  *
1191  * TODO: This bit can probably be reclaimed:
1192  * rdar://70740650 (PMAP Cleanup: Potentially reclaim the PP_ATTR_NOENCRYPT bit on ARM)
1193  */
1194 #define PP_ATTR_NOENCRYPT 0x0800
1195 
1196 /**
1197  * These bits denote that a physical page is expecting the next access or
1198  * modification to set the PP_ATTR_REFERENCED and PP_ATTR_MODIFIED flags
1199  * respectively.
1200  */
1201 #define PP_ATTR_REFFAULT 0x1000
1202 #define PP_ATTR_MODFAULT 0x2000
1203 
1204 #if XNU_MONITOR
1205 /**
1206  * Denotes that a page is owned by the PPL. This is modified/checked with the
1207  * PVH lock held, to avoid ownership related races. This does not need to be a
1208  * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1209  * put the bit.
1210  */
1211 #define PP_ATTR_MONITOR 0x4000
1212 
1213 /**
1214  * Denotes that a page *cannot* be owned by the PPL. This is required in order
1215  * to temporarily 'pin' kernel pages that are used to store PPL output
1216  * parameters. Otherwise a malicious or buggy caller could pass PPL-owned memory
1217  * for these parameters and in so doing stage a write gadget against the PPL.
1218  */
1219 #define PP_ATTR_NO_MONITOR 0x8000
1220 
1221 /**
1222  * All of the bits owned by the PPL; kernel requests to set or clear these bits
1223  * are illegal.
1224  */
1225 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1226 #endif /* XNU_MONITOR */
1227 
1228 /**
1229  * Atomically set some flags in a pp_attr_table entry.
1230  *
1231  * @param pai The physical address index for the entry to update.
1232  * @param bits The flags to set in the entry.
1233  */
1234 static inline void
ppattr_set_bits(unsigned int pai,pp_attr_t bits)1235 ppattr_set_bits(unsigned int pai, pp_attr_t bits)
1236 {
1237 	volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1238 	os_atomic_or(ppattr, bits, acq_rel);
1239 }
1240 
1241 /**
1242  * Atomically clear some flags in a pp_attr_table entry.
1243  *
1244  * @param pai The physical address index for the entry to update.
1245  * @param bits The flags to clear in the entry.
1246  */
1247 static inline void
ppattr_clear_bits(unsigned int pai,pp_attr_t bits)1248 ppattr_clear_bits(unsigned int pai, pp_attr_t bits)
1249 {
1250 	volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1251 	os_atomic_andnot(ppattr, bits, acq_rel);
1252 }
1253 
1254 /**
1255  * Return true if the pp_attr_table entry contains the passed in bits.
1256  *
1257  * @param pai The physical address index for the entry to test.
1258  * @param bits The flags to check for.
1259  */
1260 static inline bool
ppattr_test_bits(unsigned int pai,pp_attr_t bits)1261 ppattr_test_bits(unsigned int pai, pp_attr_t bits)
1262 {
1263 	const volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1264 	return (*ppattr & bits) == bits;
1265 }
1266 
1267 /**
1268  * Only set some flags in a pp_attr_table entry if the passed in physical
1269  * address is a kernel-managed address.
1270  *
1271  * @param pa The physical address for the entry to update.
1272  * @param bits The flags to set in the entry.
1273  */
1274 static inline void
ppattr_pa_set_bits(pmap_paddr_t pa,pp_attr_t bits)1275 ppattr_pa_set_bits(pmap_paddr_t pa, pp_attr_t bits)
1276 {
1277 	if (pa_valid(pa)) {
1278 		ppattr_set_bits(pa_index(pa), bits);
1279 	}
1280 }
1281 
1282 /**
1283  * Only clear some flags in a pp_attr_table entry if the passed in physical
1284  * address is a kernel-managed address.
1285  *
1286  * @param pa The physical address for the entry to update.
1287  * @param bits The flags to clear in the entry.
1288  */
1289 static inline void
ppattr_pa_clear_bits(pmap_paddr_t pa,pp_attr_t bits)1290 ppattr_pa_clear_bits(pmap_paddr_t pa, pp_attr_t bits)
1291 {
1292 	if (pa_valid(pa)) {
1293 		ppattr_clear_bits(pa_index(pa), bits);
1294 	}
1295 }
1296 
1297 /**
1298  * Only test flags in a pp_attr_table entry if the passed in physical address
1299  * is a kernel-managed page.
1300  *
1301  * @param pa The physical address for the entry to test.
1302  * @param bits The flags to check for.
1303  *
1304  * @return False if the PA isn't a kernel-managed page, otherwise true/false
1305  *         depending on whether the bits are set.
1306  */
1307 static inline bool
ppattr_pa_test_bits(pmap_paddr_t pa,pp_attr_t bits)1308 ppattr_pa_test_bits(pmap_paddr_t pa, pp_attr_t bits)
1309 {
1310 	return pa_valid(pa) ? ppattr_test_bits(pa_index(pa), bits) : false;
1311 }
1312 
1313 /**
1314  * Set the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the passed
1315  * in physical address is a kernel-managed page.
1316  *
1317  * @param pa The physical address for the entry to update.
1318  */
1319 static inline void
ppattr_pa_set_modify(pmap_paddr_t pa)1320 ppattr_pa_set_modify(pmap_paddr_t pa)
1321 {
1322 	ppattr_pa_set_bits(pa, PP_ATTR_MODIFIED);
1323 }
1324 
1325 /**
1326  * Clear the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the
1327  * passed in physical address is a kernel-managed page.
1328  *
1329  * @param pa The physical address for the entry to update.
1330  */
1331 static inline void
ppattr_pa_clear_modify(pmap_paddr_t pa)1332 ppattr_pa_clear_modify(pmap_paddr_t pa)
1333 {
1334 	ppattr_pa_clear_bits(pa, PP_ATTR_MODIFIED);
1335 }
1336 
1337 /**
1338  * Set the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1339  * passed in physical address is a kernel-managed page.
1340  *
1341  * @param pa The physical address for the entry to update.
1342  */
1343 static inline void
ppattr_pa_set_reference(pmap_paddr_t pa)1344 ppattr_pa_set_reference(pmap_paddr_t pa)
1345 {
1346 	ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
1347 }
1348 
1349 /**
1350  * Clear the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1351  * passed in physical address is a kernel-managed page.
1352  *
1353  * @param pa The physical address for the entry to update.
1354  */
1355 static inline void
ppattr_pa_clear_reference(pmap_paddr_t pa)1356 ppattr_pa_clear_reference(pmap_paddr_t pa)
1357 {
1358 	ppattr_pa_clear_bits(pa, PP_ATTR_REFERENCED);
1359 }
1360 
1361 #if XNU_MONITOR
1362 
1363 /**
1364  * Set the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the passed
1365  * in physical address is a kernel-managed page.
1366  *
1367  * @param pa The physical address for the entry to update.
1368  */
1369 static inline void
ppattr_pa_set_monitor(pmap_paddr_t pa)1370 ppattr_pa_set_monitor(pmap_paddr_t pa)
1371 {
1372 	ppattr_pa_set_bits(pa, PP_ATTR_MONITOR);
1373 }
1374 
1375 /**
1376  * Clear the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the
1377  * passed in physical address is a kernel-managed page.
1378  *
1379  * @param pa The physical address for the entry to update.
1380  */
1381 static inline void
ppattr_pa_clear_monitor(pmap_paddr_t pa)1382 ppattr_pa_clear_monitor(pmap_paddr_t pa)
1383 {
1384 	ppattr_pa_clear_bits(pa, PP_ATTR_MONITOR);
1385 }
1386 
1387 /**
1388  * Only test for the PP_ATTR_MONITOR flag in a pp_attr_table entry if the passed
1389  * in physical address is a kernel-managed page.
1390  *
1391  * @param pa The physical address for the entry to test.
1392  *
1393  * @return False if the PA isn't a kernel-managed page, otherwise true/false
1394  *         depending on whether the PP_ATTR_MONITOR is set.
1395  */
1396 static inline bool
ppattr_pa_test_monitor(pmap_paddr_t pa)1397 ppattr_pa_test_monitor(pmap_paddr_t pa)
1398 {
1399 	return ppattr_pa_test_bits(pa, PP_ATTR_MONITOR);
1400 }
1401 
1402 /**
1403  * Set the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1404  * passed in physical address is a kernel-managed page.
1405  *
1406  * @param pa The physical address for the entry to update.
1407  */
1408 static inline void
ppattr_pa_set_no_monitor(pmap_paddr_t pa)1409 ppattr_pa_set_no_monitor(pmap_paddr_t pa)
1410 {
1411 	ppattr_pa_set_bits(pa, PP_ATTR_NO_MONITOR);
1412 }
1413 
1414 /**
1415  * Clear the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1416  * passed in physical address is a kernel-managed page.
1417  *
1418  * @param pa The physical address for the entry to update.
1419  */
1420 static inline void
ppattr_pa_clear_no_monitor(pmap_paddr_t pa)1421 ppattr_pa_clear_no_monitor(pmap_paddr_t pa)
1422 {
1423 	ppattr_pa_clear_bits(pa, PP_ATTR_NO_MONITOR);
1424 }
1425 
1426 /**
1427  * Only test for the PP_ATTR_NO_MONITOR flag in a pp_attr_table entry if the
1428  * passed in physical address is a kernel-managed page.
1429  *
1430  * @param pa The physical address for the entry to test.
1431  *
1432  * @return False if the PA isn't a kernel-managed page, otherwise true/false
1433  *         depending on whether the PP_ATTR_NO_MONITOR is set.
1434  */
1435 static inline bool
ppattr_pa_test_no_monitor(pmap_paddr_t pa)1436 ppattr_pa_test_no_monitor(pmap_paddr_t pa)
1437 {
1438 	return ppattr_pa_test_bits(pa, PP_ATTR_NO_MONITOR);
1439 }
1440 
1441 #endif /* XNU_MONITOR */
1442 
1443 /**
1444  * Set the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1445  *
1446  * @param pai The physical address index for the entry to update.
1447  */
1448 static inline void
ppattr_set_internal(unsigned int pai)1449 ppattr_set_internal(unsigned int pai)
1450 {
1451 	ppattr_set_bits(pai, PP_ATTR_INTERNAL);
1452 }
1453 
1454 /**
1455  * Clear the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1456  *
1457  * @param pai The physical address index for the entry to update.
1458  */
1459 static inline void
ppattr_clear_internal(unsigned int pai)1460 ppattr_clear_internal(unsigned int pai)
1461 {
1462 	ppattr_clear_bits(pai, PP_ATTR_INTERNAL);
1463 }
1464 
1465 /**
1466  * Return true if the pp_attr_table entry has the PP_ATTR_INTERNAL flag set.
1467  *
1468  * @param pai The physical address index for the entry to test.
1469  */
1470 static inline bool
ppattr_test_internal(unsigned int pai)1471 ppattr_test_internal(unsigned int pai)
1472 {
1473 	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1474 }
1475 
1476 /**
1477  * Set the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1478  *
1479  * @param pai The physical address index for the entry to update.
1480  */
1481 static inline void
ppattr_set_reusable(unsigned int pai)1482 ppattr_set_reusable(unsigned int pai)
1483 {
1484 	ppattr_set_bits(pai, PP_ATTR_REUSABLE);
1485 }
1486 
1487 /**
1488  * Clear the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1489  *
1490  * @param pai The physical address index for the entry to update.
1491  */
1492 static inline void
ppattr_clear_reusable(unsigned int pai)1493 ppattr_clear_reusable(unsigned int pai)
1494 {
1495 	ppattr_clear_bits(pai, PP_ATTR_REUSABLE);
1496 }
1497 
1498 /**
1499  * Return true if the pp_attr_table entry has the PP_ATTR_REUSABLE flag set.
1500  *
1501  * @param pai The physical address index for the entry to test.
1502  */
1503 static inline bool
ppattr_test_reusable(unsigned int pai)1504 ppattr_test_reusable(unsigned int pai)
1505 {
1506 	return ppattr_test_bits(pai, PP_ATTR_REUSABLE);
1507 }
1508 
1509 /**
1510  * Set the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1511  *
1512  * @note This is only valid when the ALTACCT flag is being tracked using the
1513  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1514  *       PP_ATTR_ALTACCT definitions for more information.
1515  *
1516  * @param pai The physical address index for the entry to update.
1517  */
1518 static inline void
ppattr_set_altacct(unsigned int pai)1519 ppattr_set_altacct(unsigned int pai)
1520 {
1521 	ppattr_set_bits(pai, PP_ATTR_ALTACCT);
1522 }
1523 
1524 /**
1525  * Clear the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1526  *
1527  * @note This is only valid when the ALTACCT flag is being tracked using the
1528  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1529  *       PP_ATTR_ALTACCT definitions for more information.
1530  *
1531  * @param pai The physical address index for the entry to update.
1532  */
1533 static inline void
ppattr_clear_altacct(unsigned int pai)1534 ppattr_clear_altacct(unsigned int pai)
1535 {
1536 	ppattr_clear_bits(pai, PP_ATTR_ALTACCT);
1537 }
1538 
1539 /**
1540  * Get the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1541  *
1542  * @note This is only valid when the ALTACCT flag is being tracked using the
1543  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1544  *       PP_ATTR_ALTACCT definitions for more information.
1545  *
1546  * @param pai The physical address index for the entry to test.
1547  *
1548  * @return True if the passed in page uses alternate accounting, false
1549  *         otherwise.
1550  */
1551 static inline bool
ppattr_is_altacct(unsigned int pai)1552 ppattr_is_altacct(unsigned int pai)
1553 {
1554 	return ppattr_test_bits(pai, PP_ATTR_ALTACCT);
1555 }
1556 /**
1557  * Get the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1558  *
1559  * @note This is only valid when the INTERNAL flag is being tracked using the
1560  *       pp_attr_table. See the descriptions above the PVE_PTEP_INTERNAL and
1561  *       PP_ATTR_INTERNAL definitions for more information.
1562  *
1563  * @param pai The physical address index for the entry to test.
1564  *
1565  * @return True if the passed in page is accounted for as "internal", false
1566  *         otherwise.
1567  */
1568 static inline bool
ppattr_is_internal(unsigned int pai)1569 ppattr_is_internal(unsigned int pai)
1570 {
1571 	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1572 }
1573 
1574 /**
1575  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1576  * depending on whether there are one or multiple mappings to a page. This
1577  * function abstracts out the difference between single and multiple mappings to
1578  * a page and provides a single function for determining whether alternate
1579  * accounting is set for a mapping.
1580  *
1581  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1582  *       definitions for more information.
1583  *
1584  * @param pai The physical address index for the entry to test.
1585  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1586  * @param idx Index of the chosen PTE pointer inside the PVE.
1587  *
1588  * @return True if the passed in page uses alternate accounting, false
1589  *         otherwise.
1590  */
1591 static inline bool
ppattr_pve_is_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1592 ppattr_pve_is_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1593 {
1594 	return (pvep == PV_ENTRY_NULL) ? ppattr_is_altacct(pai) : pve_get_altacct(pvep, idx);
1595 }
1596 /**
1597  * The "internal" (INTERNAL) status for a page is tracked differently
1598  * depending on whether there are one or multiple mappings to a page. This
1599  * function abstracts out the difference between single and multiple mappings to
1600  * a page and provides a single function for determining whether "internal"
1601  * is set for a mapping.
1602  *
1603  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1604  *       definitions for more information.
1605  *
1606  * @param pai The physical address index for the entry to test.
1607  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1608  * @param idx Index of the chosen PTE pointer inside the PVE.
1609  *
1610  * @return True if the passed in page is "internal", false otherwise.
1611  */
1612 static inline bool
ppattr_pve_is_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1613 ppattr_pve_is_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1614 {
1615 	return (pvep == PV_ENTRY_NULL) ? ppattr_is_internal(pai) : pve_get_internal(pvep, idx);
1616 }
1617 
1618 /**
1619  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1620  * depending on whether there are one or multiple mappings to a page. This
1621  * function abstracts out the difference between single and multiple mappings to
1622  * a page and provides a single function for setting the alternate accounting status
1623  * for a mapping.
1624  *
1625  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1626  *       definitions for more information.
1627  *
1628  * @param pai The physical address index for the entry to update.
1629  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1630  * @param idx Index of the chosen PTE pointer inside the PVE.
1631  */
1632 static inline void
ppattr_pve_set_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1633 ppattr_pve_set_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1634 {
1635 	if (pvep == PV_ENTRY_NULL) {
1636 		ppattr_set_altacct(pai);
1637 	} else {
1638 		pve_set_altacct(pvep, idx);
1639 	}
1640 }
1641 /**
1642  * The "internal" (INTERNAL) status for a page is tracked differently
1643  * depending on whether there are one or multiple mappings to a page. This
1644  * function abstracts out the difference between single and multiple mappings to
1645  * a page and provides a single function for setting the "internal" status
1646  * for a mapping.
1647  *
1648  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1649  *       definitions for more information.
1650  *
1651  * @param pai The physical address index for the entry to update.
1652  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1653  * @param idx Index of the chosen PTE pointer inside the PVE.
1654  */
1655 static inline void
ppattr_pve_set_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1656 ppattr_pve_set_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1657 {
1658 	if (pvep == PV_ENTRY_NULL) {
1659 		ppattr_set_internal(pai);
1660 	} else {
1661 		pve_set_internal(pvep, idx);
1662 	}
1663 }
1664 
1665 /**
1666  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1667  * depending on whether there are one or multiple mappings to a page. This
1668  * function abstracts out the difference between single and multiple mappings to
1669  * a page and provides a single function for clearing the alternate accounting status
1670  * for a mapping.
1671  *
1672  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1673  *       definitions for more information.
1674  *
1675  * @param pai The physical address index for the entry to update.
1676  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1677  * @param idx Index of the chosen PTE pointer inside the PVE.
1678  */
1679 static inline void
ppattr_pve_clr_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1680 ppattr_pve_clr_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1681 {
1682 	if (pvep == PV_ENTRY_NULL) {
1683 		ppattr_clear_altacct(pai);
1684 	} else {
1685 		pve_clr_altacct(pvep, idx);
1686 	}
1687 }
1688 /**
1689  * The "internal" (INTERNAL) status for a page is tracked differently
1690  * depending on whether there are one or multiple mappings to a page. This
1691  * function abstracts out the difference between single and multiple mappings to
1692  * a page and provides a single function for clearing the "internal" status
1693  * for a mapping.
1694  *
1695  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1696  *       definitions for more information.
1697  *
1698  * @param pai The physical address index for the entry to update.
1699  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1700  * @param idx Index of the chosen PTE pointer inside the PVE.
1701  */
1702 static inline void
ppattr_pve_clr_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1703 ppattr_pve_clr_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1704 {
1705 	if (pvep == PV_ENTRY_NULL) {
1706 		ppattr_clear_internal(pai);
1707 	} else {
1708 		pve_clr_internal(pvep, idx);
1709 	}
1710 }
1711 
/**
 * Set the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_reffault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_REFFAULT);
}
1722 
/**
 * Clear the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_reffault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_REFFAULT);
}
1733 
/**
 * Check whether a pp_attr_table entry has the PP_ATTR_REFFAULT flag set.
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_REFFAULT is set on the entry, false otherwise.
 */
static inline bool
ppattr_test_reffault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_REFFAULT);
}
1744 
/**
 * Set the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_modfault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_MODFAULT);
}
1755 
/**
 * Clear the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_modfault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_MODFAULT);
}
1766 
/**
 * Check whether a pp_attr_table entry has the PP_ATTR_MODFAULT flag set.
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_MODFAULT is set on the entry, false otherwise.
 */
static inline bool
ppattr_test_modfault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_MODFAULT);
}
1777 
1778 static inline boolean_t
pmap_is_preemptible(void)1779 pmap_is_preemptible(void)
1780 {
1781 	return preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT);
1782 }
1783 
/**
 * This helper function ensures that potentially-long-running batched PPL operations are
 * called in preemptible context before entering the PPL, so that the PPL call may
 * periodically exit to allow pending urgent ASTs to be taken.
 *
 * @note This is an assertion-only check: it becomes a no-op in builds where
 *       assert() is compiled out.
 */
static inline void
pmap_verify_preemptible(void)
{
	assert(pmap_is_preemptible());
}
1794 
1795 /**
1796  * The minimum number of pages to keep in the PPL page free list.
1797  *
1798  * We define our target as 8 pages: enough for 2 page table pages, a PTD page,
1799  * and a PV page; in essence, twice as many pages as may be necessary to satisfy
1800  * a single pmap_enter request.
1801  */
1802 #define PMAP_MIN_FREE_PPL_PAGES 8
1803 
1804 /**
1805  * Flags passed to various page allocation functions, usually accessed through
1806  * the pmap_pages_alloc_zeroed() API. Each function that can take these flags as
1807  * a part of its option field, will describe these flags in its function header.
1808  */
1809 
1810 /**
1811  * Instruct the allocation function to return immediately if no pages are
 * currently available. Without this flag, the function will spin and wait for a
1813  * page to become available. This flag can be required in some circumstances
1814  * (for instance, when allocating pages from within the PPL).
1815  */
1816 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1817 
1818 /**
 * Instructs an allocation function to fall back to reclaiming a userspace page
1820  * table if it failed to allocate a page from the free lists. This can be useful
1821  * when allocating from within the PPL because refilling the free lists requires
1822  * exiting and re-entering the PPL (which incurs extra latency).
1823  *
1824  * This is a quick way of allocating a page at the expense of having to
1825  * reallocate the table the next time one of its mappings is accessed.
1826  */
1827 #define PMAP_PAGE_RECLAIM_NOWAIT 0x2
1828 
1829 /**
1830  * Global variables exported to the rest of the internal pmap implementation.
1831  */
1832 #if XNU_MONITOR
1833 extern uint64_t pmap_ppl_free_page_count;
1834 extern pmap_paddr_t pmap_stacks_start_pa;
1835 extern pmap_paddr_t pmap_stacks_end_pa;
1836 extern pmap_paddr_t ppl_cpu_save_area_start;
1837 extern pmap_paddr_t ppl_cpu_save_area_end;
1838 #endif /* XNU_MONITOR */
1839 extern unsigned int inuse_pmap_pages_count;
1840 extern vm_object_t pmap_object;
1841 extern uint32_t pv_alloc_initial_target;
1842 extern uint32_t pv_kern_alloc_initial_target;
1843 
1844 /**
1845  * Functions exported to the rest of the internal pmap implementation.
1846  */
1847 extern void pmap_data_bootstrap(void);
1848 extern void pmap_enqueue_pages(vm_page_t);
1849 extern kern_return_t pmap_pages_alloc_zeroed(pmap_paddr_t *, unsigned, unsigned);
1850 extern void pmap_pages_free(pmap_paddr_t, unsigned);
1851 
1852 #if XNU_MONITOR
1853 
1854 extern void pmap_mark_page_as_ppl_page_internal(pmap_paddr_t, bool);
1855 extern void pmap_mark_page_as_ppl_page(pmap_paddr_t);
1856 extern void pmap_mark_page_as_kernel_page(pmap_paddr_t);
1857 extern pmap_paddr_t pmap_alloc_page_for_kern(unsigned int);
1858 extern void pmap_alloc_page_for_ppl(unsigned int);
1859 extern uint64_t pmap_release_ppl_pages_to_kernel(void);
1860 
1861 extern uint64_t pmap_ledger_validate(const volatile void *);
1862 void pmap_ledger_retain(ledger_t ledger);
1863 void pmap_ledger_release(ledger_t ledger);
1864 extern void pmap_ledger_check_balance(pmap_t pmap);
1865 
1866 kern_return_t pmap_alloc_pmap(pmap_t *pmap);
1867 void pmap_free_pmap(pmap_t pmap);
1868 
1869 #endif /* XNU_MONITOR */
1870 
1871 /**
1872  * The modes in which a pmap lock can be acquired. Note that shared access
1873  * doesn't necessarily mean "read-only". As long as data is atomically updated
1874  * correctly (to account for multi-cpu accesses) data can still get written with
1875  * a shared lock held. Care just needs to be taken so as to not introduce any
1876  * race conditions when there are multiple writers.
1877  *
1878  * This is here in pmap_data.h because it's a needed parameter for pv_alloc()
1879  * and pmap_enter_pv(). This header is always included in pmap_internal.h before
1880  * the rest of the pmap locking code is defined so there shouldn't be any issues
1881  * with missing types.
1882  */
1883 OS_ENUM(pmap_lock_mode, uint8_t,
1884     PMAP_LOCK_SHARED,
1885     PMAP_LOCK_EXCLUSIVE);
1886 
/**
 * Possible return values for pv_alloc(). See the pv_alloc() function header for
 * a description of each of these values.
 */
typedef enum {
	PV_ALLOC_SUCCESS, /* A PV entry was successfully allocated. */
	PV_ALLOC_RETRY,   /* NOTE(review): presumably the caller must retry (e.g. after locks were dropped) — see pv_alloc(). */
	PV_ALLOC_FAIL     /* No PV entry could be allocated. */
} pv_alloc_return_t;
1896 
1897 extern pv_alloc_return_t pv_alloc(
1898 	pmap_t, unsigned int, pmap_lock_mode_t, unsigned int, pv_entry_t **);
1899 extern void pv_free(pv_entry_t *);
1900 extern void pv_list_free(pv_entry_t *, pv_entry_t *, int);
1901 extern void pmap_compute_pv_targets(void);
1902 extern pv_alloc_return_t pmap_enter_pv(
1903 	pmap_t, pt_entry_t *, int, unsigned int, pmap_lock_mode_t, pv_entry_t **, int *new_pve_ptep_idx);
1904 extern void pmap_remove_pv(pmap_t, pt_entry_t *, int, bool, bool *, bool *);
1905 
1906 extern void ptd_bootstrap(pt_desc_t *, unsigned int);
1907 extern pt_desc_t *ptd_alloc_unlinked(void);
1908 extern pt_desc_t *ptd_alloc(pmap_t);
1909 extern void ptd_deallocate(pt_desc_t *);
1910 extern void ptd_info_init(
1911 	pt_desc_t *, pmap_t, vm_map_address_t, unsigned int, pt_entry_t *);
1912 
1913 extern kern_return_t pmap_ledger_credit(pmap_t, int, ledger_amount_t);
1914 extern kern_return_t pmap_ledger_debit(pmap_t, int, ledger_amount_t);
1915 
1916 extern void validate_pmap_internal(const volatile struct pmap *, const char *);
1917 extern void validate_pmap_mutable_internal(const volatile struct pmap *, const char *);
1918 
1919 /**
1920  * Macro function wrappers around pmap validation so that the calling function
1921  * can be printed in the panic strings for easier validation failure debugging.
1922  */
1923 #define validate_pmap(x) validate_pmap_internal(x, __func__)
1924 #define validate_pmap_mutable(x) validate_pmap_mutable_internal(x, __func__)
1925 
/**
 * This structure describes a PPL-owned I/O range.
 *
 * @note This doesn't necessarily have to represent "I/O" only, this can also
 *       represent non-kernel-managed DRAM (e.g., iBoot carveouts). Any physical
 *       address region that isn't considered "kernel-managed" is fair game.
 *
 * @note The layout of this structure needs to map 1-to-1 with the pmap-io-range
 *       device tree nodes. Astris (through the LowGlobals) also depends on the
 *       consistency of this structure. Its size is pinned to 24 bytes by the
 *       _Static_assert below — do not reorder or resize fields.
 */
typedef struct pmap_io_range {
	/* Physical address of the PPL-owned I/O range. */
	uint64_t addr;

	/**
	 * Length (in bytes) of the PPL-owned I/O range. Has to be the size
	 * of a page if the range will be referred to by pmap_io_filter_entries.
	 */
	uint64_t len;

	/* Strong DSB required for pages in this range. */
	#define PMAP_IO_RANGE_STRONG_SYNC (1U << 31)

	/* Corresponds to memory carved out by bootloader. */
	#define PMAP_IO_RANGE_CARVEOUT (1U << 30)

	/* Pages in this range need to be included in the hibernation image. */
	#define PMAP_IO_RANGE_NEEDS_HIBERNATING (1U << 29)

	/* Mark the range as 'owned' by a given subsystem. */
	#define PMAP_IO_RANGE_OWNED (1U << 28)

	/**
	 * Denotes a range that is *not* to be treated as an I/O range that
	 * needs to be mapped, but only to decorate arbitrary physical
	 * memory ranges (including of managed memory) with extra
	 * flags. I.e. this allows tagging of "ordinary" managed memory
	 * pages with flags like `PMAP_IO_RANGE_PROHIBIT_HIB_WRITE`, or
	 * informing the SPTM that some (nominally) managed memory pages are
	 * unavailable for some reason.
	 *
	 * Notably, `pmap_find_io_attr()`, and anything else that uses
	 * `pmap_io_range`s for denoting to-be-mapped I/O ranges, ignores
	 * entries with this flag.
	 */
	#define PMAP_IO_RANGE_NOT_IO (1U << 27)

	/* Pages in this range may never be written during hibernation restore. */
	#define PMAP_IO_RANGE_PROHIBIT_HIB_WRITE (1U << 26)

	/**
	 * Lower 16 bits treated as pp_attr_t, upper 16 bits contain additional
	 * mapping flags (defined above).
	 */
	uint32_t wimg;

	/**
	 * 4 Character Code (4CC) describing what this range is.
	 *
	 * This has to be unique for each "type" of pages, meaning pages sharing
	 * the same register layout, if it is used for the I/O filter descriptors
	 * below. Otherwise it doesn't matter.
	 */
	uint32_t signature;
} pmap_io_range_t;
1992 
1993 /* Reminder: be sure to change all relevant device trees if you change the layout of pmap_io_range_t */
1994 _Static_assert(sizeof(pmap_io_range_t) == 24, "unexpected size for pmap_io_range_t");
1995 
1996 extern pmap_io_range_t* pmap_find_io_attr(pmap_paddr_t);
1997 
/**
 * This structure describes a sub-page-size I/O region owned by PPL but the kernel can write to.
 *
 * @note I/O filter software will use a collection of such data structures to determine access
 *       permissions to a page owned by PPL.
 *
 * @note The {signature, offset} key is used to index a collection of such data structures to
 *       optimize for space in the case where one page layout is repeated for many devices, such
 *       as the memory controller channels.
 *
 * @note The size of this structure is pinned to 8 bytes by the _Static_assert
 *       below — do not reorder or resize fields.
 */
typedef struct pmap_io_filter_entry {
	/* 4 Character Code (4CC) describing what this range (page) is. */
	uint32_t signature;

	/* Offset within the page. It has to be within [0, PAGE_SIZE). */
	uint16_t offset;

	/* Length of the range, and (offset + length) has to be within [0, PAGE_SIZE). */
	uint16_t length;
} pmap_io_filter_entry_t;
2018 
2019 _Static_assert(sizeof(pmap_io_filter_entry_t) == 8, "unexpected size for pmap_io_filter_entry_t");
2020 
2021 extern pmap_io_filter_entry_t *pmap_find_io_filter_entry(pmap_paddr_t, uint64_t, const pmap_io_range_t **);
2022 
2023 extern void pmap_cpu_data_init_internal(unsigned int);
2024 
2025 /**
2026  * Flush a single 16K page from noncoherent coprocessor caches.
2027  *
 * @note Noncoherent cache flushes are only guaranteed to work if the participating coprocessor(s)
2029  *       do not have any active VA translations for the page being flushed.  Since coprocessor
2030  *       mappings should always be controlled by some PPL IOMMU extension, they should always
2031  *       have PV list entries.  This flush should therefore be performed at a point when the PV
2032  *       list is known to be either empty or at least to not contain any IOMMU entries.  For
2033  *       the purposes of our security model, it is sufficient to wait for the PV list to become
2034  *       empty, as we really want to protect PPL-sensitive pages from malicious/accidental
2035  *       coprocessor cacheline evictions, and the PV list must be empty before a page can be
2036  *       handed to the PPL.
2037  *
2038  * @param paddr The base physical address of the page to flush.
2039  */
2040 extern void pmap_flush_noncoherent_page(pmap_paddr_t paddr);
2041 
2042 #if DEBUG || DEVELOPMENT
2043 extern unsigned int pmap_wcrt_on_non_dram_count_get(void);
2044 extern void pmap_wcrt_on_non_dram_count_increment_atomic(void);
2045 #endif /* DEBUG || DEVELOPMENT */
2046 #endif /* _ARM_PMAP_PMAP_DATA_H_ */
2047