xref: /xnu-8792.61.2/osfmk/arm/pmap/pmap_data.h (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /**
29  * This header file is used to store the types, prototypes, and inline functions
30  * that define some of the most important data structures used in the pmap. This
31  * header is only meant for sharing types within the pmap; if a type is meant to
32  * be used by the rest of the kernel, then put it into osfmk/arm/pmap.h.
33  */
34 #ifndef _ARM_PMAP_PMAP_DATA_H_
35 #define _ARM_PMAP_PMAP_DATA_H_
36 
37 #include <stdint.h>
38 
39 #include <kern/ledger.h>
40 #include <mach/vm_types.h>
41 #include <mach_assert.h>
42 #include <vm/vm_page.h>
43 
44 #include <arm/cpu_data.h>
45 #include <arm/machine_routines.h>
46 #include <arm64/proc_reg.h>
47 
48 /* Temporary include before moving all ledger functions into pmap_data.c */
49 #include <os/refcnt.h>
50 
51 /**
52  * These headers are safe to be included in this file since they shouldn't rely
53  * on any of the internal pmap header files (so no circular dependencies).
54  */
55 #include <arm/pmap.h>
56 #include <arm/pmap/pmap_pt_geometry.h>
57 
58 /**
59  * These values represent the first and last kernel-managed physical addresses.
60  * We keep track of extra metadata on kernel-managed pages compared to other
61  * pages (usually iBoot carved out memory or I/O).
62  */
63 extern pmap_paddr_t vm_first_phys, vm_last_phys;
64 
65 /**
66  * Return whether the given address represents a kernel-managed physical page.
67  *
68  * Whether a page is considered "kernel-managed" is determined by the BootArgs
69  * passed by the bootloader. Typically memory carved out by the bootloader as
70  * well as I/O memory should return false.
71  *
72  * @param pa The physical address to check.
73  */
74 static inline bool
pa_valid(pmap_paddr_t pa)75 pa_valid(pmap_paddr_t pa)
76 {
77 	return (pa >= vm_first_phys) && (pa < vm_last_phys);
78 }
79 
80 /**
81  * The pmap has a variety of data structures (pv_head_table/pp_attr_table) that
82  * contain an entry for every kernel-managed page in the system. These systems
83  * are indexed with physical address indices ("pai") generated by this function.
84  *
85  * The logic is simple since there should be one entry in each of these data
86  * structures for each kernel-managed physical page in the system. These data
87  * structures are allocated on boot based on the amount of memory available.
88  *
89  * @note PAIs are defined using the VM page size, which might not be identical
90  *       to the underlying hardware page size for an arbitrary address space.
91  *       This means that the data structures relying on PAIs will contain one
92  *       entry for each VM page, not hardware page.
93  *
94  * @note This function is only valid for physical addresses that are
95  *       kernel-managed.
96  */
97 static inline unsigned int
pa_index(pmap_paddr_t pa)98 pa_index(pmap_paddr_t pa)
99 {
100 	return (unsigned int)atop(pa - vm_first_phys);
101 }
102 
103 /* See the definition of pv_head_table for more information. */
104 extern pv_entry_t **pv_head_table;
105 
106 /* Represents a NULL entry in the pv_head_table. */
107 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
108 
109 /**
110  * Given a physical address index, return the corresponding pv_head_table entry.
111  *
112  * @note Despite returning a pointer to a pv_entry_t pointer, the entry might
113  *       actually be a different type of pointer (pt_entry_t or pt_desc_t)
114  *       depending on the type for this entry. Determine the type using
115  *       pvh_test_type().
116  *
117  * @param pai The index returned by pa_index() for the page whose pv_head_table
118  *            entry should be retrieved.
119  */
120 static inline pv_entry_t **
pai_to_pvh(unsigned int pai)121 pai_to_pvh(unsigned int pai)
122 {
123 	return &pv_head_table[pai];
124 }
125 
126 /**
127  * Each pv_head_table entry can be one of four different types:
128  *
129  * - PVH_TYPE_NULL: No mappings to the physical page exist outside of the
130  *                  physical aperture. Physical aperture mappings are not
131  *                  tracked in the pv_head_table.
132  *
133  * - PVH_TYPE_PVEP: There are multiple mappings to the physical page.
134  *                  These entries are linked lists of pv_entry_t objects (which
135  *                  each contain a pointer to the associated PTE and a pointer
136  *                  to the next entry in the list).
137  *
138  * - PVH_TYPE_PTEP: There is a single mapping to the physical page. Once more
139  *                  mappings are created, this entry will get upgraded to an
140  *                  entry of type PVH_TYPE_PVEP. These entries are pointers
141  *                  directly to the page table entry that contain the mapping
142  *                  (pt_entry_t*).
143  *
144  * - PVH_TYPE_PTDP: The physical page is being used as a page table. These
145  *                  entries are pointers to page table descriptor structures
146  *                  (pt_desc_t) which contain metadata related to each page
147  *                  table.
148  *
149  * The type is stored in the bottom two bits of each pv_head_table entry. That
150  * type needs to be checked before dereferencing the pointer to determine which
151  * pointer type to dereference as.
152  */
153 #define PVH_TYPE_NULL 0x0UL
154 #define PVH_TYPE_PVEP 0x1UL
155 #define PVH_TYPE_PTEP 0x2UL
156 #define PVH_TYPE_PTDP 0x3UL
157 
158 #define PVH_TYPE_MASK (0x3UL)
159 
160 #if defined(__arm64__)
161 
162 /**
163  * PV_HEAD_TABLE Flags.
164  *
165  * All flags listed below are stored in the pv_head_table entry/pointer
166  * (per-physical-page) unless otherwise noted.
167  *
168  * Please update the pv_walk LLDB macro if these flags are changed or added to.
169  */
170 
171 /**
172  * This flag is set for every mapping created by an IOMMU.
173  *
174  * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
175  * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
176  */
177 #define PVH_FLAG_IOMMU 0x4UL
178 
179 /**
180  * This flag is only valid when PVH_FLAG_IOMMU is set. For an IOMMU mapping, if
181  * this bit is set, then the PTE pointer points directly into the IOMMU page
182  * table for this mapping. If this bit is cleared, then the "PTE pointer" is
183  * actually a pointer to the IOMMU descriptor object that owns this mapping.
184  *
185  * There are cases where it's not easy to tie an IOMMU mapping directly to a
186  * specific page table, so this allows us to at least get a pointer to which
187  * IOMMU created this mapping which is useful for debugging purposes.
188  *
189  * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
190  * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
191  */
192 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
193 
194 /**
195  * This flag is set when the first CPU (non-IOMMU) mapping is created. This is
196  * important to keep track of because various accounting statistics are based on
197  * the options specified for the first CPU mapping. This flag, and thus the
198  * accounting statistics, will persist as long as there are *any* mappings of the
199  * page (including IOMMU mappings). This works because the accounting for a page
200  * should not need to change until the page is recycled by the VM layer, and we
201  * double-check that there are no mappings (CPU or IOMMU) when a page is
202  * recycled (see: pmap_verify_free()).
203  */
204 #define PVH_FLAG_CPU (1ULL << 62)
205 
206 /* This bit is used as a lock when modifying a pv_head_table entry. */
207 #define PVH_LOCK_BIT 61
208 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
209 
210 /**
211  * This flag is set when there are any executable mappings to this physical
212  * page. This is used to prevent any writable mappings from being created at
213  * the same time an executable mapping exists.
214  */
215 #define PVH_FLAG_EXEC (1ULL << 60)
216 
217 /**
218  * Marking a pv_head_table entry with this flag denotes that this page is a
219  * kernelcache text or data page that shouldn't have dynamically-created
220  * mappings.  See PVH_FLAG_LOCKDOWN_MASK for more details.
221  */
222 #define PVH_FLAG_LOCKDOWN_KC (1ULL << 59)
223 
224 /**
225  * This flag is used to mark that a page has been hashed into the hibernation
226  * image.
227  *
228  * The hibernation driver will use this to ensure that all PPL-owned memory is
229  * correctly included into the hibernation image (a missing PPL page could be
230  * a security concern when coming out of hibernation).
231  */
232 #define PVH_FLAG_HASHED (1ULL << 58)
233 
234 /**
235  * Marking a pv_head_table entry with this flag denotes that this page is a
236  * code signature page that shouldn't have dynamically-created mappings.
237  * See PVH_FLAG_LOCKDOWN_MASK for more details.
238  */
239 #define PVH_FLAG_LOCKDOWN_CS (1ULL << 57)
240 
241 /**
242  * Marking a pv_head_table entry with this flag denotes that this page is a
243  * read-only allocator page that shouldn't have dynamically-created mappings.
244  * See PVH_FLAG_LOCKDOWN_MASK for more details.
245  */
246 #define PVH_FLAG_LOCKDOWN_RO (1ULL << 56)
247 
248 
249 /**
250  * Marking a pv_head_table entry with any bit in this mask denotes that this page
251  * has been locked down by the PPL.  Locked down pages can't have new mappings
252  * created or existing mappings removed, and all existing mappings will have been
253  * converted to read-only.  This essentially makes the page immutable.
254  */
255 #define PVH_FLAG_LOCKDOWN_MASK (PVH_FLAG_LOCKDOWN_KC | PVH_FLAG_LOCKDOWN_CS | PVH_FLAG_LOCKDOWN_RO)
256 
257 /**
258  * These bits need to be set to safely dereference a pv_head_table
259  * entry/pointer.
260  *
261  * Any change to this #define should also update the copy located in the pmap.py
262  * LLDB macros file.
263  */
264 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN_MASK | PVH_FLAG_HASHED)
265 
266 #endif /* defined(__arm64__) */
267 
268 /* Mask used to clear out the TYPE bits from a pv_head_table entry/pointer. */
269 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
270 
271 /* Which 32-bit word in each pv_head_table entry/pointer contains the LOCK bit. */
272 #if defined(__arm64__)
273 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
274 #endif /* defined(__arm64__) */
275 
276 /**
277  * Assert that a pv_head_table entry is locked. Will panic if the lock isn't
278  * acquired.
279  *
280  * @param index The physical address index to check.
281  */
static inline void
pvh_assert_locked(__assert_only unsigned int index)
{
	/*
	 * The PVH_FLAG_LOCK bit is stored directly within the pv_head_table
	 * entry itself, so a simple load is enough to check lock ownership.
	 * Compiles to nothing when assertions are disabled.
	 */
	assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);
}
287 
288 
289 /**
290  * Lock a pv_head_table entry.
291  *
292  * @param index The physical address index of the pv_head_table entry to lock.
293  */
294 static inline void
pvh_lock(unsigned int index)295 pvh_lock(unsigned int index)
296 {
297 	pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
298 	    PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
299 }
300 
301 /**
302  * Unlock a pv_head_table entry.
303  *
304  * @param index The physical address index of the pv_head_table entry to unlock.
305  */
306 static inline void
pvh_unlock(unsigned int index)307 pvh_unlock(unsigned int index)
308 {
309 	pvh_assert_locked(index);
310 
311 	pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
312 	    PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
313 }
314 
315 /**
316  * Check that a pv_head_table entry/pointer is a specific type.
317  *
318  * @param pvh The pv_head_table entry/pointer to check.
319  * @param type The type to check for.
320  *
321  * @return True if the pv_head_table entry is of the passed in type, false
322  *         otherwise.
323  */
324 static inline bool
pvh_test_type(pv_entry_t ** pvh,vm_offset_t type)325 pvh_test_type(pv_entry_t **pvh, vm_offset_t type)
326 {
327 	return ((*(vm_offset_t *)pvh) & PVH_TYPE_MASK) == type;
328 }
329 
330 /**
331  * Convert a pv_head_table entry/pointer into a page table entry pointer. This
332  * should only be done if the type of this entry is PVH_TYPE_PTEP.
333  *
334  * @param pvh The pv_head_table entry/pointer to convert into a pt_entry_t*.
335  *
336  * @return Return back a safe to dereference pointer to the single mapping of this
337  *         physical page by masking off the TYPE bits and adding any missing
338  *         flags to the upper portion of the pointer.
339  */
340 static inline pt_entry_t*
pvh_ptep(pv_entry_t ** pvh)341 pvh_ptep(pv_entry_t **pvh)
342 {
343 	return (pt_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
344 }
345 
346 /**
347  * Convert a pv_head_table entry/pointer into a PVE list pointer. This
348  * should only be done if the type of this entry is PVH_TYPE_PVEP.
349  *
350  * @param pvh The pv_head_table entry/pointer to convert into a safe to
351  *            dereference pv_entry_t*.
352  *
353  * @return Return back a safe to dereference pointer to the first mapping of this
354  *         physical page by masking off the TYPE bits and adding any missing
355  *         flags to the upper portion of the pointer.
356  */
357 static inline pv_entry_t*
pvh_pve_list(pv_entry_t ** pvh)358 pvh_pve_list(pv_entry_t **pvh)
359 {
360 	return (pv_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
361 }
362 
363 /**
364  * Return the flags associated with a pv_head_table entry/pointer.
365  *
366  * @param pvh The pv_head_table entry whose flags to get.
367  */
368 static inline vm_offset_t
pvh_get_flags(pv_entry_t ** pvh)369 pvh_get_flags(pv_entry_t **pvh)
370 {
371 	return (*(vm_offset_t *)pvh) & PVH_HIGH_FLAGS;
372 }
373 
374 /**
375  * Atomically set the flags associated with a pv_head_table entry/pointer.
376  *
377  * @param pvh The pv_head_table entry whose flags are getting set.
378  */
379 static inline void
pvh_set_flags(pv_entry_t ** pvh,vm_offset_t flags)380 pvh_set_flags(pv_entry_t **pvh, vm_offset_t flags)
381 {
382 	os_atomic_store((vm_offset_t *)pvh, ((*(vm_offset_t *)pvh) & ~PVH_HIGH_FLAGS) | flags, relaxed);
383 }
384 
385 /**
386  * Update a pv_head_table entry/pointer to be a different type and/or point to
387  * a different object.
388  *
389  * @note The pv_head_table entry MUST already be locked.
390  *
391  * @note This function will clobber any existing flags stored in the PVH pointer
392  *       (except PVH_FLAG_LOCK). It's up to the caller to preserve flags if that
393  *       functionality is needed (either by ensuring `pvep` contains those
394  *       flags, or by manually setting the flags after this call).
395  *
396  * @param pvh The pv_head_table entry/pointer to update.
397  * @param pvep The new entry to use. This could be either a pt_entry_t*,
398  *             pv_entry_t*, or pt_desc_t* depending on the type.
399  * @param type The type of the new entry.
400  */
401 static inline void
pvh_update_head(pv_entry_t ** pvh,void * pvep,unsigned int type)402 pvh_update_head(pv_entry_t **pvh, void *pvep, unsigned int type)
403 {
404 	assert((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK);
405 	os_atomic_store((vm_offset_t *)pvh, (vm_offset_t)pvep | type | PVH_FLAG_LOCK, relaxed);
406 }
407 
408 /**
409  * Update a pv_head_table entry/pointer to be a different type and/or point to
410  * a different object.
411  *
412  * @note The pv_head_table entry CAN'T already be locked.
413  *
414  * @note This function will clobber any existing flags stored in the PVH
415  *       pointer. It's up to the caller to preserve flags if that functionality
416  *       is needed (either by ensuring `pvep` contains those flags, or by
417  *       manually setting the flags after this call).
418  *
419  * @param pvh The pv_head_table entry/pointer to update.
420  * @param pvep The new entry to use. This could be either a pt_entry_t*,
421  *             pv_entry_t*, or pt_desc_t* depending on the type.
422  * @param type The type of the new entry.
423  */
424 static inline void
pvh_update_head_unlocked(pv_entry_t ** pvh,void * pvep,unsigned int type)425 pvh_update_head_unlocked(pv_entry_t **pvh, void *pvep, unsigned int type)
426 {
427 	assert(!((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK));
428 	*(vm_offset_t *)pvh = ((vm_offset_t)pvep | type) & ~PVH_FLAG_LOCK;
429 }
430 
431 /**
432  * Given a page table entry pointer retrieved from the pv_head_table (from an
433  * entry of type PVH_TYPE_PTEP or PVH_TYPE_PVEP), return back whether the PTE is
434  * an IOMMU mapping.
435  *
436  * @note The way this function determines whether the passed in pointer is
437  *       pointing to an IOMMU PTE, is by checking for a special flag stored in
438  *       the lower bits of the pointer. This flag is only set on pointers stored
439  *       in the pv_head_table, and as such, this function will only work on
440  *       pointers retrieved from the pv_head_table. If a pointer to a PTE was
441  *       directly retrieved from an IOMMU's page tables, this function would
442  *       always return false despite actually being an IOMMU PTE.
443  *
444  * @param ptep A PTE pointer obtained from the pv_head_table to check.
445  *
446  * @return True if the entry is an IOMMU mapping, false otherwise.
447  */
448 static inline bool
pvh_ptep_is_iommu(const pt_entry_t * ptep)449 pvh_ptep_is_iommu(const pt_entry_t *ptep)
450 {
451 #ifdef PVH_FLAG_IOMMU
452 	return (vm_offset_t)ptep & PVH_FLAG_IOMMU;
453 #else /* PVH_FLAG_IOMMU */
454 	#pragma unused(ptep)
455 	return false;
456 #endif /* PVH_FLAG_IOMMU */
457 }
458 
459 /**
460  * Sometimes the PTE pointers retrieved from the pv_head_table (from an entry of
461  * type PVH_TYPE_PTEP or PVH_TYPE_PVEP) contain flags themselves. This function
462  * strips out those flags and returns back a dereferencable pointer.
463  *
464  * @param ptep The PTE pointer to strip out the unwanted flags.
465  *
466  * @return A valid dereferencable pointer to the page table entry.
467  */
468 static inline const pt_entry_t*
pvh_strip_ptep(const pt_entry_t * ptep)469 pvh_strip_ptep(const pt_entry_t *ptep)
470 {
471 #ifdef PVH_FLAG_IOMMU
472 	const vm_offset_t pte_va = (vm_offset_t)ptep;
473 	return (const pt_entry_t*)((pte_va & ~PVH_FLAG_IOMMU) | PVH_FLAG_IOMMU_TABLE);
474 #else /* PVH_FLAG_IOMMU */
475 	return ptep;
476 #endif /* PVH_FLAG_IOMMU */
477 }
478 
479 /**
480  * PVH_TYPE_PVEP Helper Functions.
481  *
482  * The following are methods used to manipulate PVE lists. This is the type of
483  * pv_head_table entry used when there are multiple mappings to a single
484  * physical page.
485  */
486 
487 /**
488  * Whether a physical page is using "alternate accounting" (ALTACCT) for its
489  * ledger statistics is something that needs to be tracked on a per-mapping
490  * basis, not on a per-physical-page basis. Because of that, it's tracked
491  * differently depending on whether there's a single mapping to a page
492  * (PVH_TYPE_PTEP) or multiple (PVH_TYPE_PVEP). For single mappings, the bit is
493  * tracked in the pp_attr_table. But when there are multiple mappings, the least
494  * significant bit of the corresponding "pve_pte" pointer in each pv_entry object
495  * is used as a marker for pages using alternate accounting.
496  *
497  * @note See the definition for PP_ATTR_ALTACCT for a more detailed description
498  *       of what "alternate accounting" actually means in respect to the
499  *       footprint ledger.
500  *
501  * Since some code (KernelDiskImages, e.g.) might map a physical page as
502  * "device" memory (i.e. external) while it's also being used as regular
503  * "anonymous" memory (i.e. internal) in user space, we have to manage the
504  * "internal" attribute per mapping rather than per physical page.
505  * When there are multiple mappings, we use the next least significant bit of
506  * the corresponding "pve_pte" pointer for that.
507  */
508 #define PVE_PTEP_ALTACCT ((uintptr_t) 0x1)
509 #define PVE_PTEP_INTERNAL ((uintptr_t) 0x2)
510 #define PVE_PTEP_FLAGS (PVE_PTEP_ALTACCT | PVE_PTEP_INTERNAL)
511 
512 /**
513  * Set the ALTACCT bit for a specific PTE pointer.
514  *
515  * @param pvep A pointer to the current pv_entry mapping in the linked list of
516  *             mappings.
517  * @param idx Index of the chosen PTE pointer inside the PVE.
518  */
519 static inline void
pve_set_altacct(pv_entry_t * pvep,unsigned idx)520 pve_set_altacct(pv_entry_t *pvep, unsigned idx)
521 {
522 	assert(idx < PTE_PER_PVE);
523 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_ALTACCT);
524 }
525 /**
526  * Set the INTERNAL bit for a specific PTE pointer.
527  *
528  * @param pvep A pointer to the current pv_entry mapping in the linked list of
529  *             mappings.
530  * @param idx Index of the chosen PTE pointer inside the PVE.
531  */
532 static inline void
pve_set_internal(pv_entry_t * pvep,unsigned idx)533 pve_set_internal(pv_entry_t *pvep, unsigned idx)
534 {
535 	assert(idx < PTE_PER_PVE);
536 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_INTERNAL);
537 }
538 
539 /**
540  * Clear the ALTACCT bit for a specific PTE pointer.
541  *
542  * @param pvep A pointer to the current pv_entry mapping in the linked list of
543  *             mappings.
544  * @param idx Index of the chosen PTE pointer inside the PVE.
545  */
546 static inline void
pve_clr_altacct(pv_entry_t * pvep,unsigned idx)547 pve_clr_altacct(pv_entry_t *pvep, unsigned idx)
548 {
549 	assert(idx < PTE_PER_PVE);
550 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_ALTACCT);
551 }
552 /**
553  * Clear the INTERNAL bit for a specific PTE pointer.
554  *
555  * @param pvep A pointer to the current pv_entry mapping in the linked list of
556  *             mappings.
557  * @param idx Index of the chosen PTE pointer inside the PVE.
558  */
559 static inline void
pve_clr_internal(pv_entry_t * pvep,unsigned idx)560 pve_clr_internal(pv_entry_t *pvep, unsigned idx)
561 {
562 	assert(idx < PTE_PER_PVE);
563 	pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_INTERNAL);
564 }
565 
566 /**
567  * Return the ALTACCT bit for a specific PTE pointer.
568  *
569  * @param pvep A pointer to the current pv_entry mapping in the linked list of
570  *             mappings.
571  * @param idx Index of the chosen PTE pointer inside the PVE.
572  */
573 static inline bool
pve_get_altacct(pv_entry_t * pvep,unsigned idx)574 pve_get_altacct(pv_entry_t *pvep, unsigned idx)
575 {
576 	assert(idx < PTE_PER_PVE);
577 	return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_ALTACCT;
578 }
579 /**
580  * Return the INTERNAL bit for a specific PTE pointer.
581  *
582  * @param pvep A pointer to the current pv_entry mapping in the linked list of
583  *             mappings.
584  * @param idx Index of the chosen PTE pointer inside the PVE.
585  */
586 static inline bool
pve_get_internal(pv_entry_t * pvep,unsigned idx)587 pve_get_internal(pv_entry_t *pvep, unsigned idx)
588 {
589 	assert(idx < PTE_PER_PVE);
590 	return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_INTERNAL;
591 }
592 
593 /**
594  * Return the next mapping (pv_entry) in a linked list of mappings. This applies
595  * to pv_head_table entries of type PVH_TYPE_PVEP.
596  *
597  * @param pvep A pointer to the current pv_entry mapping in the linked list of
598  *             mappings.
599  *
600  * @return The next virtual mapping for a physical page, or PV_ENTRY_NULL if the
601  *         end of the list has been reached.
602  */
603 static inline pv_entry_t *
pve_next(pv_entry_t * pvep)604 pve_next(pv_entry_t *pvep)
605 {
606 	return pvep->pve_next;
607 }
608 
609 /**
610  * Return a pointer to the pve_next field in a pv_entry. This value is used
611  * when adding and removing entries to a PVE list.
612  *
613  * @param pvep The pv_entry whose pve_next field is being accessed.
614  *
615  * @return Pointer to the pve_next field.
616  */
617 static inline pv_entry_t **
pve_next_ptr(pv_entry_t * pvep)618 pve_next_ptr(pv_entry_t *pvep)
619 {
620 	return &pvep->pve_next;
621 }
622 
623 /**
624  * Return a pointer to the page table entry for this mapping.
625  *
626  * @param pvep The pv_entry whose pve_ptep field is to be returned.
627  * @param idx Index of the chosen PTE pointer inside the PVE.
628  *
629  * @return Pointer to the page table entry.
630  */
631 static inline pt_entry_t *
pve_get_ptep(pv_entry_t * pvep,unsigned idx)632 pve_get_ptep(pv_entry_t *pvep, unsigned idx)
633 {
634 	assert(idx < PTE_PER_PVE);
635 	return (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_FLAGS);
636 }
637 
638 /**
639  * Update the page table entry for a specific physical to virtual mapping.
640  *
641  * @param pvep The pv_entry to update.
642  * @param idx Index of the chosen PTE pointer inside the PVE.
643  * @param ptep_new The new page table entry.
644  */
645 static inline void
pve_set_ptep(pv_entry_t * pvep,unsigned idx,pt_entry_t * ptep_new)646 pve_set_ptep(pv_entry_t *pvep, unsigned idx, pt_entry_t *ptep_new)
647 {
648 	assert(idx < PTE_PER_PVE);
649 	pvep->pve_ptep[idx] = ptep_new;
650 }
651 
652 /**
653  * Initialize all fields in a PVE to NULL.
654  *
655  * @param pvep The pv_entry to initialize.
656  */
657 static inline void
pve_init(pv_entry_t * pvep)658 pve_init(pv_entry_t *pvep)
659 {
660 	pvep->pve_next = PV_ENTRY_NULL;
661 	for (int i = 0; i < PTE_PER_PVE; i++) {
662 		pvep->pve_ptep[i] = PT_ENTRY_NULL;
663 	}
664 }
665 
666 /**
667  * Find PTE pointer in PVE and return its index.
668  *
669  * @param pvep The PVE to search.
670  * @param ptep PTE to search for.
671  *
672  * @return Index of the found entry, or -1 if no entry exists.
673  */
674 static inline int
pve_find_ptep_index(pv_entry_t * pvep,pt_entry_t * ptep)675 pve_find_ptep_index(pv_entry_t *pvep, pt_entry_t *ptep)
676 {
677 	for (int i = 0; i < PTE_PER_PVE; i++) {
678 		if (pve_get_ptep(pvep, i) == ptep) {
679 			return i;
680 		}
681 	}
682 
683 	return -1;
684 }
685 
686 /**
687  * Checks if no PTEs are currently associated with this PVE.
688  *
689  * @param pvep The PVE to search.
690  *
691  * @return True if no PTEs are currently associated with this PVE, or false.
692  */
693 static inline bool
pve_is_empty(pv_entry_t * pvep)694 pve_is_empty(pv_entry_t *pvep)
695 {
696 	for (int i = 0; i < PTE_PER_PVE; i++) {
697 		if (pve_get_ptep(pvep, i) != PT_ENTRY_NULL) {
698 			return false;
699 		}
700 	}
701 
702 	return true;
703 }
704 
705 /**
706  * Prepend a new pv_entry node to a PVE list.
707  *
708  * @note This function will clobber any existing flags stored in the PVH
709  *       pointer. It's up to the caller to preserve flags if that functionality
710  *       is needed (either by ensuring `pvep` contains those flags, or by
711  *       manually setting the flags after this call).
712  *
713  * @param pvh The linked list of mappings to update.
714  * @param pvep The new mapping to add to the linked list.
715  */
716 static inline void
pve_add(pv_entry_t ** pvh,pv_entry_t * pvep)717 pve_add(pv_entry_t **pvh, pv_entry_t *pvep)
718 {
719 	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
720 
721 	pvep->pve_next = pvh_pve_list(pvh);
722 	pvh_update_head(pvh, pvep, PVH_TYPE_PVEP);
723 }
724 
725 /**
726  * Remove an entry from a PVE list of mappings.
727  *
728  * @note This function will clobber any existing flags stored in the PVH
729  *       pointer. It's up to the caller to preserve flags if that functionality
730  *       is needed.
731  *
732  * @param pvh The pv_head_table entry of the PVE list to remove a mapping from.
733  *            This is the first entry in the list of pv_entry_t mappings.
734  * @param pvepp A pointer to the pv_entry_t* that's being removed. If this entry
735  *              is the first in the linked list of mappings, then this should be
736  *              identical to the pv_head_table entry. If the mapping isn't the
737  *              first, then this is a pointer to the pve_next field in the
738  *              previous mapping.
739  * @param pvep The entry that should be removed. Should be identical to a
740  *             dereference of the pvepp parameter (unless it's the pv_head_table
741  *             entry).
742  */
743 static inline void
pve_remove(pv_entry_t ** pvh,pv_entry_t ** pvepp,pv_entry_t * pvep)744 pve_remove(pv_entry_t **pvh, pv_entry_t **pvepp, pv_entry_t *pvep)
745 {
746 	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
747 
748 	if (pvepp == pvh) {
749 		if (pve_next(pvep) == PV_ENTRY_NULL) {
750 			/* The last mapping to this page is being removed. */
751 			pvh_update_head(pvh, PV_ENTRY_NULL, PVH_TYPE_NULL);
752 		} else {
753 			/**
754 			 * There are still mappings left, make the next one the new head of
755 			 * the list. This effectively removes the first entry from the list.
756 			 */
757 			pvh_update_head(pvh, pve_next(pvep), PVH_TYPE_PVEP);
758 		}
759 	} else {
760 		/**
761 		 * Move the previous entry's next field to the entry after the one being
762 		 * removed. This will clobber the ALTACCT and INTERNAL bits.
763 		 */
764 		*pvepp = pve_next(pvep);
765 	}
766 }
767 
768 /**
769  * PVH_TYPE_PTDP Types and Helper Functions.
770  *
771  * The following are types and methods used to manipulate page table descriptor
772  * (PTD) objects. This is the type of pv_head_table entry used when a page is
773  * being used as a page table.
774  */
775 
776 /**
777  * When the pmap layer allocates memory, it always does so in chunks of the VM
778  * page size (which are represented by the PAGE_SIZE/PAGE_SHIFT macros). The VM
779  * page size might not match up with the hardware page size for a given address
780  * space (this is especially true on systems that support more than one page
781  * size).
782  *
783  * The pv_head_table is allocated to have one entry per VM page, not hardware
784  * page (which can change depending on the address space). Because of that, a
785  * single VM-page-sized region (single pv_head_table entry) can potentially hold
786  * up to four page tables. Only one page table descriptor (PTD) is allocated per
787  * pv_head_table entry (per VM page), so on some systems, one PTD might have to
788  * keep track of up to four different page tables.
789  */
790 
791 #if __ARM_MIXED_PAGE_SIZE__
792 #define PT_INDEX_MAX (ARM_PGBYTES / 4096)
793 #elif (ARM_PGSHIFT == 14)
794 #define PT_INDEX_MAX 1
795 #elif (ARM_PGSHIFT == 12)
796 #define PT_INDEX_MAX 4
797 #else
798 #error Unsupported ARM_PGSHIFT
799 #endif /* __ARM_MIXED_PAGE_SIZE__ || ARM_PGSHIFT == 14 || ARM_PGSHIFT == 12 */
800 
801 
802 /**
803  * Page table descriptor (PTD) info structure.
804  *
805  * Contains information about a page table. These pieces of data are separate
806  * from the PTD itself because in address spaces where the VM page size doesn't
807  * match the underlying hardware page size, one PTD could represent multiple
808  * page tables (and so will need multiple PTD info structures).
809  *
810  * These fields are also in their own struct so that they can be allocated
811  * separately from the associated pt_desc_t object. This allows us to allocate
812  * the counts in this structure in a way that ensures they don't fall within the
813  * same cache line as the main pt_desc_t object. This is important because the
814  * fields in this structure are atomically updated which could cause false
815  * sharing cache performance issues with the "va" field in pt_desc_t if all of
816  * the fields were within the same structure.
817  */
typedef struct {
	/**
	 * Pre-defined sentinel values for ptd_info_t.refcnt. If these refcnt values
	 * change, make sure to update the showpte LLDB macro to reflect the
	 * changes.
	 */
	#define PT_DESC_REFCOUNT                0x4000U
	#define PT_DESC_IOMMU_GRANTED_REFCOUNT  0x8000U
	#define PT_DESC_IOMMU_ACCEPTED_REFCOUNT 0x8001U

	/*
	 * Page table reference count (updated atomically; see struct-level comment
	 * about false sharing).
	 *
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT.
	 * For leaf pagetables, should reflect the number of non-empty PTEs.
	 * For IOMMU pages, should always be either PT_DESC_IOMMU_GRANTED_REFCOUNT
	 * or PT_DESC_IOMMU_ACCEPTED_REFCOUNT.
	 */
	unsigned short refcnt;

	/*
	 * Wired-mapping count.
	 *
	 * For non-leaf pagetables, should be 0.
	 * For leaf pagetables, should reflect the number of wired entries.
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU
	 * operations are implicitly wired).
	 */
	unsigned short wiredcnt;
} ptd_info_t;
844 
845 /**
846  * Page Table Descriptor (PTD).
847  *
848  * Provides a per-table data structure and a way of keeping track of all page
849  * tables in the system.
850  *
851  * This structure is also used as a convenient way of keeping track of IOMMU
852  * pages (which may or may not be used as page tables). In that case the "iommu"
853  * field will point to the owner of the page, ptd_info[0].refcnt will be
854  * PT_DESC_IOMMU_GRANTED_REFCOUNT or PT_DESC_IOMMU_ACCEPTED_REFCOUNT, and
855  * ptd_info[0].wiredcnt can be used as an arbitrary refcnt controlled by the
856  * IOMMU driver.
857  */
typedef struct pt_desc {
	/**
	 * This queue chain provides a mechanism for keeping a list of pages
	 * being used as page tables. This is used to potentially reclaim userspace
	 * page tables as a fast way of "allocating" a page.
	 *
	 * Refer to osfmk/kern/queue.h for more information about queue chains.
	 */
	queue_chain_t pt_page;

	/* Each page table is either owned by a pmap or a specific IOMMU. */
	union {
		struct pmap *pmap;
		/*
		 * NOTE(review): the struct-level comment above refers to an "iommu"
		 * union member; presumably it exists under a configuration not visible
		 * in this chunk — confirm before relying on this union's layout.
		 */
	};

	/**
	 * The following fields contain per-page-table properties, and as such,
	 * might have multiple elements each. This is due to a single PTD
	 * potentially representing multiple page tables (in address spaces where
	 * the VM page size differs from the hardware page size). Use the
	 * ptd_get_index() function to get the correct index for a specific page
	 * table.
	 */

	/**
	 * The first address of the virtual address space this page table is
	 * translating for, or a value set by an IOMMU driver if this PTD is being
	 * used to track an IOMMU page.
	 */
	vm_offset_t va[PT_INDEX_MAX];

	/**
	 * ptd_info_t's are allocated separately so as to reduce false sharing
	 * with the va field. This is desirable because ptd_info_t's are updated
	 * atomically from all CPUs.
	 */
	ptd_info_t *ptd_info;
} pt_desc_t;
896 
897 /**
898  * Convert a pv_head_table entry/pointer into a page table descriptor pointer.
899  * This should only be done if the type of this entry is PVH_TYPE_PTDP.
900  *
901  * @param pvh The pv_head_table entry/pointer to convert into a safe to
902  *            dereference pt_desc_t*.
903  *
904  * @return Return back a safe to derefence pointer to the page table descriptor
905  *         for this physical page by masking off the TYPE bits and adding any
906  *         missing flags to the upper portion of the pointer.
907  */
908 static inline pt_desc_t*
pvh_ptd(pv_entry_t ** pvh)909 pvh_ptd(pv_entry_t **pvh)
910 {
911 	return (pt_desc_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
912 }
913 
914 /**
915  * Given an arbitrary page table entry, return back the page table descriptor
916  * (PTD) object for the page table that contains that entry.
917  *
918  * @param ptep Pointer to a PTE whose page table descriptor object to return.
919  *
920  * @return The PTD object for the passed in page table.
921  */
922 static inline pt_desc_t *
ptep_get_ptd(const pt_entry_t * ptep)923 ptep_get_ptd(const pt_entry_t *ptep)
924 {
925 	assert(ptep != NULL);
926 
927 	const vm_offset_t pt_base_va = (vm_offset_t)ptep;
928 	pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop(pt_base_va)));
929 
930 	if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
931 		panic("%s: invalid PV head 0x%llx for PTE %p", __func__, (uint64_t)(*pvh), ptep);
932 	}
933 
934 	return pvh_ptd(pvh);
935 }
936 
937 /**
938  * Given an arbitrary page table entry, return back the pmap that owns that
939  * page table.
940  *
941  * @note This won't work correctly for page tables owned by IOMMUs, because
942  *       those table aren't owned by any specific pmap.
943  *
944  * @param ptep Pointer to a page table entry whose owner we're trying to return.
945  *
946  * @return The pmap that owns the given page table entry.
947  */
948 static inline struct pmap *
ptep_get_pmap(const pt_entry_t * ptep)949 ptep_get_pmap(const pt_entry_t *ptep)
950 {
951 	return ptep_get_ptd(ptep)->pmap;
952 }
953 
954 
955 /**
956  * Given an arbitrary translation table entry, get the page table descriptor
957  * (PTD) object for the page table pointed to by the TTE.
958  *
959  * @param tte The translation table entry to parse. For instance, if this is an
960  *            L2 TTE, then the PTD for the L3 table this entry points to will be
961  *            returned.
962  *
963  * @return The page table descriptor (PTD) for the page table pointed to by this
964  *         TTE.
965  */
966 static inline pt_desc_t *
tte_get_ptd(const tt_entry_t tte)967 tte_get_ptd(const tt_entry_t tte)
968 {
969 	const vm_offset_t pt_base_va = (vm_offset_t)(tte & ~PAGE_MASK);
970 	pv_entry_t **pvh = pai_to_pvh(pa_index(pt_base_va));
971 
972 	if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
973 		panic("%s: invalid PV head 0x%llx for TTE 0x%llx", __func__, (uint64_t)(*pvh), (uint64_t)tte);
974 	}
975 
976 	return pvh_ptd(pvh);
977 }
978 
979 /**
980  * In address spaces where the VM page size doesn't match the underlying
981  * hardware page size, one PTD could represent multiple page tables. This
982  * function returns the correct index value depending on which page table is
983  * being accessed. That index value can then be used to access the
984  * per-page-table properties stored within a PTD.
985  *
986  * @note See the description above the PT_INDEX_MAX definition for a more
987  *       detailed explanation of why multiple page tables can be represented
988  *       by a single PTD object in the pv_head_table.
989  *
990  * @param ptd The page table descriptor that's being accessed.
991  * @param ttep Pointer to the translation table entry that's being accessed.
992  *
993  * @return The correct index value for a specific, hardware-sized page
994  *         table.
995  */
996 static inline unsigned
ptd_get_index(__unused const pt_desc_t * ptd,__unused const tt_entry_t * ttep)997 ptd_get_index(__unused const pt_desc_t *ptd, __unused const tt_entry_t *ttep)
998 {
999 #if PT_INDEX_MAX == 1
1000 	return 0;
1001 #else
1002 	assert(ptd != NULL);
1003 
1004 	const uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
1005 	const vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
1006 
1007 	/**
1008 	 * Use the difference between the VM page shift and the hardware page shift
1009 	 * to get the index of the correct page table. In practice, this equates to
1010 	 * masking out the bottom two bits of the L3 table index in address spaces
1011 	 * where the VM page size is greater than the hardware page size. In address
1012 	 * spaces where they're identical, the index will always be zero.
1013 	 */
1014 	const unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
1015 	assert(ttep_index < PT_INDEX_MAX);
1016 
1017 	return ttep_index;
1018 #endif
1019 }
1020 
1021 /**
1022  * In address spaces where the VM page size doesn't match the underlying
1023  * hardware page size, one PTD could represent multiple page tables. This
1024  * function returns the correct ptd_info_t structure depending on which page
1025  * table is being accessed.
1026  *
1027  * @note See the description above the PT_INDEX_MAX definition for a more
1028  *       detailed explanation of why multiple page tables can be represented
1029  *       by a single PTD object in the pv_head_table.
1030  *
1031  * @param ptd The page table descriptor that's being accessed.
1032  * @param ttep Pointer to the translation table entry that's being accessed.
1033  *
1034  * @return The correct ptd_info_t structure for a specific, hardware-sized page
1035  *         table.
1036  */
1037 static inline ptd_info_t *
ptd_get_info(pt_desc_t * ptd,const tt_entry_t * ttep)1038 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1039 {
1040 	assert((ptd != NULL) && (ptd->ptd_info[0].refcnt < PT_DESC_IOMMU_GRANTED_REFCOUNT));
1041 
1042 	return &ptd->ptd_info[ptd_get_index(ptd, ttep)];
1043 }
1044 
1045 /**
1046  * Given a pointer to a page table entry, return back the ptd_info structure
1047  * for the page table that contains that entry.
1048  *
1049  * @param ptep Pointer to a PTE whose ptd_info object to return.
1050  *
1051  * @return The ptd_info object for the page table that contains the passed in
1052  *         page table entry.
1053  */
1054 static inline ptd_info_t *
ptep_get_info(const pt_entry_t * ptep)1055 ptep_get_info(const pt_entry_t *ptep)
1056 {
1057 	return ptd_get_info(ptep_get_ptd(ptep), ptep);
1058 }
1059 
1060 /**
1061  * Return the virtual address mapped by the passed in leaf page table entry,
1062  * using an already-retrieved pagetable descriptor.
1063  *
1064  * @param ptdp pointer to the descriptor for the pagetable containing ptep
1065  * @param ptep Pointer to a PTE to parse
1066  */
1067 static inline vm_map_address_t
ptd_get_va(const pt_desc_t * ptdp,const pt_entry_t * ptep)1068 ptd_get_va(const pt_desc_t *ptdp, const pt_entry_t *ptep)
1069 {
1070 	const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptdp->pmap);
1071 
1072 	vm_map_address_t va = ptdp->va[ptd_get_index(ptdp, ptep)];
1073 	vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1074 
1075 	va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1076 
1077 	return va;
1078 }
1079 
1080 /**
1081  * Return the virtual address that is being mapped by the passed in leaf page
1082  * table entry.
1083  *
1084  * @param ptep Pointer to a PTE to parse.
1085  */
1086 static inline vm_map_address_t
ptep_get_va(const pt_entry_t * ptep)1087 ptep_get_va(const pt_entry_t *ptep)
1088 {
1089 	return ptd_get_va(ptep_get_ptd(ptep), ptep);
1090 }
1091 
/**
 * Physical Page Attribute Table (pp_attr_table) defines and helper functions.
 */

/* How many bits to use for flags on a per-VM-page basis. */
typedef uint16_t pp_attr_t;

/* See the definition of pp_attr_table for more information. */
/* One entry per VM page, indexed by physical address index (pai). */
extern volatile pp_attr_t* pp_attr_table;
1101 
/**
 * Flags stored in the pp_attr_table on a per-physical-page basis.
 *
 * Please update the pv_walk LLDB macro if these flags are changed or added to.
 */

/**
 * The bottom 6-bits are used to store the default WIMG (cacheability and memory
 * type) setting for this physical page. This can be changed by calling
 * pmap_set_cache_attributes().
 *
 * If a default WIMG setting isn't set for a page, then the default is Normal,
 * Cached memory (VM_WIMG_DEFAULT).
 */
#define PP_ATTR_WIMG_MASK 0x003F
#define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)

/**
 * The reference and modify bits keep track of whether a page has been accessed
 * or modified since the last time the bits were cleared. These bits are used to
 * enforce policy decisions in the VM layer.
 */
#define PP_ATTR_REFERENCED 0x0040
#define PP_ATTR_MODIFIED   0x0080

/**
 * This physical page is being used as anonymous memory that's internally
 * managed by the VM and is not connected to an external pager. This flag is
 * only set/cleared on the first CPU mapping of a page (see PVH_FLAG_CPU). Any
 * subsequent mappings won't set/clear this flag until all mappings are removed
 * and a new CPU mapping is added.
 */
#define PP_ATTR_INTERNAL 0x0100

/**
 * This flag is used to keep track of pages that are still resident but are not
 * considered dirty and can be reclaimed under memory pressure. These pages do
 * not count as a part of the memory footprint, so the footprint ledger does not
 * need to be updated for these pages. This is hinted to the VM by the
 * `madvise(MADV_FREE_REUSABLE)` system call.
 */
#define PP_ATTR_REUSABLE 0x0200

/**
 * This flag denotes that a page is utilizing "alternate accounting". This means
 * that the pmap doesn't need to keep track of these pages with regards to the
 * footprint ledger because the VM is already accounting for them in a different
 * way. These include IOKit mappings (VM adds their entire virtual size to the
 * footprint), and purgeable pages (VM counts them only when non-volatile and
 * only for one "owner"), among others.
 *
 * Note that alternate accounting status is tracked on a per-mapping basis (not
 * per-page). Because of that the ALTACCT flag in the pp_attr_table is only used
 * when there's a single mapping to a page. When there are multiple mappings,
 * the status of this flag is tracked in the pv_head_table (see PVE_PTEP_ALTACCT
 * above).
 */
#define PP_ATTR_ALTACCT 0x0400

/**
 * This bit was originally used on x86 to keep track of what pages to not
 * encrypt during the hibernation process as a performance optimization when
 * encryption was done in software. This doesn't apply to the ARM
 * hibernation process because all pages are automatically encrypted using
 * hardware acceleration. Despite that, the pmap still keeps track of this flag
 * as a debugging aid on internal builds.
 *
 * TODO: This bit can probably be reclaimed:
 * rdar://70740650 (PMAP Cleanup: Potentially reclaim the PP_ATTR_NOENCRYPT bit on ARM)
 */
#define PP_ATTR_NOENCRYPT 0x0800

/**
 * These bits denote that a physical page is expecting the next access or
 * modification to set the PP_ATTR_REFERENCED and PP_ATTR_MODIFIED flags
 * respectively.
 */
#define PP_ATTR_REFFAULT 0x1000
#define PP_ATTR_MODFAULT 0x2000

#if XNU_MONITOR
/**
 * Denotes that a page is owned by the PPL. This is modified/checked with the
 * PVH lock held, to avoid ownership related races. This does not need to be a
 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
 * put the bit.
 */
#define PP_ATTR_MONITOR 0x4000

/**
 * Denotes that a page *cannot* be owned by the PPL. This is required in order
 * to temporarily 'pin' kernel pages that are used to store PPL output
 * parameters. Otherwise a malicious or buggy caller could pass PPL-owned memory
 * for these parameters and in so doing stage a write gadget against the PPL.
 */
#define PP_ATTR_NO_MONITOR 0x8000

/**
 * All of the bits owned by the PPL; kernel requests to set or clear these bits
 * are illegal.
 */
#define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
#endif /* XNU_MONITOR */
1205 
1206 /**
1207  * Atomically set some flags in a pp_attr_table entry.
1208  *
1209  * @param pai The physical address index for the entry to update.
1210  * @param bits The flags to set in the entry.
1211  */
1212 static inline void
ppattr_set_bits(unsigned int pai,pp_attr_t bits)1213 ppattr_set_bits(unsigned int pai, pp_attr_t bits)
1214 {
1215 	volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1216 	os_atomic_or(ppattr, bits, acq_rel);
1217 }
1218 
1219 /**
1220  * Atomically clear some flags in a pp_attr_table entry.
1221  *
1222  * @param pai The physical address index for the entry to update.
1223  * @param bits The flags to clear in the entry.
1224  */
1225 static inline void
ppattr_clear_bits(unsigned int pai,pp_attr_t bits)1226 ppattr_clear_bits(unsigned int pai, pp_attr_t bits)
1227 {
1228 	volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1229 	os_atomic_andnot(ppattr, bits, acq_rel);
1230 }
1231 
1232 /**
1233  * Return true if the pp_attr_table entry contains the passed in bits.
1234  *
1235  * @param pai The physical address index for the entry to test.
1236  * @param bits The flags to check for.
1237  */
1238 static inline bool
ppattr_test_bits(unsigned int pai,pp_attr_t bits)1239 ppattr_test_bits(unsigned int pai, pp_attr_t bits)
1240 {
1241 	const volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1242 	return (*ppattr & bits) == bits;
1243 }
1244 
1245 /**
1246  * Only set some flags in a pp_attr_table entry if the passed in physical
1247  * address is a kernel-managed address.
1248  *
1249  * @param pa The physical address for the entry to update.
1250  * @param bits The flags to set in the entry.
1251  */
1252 static inline void
ppattr_pa_set_bits(pmap_paddr_t pa,pp_attr_t bits)1253 ppattr_pa_set_bits(pmap_paddr_t pa, pp_attr_t bits)
1254 {
1255 	if (pa_valid(pa)) {
1256 		ppattr_set_bits(pa_index(pa), bits);
1257 	}
1258 }
1259 
1260 /**
1261  * Only clear some flags in a pp_attr_table entry if the passed in physical
1262  * address is a kernel-managed address.
1263  *
1264  * @param pa The physical address for the entry to update.
1265  * @param bits The flags to clear in the entry.
1266  */
1267 static inline void
ppattr_pa_clear_bits(pmap_paddr_t pa,pp_attr_t bits)1268 ppattr_pa_clear_bits(pmap_paddr_t pa, pp_attr_t bits)
1269 {
1270 	if (pa_valid(pa)) {
1271 		ppattr_clear_bits(pa_index(pa), bits);
1272 	}
1273 }
1274 
1275 /**
1276  * Only test flags in a pp_attr_table entry if the passed in physical address
1277  * is a kernel-managed page.
1278  *
1279  * @param pa The physical address for the entry to test.
1280  * @param bits The flags to check for.
1281  *
1282  * @return False if the PA isn't a kernel-managed page, otherwise true/false
1283  *         depending on whether the bits are set.
1284  */
1285 static inline bool
ppattr_pa_test_bits(pmap_paddr_t pa,pp_attr_t bits)1286 ppattr_pa_test_bits(pmap_paddr_t pa, pp_attr_t bits)
1287 {
1288 	return pa_valid(pa) ? ppattr_test_bits(pa_index(pa), bits) : false;
1289 }
1290 
1291 /**
1292  * Set the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the passed
1293  * in physical address is a kernel-managed page.
1294  *
1295  * @param pa The physical address for the entry to update.
1296  */
1297 static inline void
ppattr_pa_set_modify(pmap_paddr_t pa)1298 ppattr_pa_set_modify(pmap_paddr_t pa)
1299 {
1300 	ppattr_pa_set_bits(pa, PP_ATTR_MODIFIED);
1301 }
1302 
1303 /**
1304  * Clear the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the
1305  * passed in physical address is a kernel-managed page.
1306  *
1307  * @param pa The physical address for the entry to update.
1308  */
1309 static inline void
ppattr_pa_clear_modify(pmap_paddr_t pa)1310 ppattr_pa_clear_modify(pmap_paddr_t pa)
1311 {
1312 	ppattr_pa_clear_bits(pa, PP_ATTR_MODIFIED);
1313 }
1314 
1315 /**
1316  * Set the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1317  * passed in physical address is a kernel-managed page.
1318  *
1319  * @param pa The physical address for the entry to update.
1320  */
1321 static inline void
ppattr_pa_set_reference(pmap_paddr_t pa)1322 ppattr_pa_set_reference(pmap_paddr_t pa)
1323 {
1324 	ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
1325 }
1326 
1327 /**
1328  * Clear the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1329  * passed in physical address is a kernel-managed page.
1330  *
1331  * @param pa The physical address for the entry to update.
1332  */
1333 static inline void
ppattr_pa_clear_reference(pmap_paddr_t pa)1334 ppattr_pa_clear_reference(pmap_paddr_t pa)
1335 {
1336 	ppattr_pa_clear_bits(pa, PP_ATTR_REFERENCED);
1337 }
1338 
#if XNU_MONITOR

/**
 * Mark a physical page as PPL-owned (PP_ATTR_MONITOR) if it's a kernel-managed
 * page; no-op otherwise.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_monitor(pmap_paddr_t pa)
{
	ppattr_pa_set_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Clear the PPL-owned state (PP_ATTR_MONITOR) of a physical page if it's a
 * kernel-managed page; no-op otherwise.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_monitor(pmap_paddr_t pa)
{
	ppattr_pa_clear_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Check whether a kernel-managed physical page is PPL-owned
 * (PP_ATTR_MONITOR).
 *
 * @param pa The physical address for the entry to test.
 *
 * @return False if the PA isn't a kernel-managed page, otherwise true/false
 *         depending on whether PP_ATTR_MONITOR is set.
 */
static inline bool
ppattr_pa_test_monitor(pmap_paddr_t pa)
{
	return ppattr_pa_test_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Mark a physical page as never-PPL-owned (PP_ATTR_NO_MONITOR) if it's a
 * kernel-managed page; no-op otherwise.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_no_monitor(pmap_paddr_t pa)
{
	ppattr_pa_set_bits(pa, PP_ATTR_NO_MONITOR);
}

/**
 * Clear the never-PPL-owned state (PP_ATTR_NO_MONITOR) of a physical page if
 * it's a kernel-managed page; no-op otherwise.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_no_monitor(pmap_paddr_t pa)
{
	ppattr_pa_clear_bits(pa, PP_ATTR_NO_MONITOR);
}

/**
 * Check whether a kernel-managed physical page is pinned as never-PPL-owned
 * (PP_ATTR_NO_MONITOR).
 *
 * @param pa The physical address for the entry to test.
 *
 * @return False if the PA isn't a kernel-managed page, otherwise true/false
 *         depending on whether PP_ATTR_NO_MONITOR is set.
 */
static inline bool
ppattr_pa_test_no_monitor(pmap_paddr_t pa)
{
	return ppattr_pa_test_bits(pa, PP_ATTR_NO_MONITOR);
}

#endif /* XNU_MONITOR */
1420 
1421 /**
1422  * Set the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1423  *
1424  * @param pai The physical address index for the entry to update.
1425  */
1426 static inline void
ppattr_set_internal(unsigned int pai)1427 ppattr_set_internal(unsigned int pai)
1428 {
1429 	ppattr_set_bits(pai, PP_ATTR_INTERNAL);
1430 }
1431 
1432 /**
1433  * Clear the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1434  *
1435  * @param pai The physical address index for the entry to update.
1436  */
1437 static inline void
ppattr_clear_internal(unsigned int pai)1438 ppattr_clear_internal(unsigned int pai)
1439 {
1440 	ppattr_clear_bits(pai, PP_ATTR_INTERNAL);
1441 }
1442 
1443 /**
1444  * Return true if the pp_attr_table entry has the PP_ATTR_INTERNAL flag set.
1445  *
1446  * @param pai The physical address index for the entry to test.
1447  */
1448 static inline bool
ppattr_test_internal(unsigned int pai)1449 ppattr_test_internal(unsigned int pai)
1450 {
1451 	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1452 }
1453 
1454 /**
1455  * Set the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1456  *
1457  * @param pai The physical address index for the entry to update.
1458  */
1459 static inline void
ppattr_set_reusable(unsigned int pai)1460 ppattr_set_reusable(unsigned int pai)
1461 {
1462 	ppattr_set_bits(pai, PP_ATTR_REUSABLE);
1463 }
1464 
1465 /**
1466  * Clear the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1467  *
1468  * @param pai The physical address index for the entry to update.
1469  */
1470 static inline void
ppattr_clear_reusable(unsigned int pai)1471 ppattr_clear_reusable(unsigned int pai)
1472 {
1473 	ppattr_clear_bits(pai, PP_ATTR_REUSABLE);
1474 }
1475 
1476 /**
1477  * Return true if the pp_attr_table entry has the PP_ATTR_REUSABLE flag set.
1478  *
1479  * @param pai The physical address index for the entry to test.
1480  */
1481 static inline bool
ppattr_test_reusable(unsigned int pai)1482 ppattr_test_reusable(unsigned int pai)
1483 {
1484 	return ppattr_test_bits(pai, PP_ATTR_REUSABLE);
1485 }
1486 
1487 /**
1488  * Set the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1489  *
1490  * @note This is only valid when the ALTACCT flag is being tracked using the
1491  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1492  *       PP_ATTR_ALTACCT definitions for more information.
1493  *
1494  * @param pai The physical address index for the entry to update.
1495  */
1496 static inline void
ppattr_set_altacct(unsigned int pai)1497 ppattr_set_altacct(unsigned int pai)
1498 {
1499 	ppattr_set_bits(pai, PP_ATTR_ALTACCT);
1500 }
1501 
1502 /**
1503  * Clear the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1504  *
1505  * @note This is only valid when the ALTACCT flag is being tracked using the
1506  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1507  *       PP_ATTR_ALTACCT definitions for more information.
1508  *
1509  * @param pai The physical address index for the entry to update.
1510  */
1511 static inline void
ppattr_clear_altacct(unsigned int pai)1512 ppattr_clear_altacct(unsigned int pai)
1513 {
1514 	ppattr_clear_bits(pai, PP_ATTR_ALTACCT);
1515 }
1516 
1517 /**
1518  * Get the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1519  *
1520  * @note This is only valid when the ALTACCT flag is being tracked using the
1521  *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1522  *       PP_ATTR_ALTACCT definitions for more information.
1523  *
1524  * @param pai The physical address index for the entry to test.
1525  *
1526  * @return True if the passed in page uses alternate accounting, false
1527  *         otherwise.
1528  */
1529 static inline bool
ppattr_is_altacct(unsigned int pai)1530 ppattr_is_altacct(unsigned int pai)
1531 {
1532 	return ppattr_test_bits(pai, PP_ATTR_ALTACCT);
1533 }
1534 /**
1535  * Get the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1536  *
1537  * @note This is only valid when the INTERNAL flag is being tracked using the
1538  *       pp_attr_table. See the descriptions above the PVE_PTEP_INTERNAL and
1539  *       PP_ATTR_INTERNAL definitions for more information.
1540  *
1541  * @param pai The physical address index for the entry to test.
1542  *
1543  * @return True if the passed in page is accounted for as "internal", false
1544  *         otherwise.
1545  */
1546 static inline bool
ppattr_is_internal(unsigned int pai)1547 ppattr_is_internal(unsigned int pai)
1548 {
1549 	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1550 }
1551 
1552 /**
1553  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1554  * depending on whether there are one or multiple mappings to a page. This
1555  * function abstracts out the difference between single and multiple mappings to
1556  * a page and provides a single function for determining whether alternate
1557  * accounting is set for a mapping.
1558  *
1559  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1560  *       definitions for more information.
1561  *
1562  * @param pai The physical address index for the entry to test.
1563  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1564  * @param idx Index of the chosen PTE pointer inside the PVE.
1565  *
1566  * @return True if the passed in page uses alternate accounting, false
1567  *         otherwise.
1568  */
1569 static inline bool
ppattr_pve_is_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1570 ppattr_pve_is_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1571 {
1572 	return (pvep == PV_ENTRY_NULL) ? ppattr_is_altacct(pai) : pve_get_altacct(pvep, idx);
1573 }
1574 /**
1575  * The "internal" (INTERNAL) status for a page is tracked differently
1576  * depending on whether there are one or multiple mappings to a page. This
1577  * function abstracts out the difference between single and multiple mappings to
1578  * a page and provides a single function for determining whether "internal"
1579  * is set for a mapping.
1580  *
1581  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1582  *       definitions for more information.
1583  *
1584  * @param pai The physical address index for the entry to test.
1585  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1586  * @param idx Index of the chosen PTE pointer inside the PVE.
1587  *
1588  * @return True if the passed in page is "internal", false otherwise.
1589  */
1590 static inline bool
ppattr_pve_is_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1591 ppattr_pve_is_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1592 {
1593 	return (pvep == PV_ENTRY_NULL) ? ppattr_is_internal(pai) : pve_get_internal(pvep, idx);
1594 }
1595 
1596 /**
1597  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1598  * depending on whether there are one or multiple mappings to a page. This
1599  * function abstracts out the difference between single and multiple mappings to
1600  * a page and provides a single function for setting the alternate accounting status
1601  * for a mapping.
1602  *
1603  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1604  *       definitions for more information.
1605  *
1606  * @param pai The physical address index for the entry to update.
1607  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1608  * @param idx Index of the chosen PTE pointer inside the PVE.
1609  */
1610 static inline void
ppattr_pve_set_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1611 ppattr_pve_set_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1612 {
1613 	if (pvep == PV_ENTRY_NULL) {
1614 		ppattr_set_altacct(pai);
1615 	} else {
1616 		pve_set_altacct(pvep, idx);
1617 	}
1618 }
1619 /**
1620  * The "internal" (INTERNAL) status for a page is tracked differently
1621  * depending on whether there are one or multiple mappings to a page. This
1622  * function abstracts out the difference between single and multiple mappings to
1623  * a page and provides a single function for setting the "internal" status
1624  * for a mapping.
1625  *
1626  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1627  *       definitions for more information.
1628  *
1629  * @param pai The physical address index for the entry to update.
1630  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1631  * @param idx Index of the chosen PTE pointer inside the PVE.
1632  */
1633 static inline void
ppattr_pve_set_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1634 ppattr_pve_set_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1635 {
1636 	if (pvep == PV_ENTRY_NULL) {
1637 		ppattr_set_internal(pai);
1638 	} else {
1639 		pve_set_internal(pvep, idx);
1640 	}
1641 }
1642 
1643 /**
1644  * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1645  * depending on whether there are one or multiple mappings to a page. This
1646  * function abstracts out the difference between single and multiple mappings to
1647  * a page and provides a single function for clearing the alternate accounting status
1648  * for a mapping.
1649  *
1650  * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1651  *       definitions for more information.
1652  *
1653  * @param pai The physical address index for the entry to update.
1654  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1655  * @param idx Index of the chosen PTE pointer inside the PVE.
1656  */
1657 static inline void
ppattr_pve_clr_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1658 ppattr_pve_clr_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1659 {
1660 	if (pvep == PV_ENTRY_NULL) {
1661 		ppattr_clear_altacct(pai);
1662 	} else {
1663 		pve_clr_altacct(pvep, idx);
1664 	}
1665 }
1666 /**
1667  * The "internal" (INTERNAL) status for a page is tracked differently
1668  * depending on whether there are one or multiple mappings to a page. This
1669  * function abstracts out the difference between single and multiple mappings to
1670  * a page and provides a single function for clearing the "internal" status
1671  * for a mapping.
1672  *
1673  * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1674  *       definitions for more information.
1675  *
1676  * @param pai The physical address index for the entry to update.
1677  * @param pvep Pointer to the pv_entry_t object containing that mapping.
1678  * @param idx Index of the chosen PTE pointer inside the PVE.
1679  */
1680 static inline void
ppattr_pve_clr_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1681 ppattr_pve_clr_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1682 {
1683 	if (pvep == PV_ENTRY_NULL) {
1684 		ppattr_clear_internal(pai);
1685 	} else {
1686 		pve_clr_internal(pvep, idx);
1687 	}
1688 }
1689 
/**
 * Set the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * Convenience wrapper around ppattr_set_bits().
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_reffault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_REFFAULT);
}
1700 
/**
 * Clear the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * Convenience wrapper around ppattr_clear_bits().
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_reffault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_REFFAULT);
}
1711 
/**
 * Return true if the pp_attr_table entry has the PP_ATTR_REFFAULT flag set.
 *
 * Convenience wrapper around ppattr_test_bits().
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_REFFAULT is set for this entry, false otherwise.
 */
static inline bool
ppattr_test_reffault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_REFFAULT);
}
1722 
/**
 * Set the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * Convenience wrapper around ppattr_set_bits().
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_modfault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_MODFAULT);
}
1733 
/**
 * Clear the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * Convenience wrapper around ppattr_clear_bits().
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_modfault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_MODFAULT);
}
1744 
/**
 * Return true if the pp_attr_table entry has the PP_ATTR_MODFAULT flag set.
 *
 * Convenience wrapper around ppattr_test_bits().
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_MODFAULT is set for this entry, false otherwise.
 */
static inline bool
ppattr_test_modfault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_MODFAULT);
}
1755 
/**
 * The minimum number of pages to keep in the PPL page free list.
 *
 * We define our target as 8 pages: enough for 2 page table pages, a PTD page,
 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
 * a single pmap_enter request.
 */
#define PMAP_MIN_FREE_PPL_PAGES 8

/**
 * Flags passed to various page allocation functions, usually accessed through
 * the pmap_pages_alloc_zeroed() API. Each function that can take these flags as
 * a part of its option field, will describe these flags in its function header.
 */

/**
 * Instruct the allocation function to return immediately if no pages are
 * currently available. Without this flag, the function will spin and wait for a
 * page to become available. This flag can be required in some circumstances
 * (for instance, when allocating pages from within the PPL).
 */
#define PMAP_PAGES_ALLOCATE_NOWAIT 0x1

/**
 * Instructs an allocation function to fall back to reclaiming a userspace page
 * table if it failed to allocate a page from the free lists. This can be useful
 * when allocating from within the PPL because refilling the free lists requires
 * exiting and re-entering the PPL (which incurs extra latency).
 *
 * This is a quick way of allocating a page at the expense of having to
 * reallocate the table the next time one of its mappings is accessed.
 */
#define PMAP_PAGE_RECLAIM_NOWAIT 0x2
1789 
/**
 * Global variables exported to the rest of the internal pmap implementation.
 */
#if XNU_MONITOR
/* Number of pages currently in the PPL page free list. */
extern uint64_t pmap_ppl_free_page_count;
/* Physical address bounds of the memory backing the PPL stacks. */
extern pmap_paddr_t pmap_stacks_start_pa;
extern pmap_paddr_t pmap_stacks_end_pa;
/* Physical address bounds of the PPL CPU save areas. */
extern pmap_paddr_t ppl_cpu_save_area_start;
extern pmap_paddr_t ppl_cpu_save_area_end;
#endif /* XNU_MONITOR */
/* Number of pmap pages currently in use. */
extern unsigned int inuse_pmap_pages_count;
/* VM object backing pmap page allocations. */
extern vm_object_t pmap_object;
/* Initial allocation targets for the PV free lists (see pv_alloc()). */
extern uint32_t pv_alloc_initial_target;
extern uint32_t pv_kern_alloc_initial_target;
1804 
/**
 * Functions exported to the rest of the internal pmap implementation.
 */
extern void pmap_data_bootstrap(void);
extern void pmap_enqueue_pages(vm_page_t);
extern kern_return_t pmap_pages_alloc_zeroed(pmap_paddr_t *, unsigned, unsigned);
extern void pmap_pages_free(pmap_paddr_t, unsigned);

#if XNU_MONITOR

/* Transfer page ownership between the kernel and the PPL. */
extern void pmap_mark_page_as_ppl_page_internal(pmap_paddr_t, bool);
extern void pmap_mark_page_as_ppl_page(pmap_paddr_t);
extern void pmap_mark_page_as_kernel_page(pmap_paddr_t);
extern pmap_paddr_t pmap_alloc_page_for_kern(unsigned int);
extern void pmap_alloc_page_for_ppl(unsigned int);
extern uint64_t pmap_release_ppl_pages_to_kernel(void);

/* PPL-side ledger management. */
extern uint64_t pmap_ledger_validate(const volatile void *);
void pmap_ledger_retain(ledger_t ledger);
void pmap_ledger_release(ledger_t ledger);
extern void pmap_ledger_check_balance(pmap_t pmap);

/* Allocation/deallocation of pmap objects themselves. */
kern_return_t pmap_alloc_pmap(pmap_t *pmap);
void pmap_free_pmap(pmap_t pmap);

#endif /* XNU_MONITOR */
1831 
/**
 * The modes in which a pmap lock can be acquired. Note that shared access
 * doesn't necessarily mean "read-only". As long as data is atomically updated
 * correctly (to account for multi-cpu accesses) data can still get written with
 * a shared lock held. Care just needs to be taken so as to not introduce any
 * race conditions when there are multiple writers.
 *
 * This is here in pmap_data.h because it's a needed parameter for pv_alloc()
 * and pmap_enter_pv(). This header is always included in pmap_internal.h before
 * the rest of the pmap locking code is defined so there shouldn't be any issues
 * with missing types.
 */
OS_ENUM(pmap_lock_mode, uint8_t,
    PMAP_LOCK_SHARED,
    PMAP_LOCK_EXCLUSIVE);
1847 
/**
 * Possible return values for pv_alloc(). See the pv_alloc() function header for
 * a description of each of these values.
 */
typedef enum {
	PV_ALLOC_SUCCESS,
	PV_ALLOC_RETRY,
	PV_ALLOC_FAIL
} pv_alloc_return_t;

/* PV entry allocation/deallocation and PV list management. */
extern pv_alloc_return_t pv_alloc(
	pmap_t, unsigned int, pmap_lock_mode_t, unsigned int, pv_entry_t **);
extern void pv_free(pv_entry_t *);
extern void pv_list_free(pv_entry_t *, pv_entry_t *, int);
extern void pmap_compute_pv_targets(void);
extern pv_alloc_return_t pmap_enter_pv(
	pmap_t, pt_entry_t *, int, unsigned int, pmap_lock_mode_t, pv_entry_t **, int *new_pve_ptep_idx);
extern void pmap_remove_pv(pmap_t, pt_entry_t *, int, bool, bool *, bool *);
1866 
/* Page table descriptor (PTD) management. */
extern void ptd_bootstrap(pt_desc_t *, unsigned int);
extern pt_desc_t *ptd_alloc_unlinked(void);
extern pt_desc_t *ptd_alloc(pmap_t);
extern void ptd_deallocate(pt_desc_t *);
extern void ptd_info_init(
	pt_desc_t *, pmap_t, vm_map_address_t, unsigned int, pt_entry_t *);

/* Ledger accounting helpers for a pmap. */
extern kern_return_t pmap_ledger_credit(pmap_t, int, ledger_amount_t);
extern kern_return_t pmap_ledger_debit(pmap_t, int, ledger_amount_t);

extern void validate_pmap_internal(const volatile struct pmap *, const char *);
extern void validate_pmap_mutable_internal(const volatile struct pmap *, const char *);

/**
 * Macro function wrappers around pmap validation so that the calling function
 * can be printed in the panic strings for easier validation failure debugging.
 */
#define validate_pmap(x) validate_pmap_internal(x, __func__)
#define validate_pmap_mutable(x) validate_pmap_mutable_internal(x, __func__)
1886 
/**
 * This structure describes a PPL-owned I/O range.
 *
 * @note This doesn't necessarily have to represent "I/O" only, this can also
 *       represent non-kernel-managed DRAM (e.g., iBoot carveouts). Any physical
 *       address region that isn't considered "kernel-managed" is fair game.
 *
 * @note The layout of this structure needs to map 1-to-1 with the pmap-io-range
 *       device tree nodes. Astris (through the LowGlobals) also depends on the
 *       consistency of this structure.
 */
typedef struct pmap_io_range {
	/* Physical address of the PPL-owned I/O range. */
	uint64_t addr;

	/**
	 * Length (in bytes) of the PPL-owned I/O range. Has to be the size
	 * of a page if the range will be referred to by pmap_io_filter_entries.
	 */
	uint64_t len;

	/* Strong DSB required for pages in this range. */
	#define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31)

	/* Corresponds to memory carved out by bootloader. */
	#define PMAP_IO_RANGE_CARVEOUT (1UL << 30)

	/* Pages in this range need to be included in the hibernation image. */
	#define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29)

	/* Mark the range as 'owned' by a given subsystem. */
	#define PMAP_IO_RANGE_OWNED (1UL << 28)

	/**
	 * Lower 16 bits treated as pp_attr_t, upper 16 bits contain additional
	 * mapping flags (defined above).
	 */
	uint32_t wimg;

	/**
	 * 4 Character Code (4CC) describing what this range is.
	 *
	 * This has to be unique for each "type" of pages, meaning pages sharing
	 * the same register layout, if it is used for the I/O filter descriptors
	 * below. Otherwise it doesn't matter.
	 */
	uint32_t signature;
} pmap_io_range_t;

/* Reminder: be sure to change all relevant device trees if you change the layout of pmap_io_range_t */
_Static_assert(sizeof(pmap_io_range_t) == 24, "unexpected size for pmap_io_range_t");

/* Look up the pmap_io_range_t (if any) covering a given physical address. */
extern pmap_io_range_t* pmap_find_io_attr(pmap_paddr_t);
1940 
/**
 * This structure describes a sub-page-size I/O region owned by PPL but the kernel can write to.
 *
 * @note I/O filter software will use a collection of such data structures to determine access
 *       permissions to a page owned by PPL.
 *
 * @note The {signature, offset} key is used to index a collection of such data structures to
 *       optimize for space in the case where one page layout is repeated for many devices, such
 *       as the memory controller channels.
 */
typedef struct pmap_io_filter_entry {
	/* 4 Character Code (4CC) describing what this range (page) is. */
	uint32_t signature;

	/* Offset within the page. It has to be within [0, PAGE_SIZE). */
	uint16_t offset;

	/* Length of the range, and (offset + length) has to be within [0, PAGE_SIZE). */
	uint16_t length;
} pmap_io_filter_entry_t;

_Static_assert(sizeof(pmap_io_filter_entry_t) == 8, "unexpected size for pmap_io_filter_entry_t");

/* Look up the I/O filter entry for a physical address and length, also returning the owning range. */
extern pmap_io_filter_entry_t *pmap_find_io_filter_entry(pmap_paddr_t, uint64_t, const pmap_io_range_t **);

/* Per-CPU pmap data initialization. */
extern void pmap_cpu_data_init_internal(unsigned int);
1967 
1968 #endif /* _ARM_PMAP_PMAP_DATA_H_ */
1969