/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/**
 * This header file is used to store the types, prototypes, and inline functions
 * that define some of the most important data structures used in the pmap. This
 * header is only meant for sharing types within the pmap; if a type is meant to
 * be used by the rest of the kernel, then put it into osfmk/arm/pmap.h.
 */
#ifndef _ARM_PMAP_PMAP_DATA_H_
#define _ARM_PMAP_PMAP_DATA_H_

#include
#include
#include
#include
#include
#include
#include
#include

/* Temporary include before moving all ledger functions into pmap_data.c */
#include

/**
 * These headers are safe to be included in this file since they shouldn't rely
 * on any of the internal pmap header files (so no circular dependencies).
 */
#include
#include

/**
 * These values represent the first and last kernel-managed physical addresses.
 * We keep track of extra metadata on kernel-managed pages compared to other
 * pages (usually iBoot carved out memory or I/O).
 */
extern pmap_paddr_t vm_first_phys, vm_last_phys;

/**
 * Return whether the given address represents a kernel-managed physical page.
 *
 * Whether a page is considered "kernel-managed" is determined by the BootArgs
 * passed by the bootloader. Typically memory carved out by the bootloader as
 * well as I/O memory should return false.
 *
 * @param pa The physical address to check.
 */
static inline bool
pa_valid(pmap_paddr_t pa)
{
    return (pa >= vm_first_phys) && (pa < vm_last_phys);
}

/**
 * The pmap has a variety of data structures (pv_head_table/pp_attr_table) that
 * contain an entry for every kernel-managed page in the system. These data
 * structures are indexed with physical address indices ("pai") generated by
 * this function.
 *
 * The logic is simple since there should be one entry in each of these data
 * structures for each kernel-managed physical page in the system. These data
 * structures are allocated on boot based on the amount of memory available.
 *
 * @note PAIs are defined using the VM page size, which might not be identical
 *       to the underlying hardware page size for an arbitrary address space.
 *       This means that the data structures relying on PAIs will contain one
 *       entry for each VM page, not hardware page.
 *
 * @note This function is only valid for physical addresses that are
 *       kernel-managed.
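 *
 * As a usage sketch (illustrative only), translating an arbitrary physical
 * address into its per-page metadata entry looks like this (pai_to_pvh() is
 * defined below):
 *
 *   if (pa_valid(pa)) {
 *       const unsigned int pai = pa_index(pa);
 *       pv_entry_t **pvh = pai_to_pvh(pai);
 *       ...
 *   }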
 */
static inline unsigned int
pa_index(pmap_paddr_t pa)
{
    return (unsigned int)atop(pa - vm_first_phys);
}

/* See the definition of pv_head_table for more information. */
extern pv_entry_t **pv_head_table;

/* Represents a NULL entry in the pv_head_table. */
#define PV_ENTRY_NULL ((pv_entry_t *) 0)

/**
 * Given a physical address index, return the corresponding pv_head_table entry.
 *
 * @note Despite returning a pointer to a pv_entry_t pointer, the entry might
 *       actually be a different type of pointer (pt_entry_t or pt_desc_t)
 *       depending on the type for this entry. Determine the type using
 *       pvh_test_type().
 *
 * @param pai The index returned by pa_index() for the page whose pv_head_table
 *            entry should be retrieved.
 */
static inline pv_entry_t **
pai_to_pvh(unsigned int pai)
{
    return &pv_head_table[pai];
}

/**
 * Each pv_head_table entry can be one of four different types:
 *
 * - PVH_TYPE_NULL: No mappings to the physical page exist outside of the
 *                  physical aperture. Physical aperture mappings are not
 *                  tracked in the pv_head_table.
 *
 * - PVH_TYPE_PVEP: There are multiple mappings to the physical page.
 *                  These entries are linked lists of pv_entry_t objects (which
 *                  each contain a pointer to the associated PTE and a pointer
 *                  to the next entry in the list).
 *
 * - PVH_TYPE_PTEP: There is a single mapping to the physical page. Once more
 *                  mappings are created, this entry will get upgraded to an
 *                  entry of type PVH_TYPE_PVEP. These entries are pointers
 *                  directly to the page table entry that contains the mapping
 *                  (pt_entry_t*).
 *
 * - PVH_TYPE_PTDP: The physical page is being used as a page table. These
 *                  entries are pointers to page table descriptor structures
 *                  (pt_desc_t) which contain metadata related to each page
 *                  table.
 *
 * The type is stored in the bottom two bits of each pv_head_table entry. That
 * type must be checked before dereferencing the pointer to determine how to
 * interpret it.
 */
#define PVH_TYPE_NULL 0x0UL
#define PVH_TYPE_PVEP 0x1UL
#define PVH_TYPE_PTEP 0x2UL
#define PVH_TYPE_PTDP 0x3UL
#define PVH_TYPE_MASK (0x3UL)

#if defined(__arm64__)

/**
 * PV_HEAD_TABLE Flags.
 *
 * All flags listed below are stored in the pv_head_table entry/pointer
 * (per-physical-page) unless otherwise noted.
 *
 * Please update the pv_walk LLDB macro if these flags are changed or added to.
 */

/**
 * This flag is set for every mapping created by an IOMMU.
 *
 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
 */
#define PVH_FLAG_IOMMU 0x4UL

/**
 * This flag is only valid when PVH_FLAG_IOMMU is set. For an IOMMU mapping, if
 * this bit is set, then the PTE pointer points directly into the IOMMU page
 * table for this mapping. If this bit is cleared, then the "PTE pointer" is
 * actually a pointer to the IOMMU descriptor object that owns this mapping.
 *
 * There are cases where it's not easy to tie an IOMMU mapping directly to a
 * specific page table, so this allows us to at least get a pointer to which
 * IOMMU created this mapping, which is useful for debugging purposes.
 *
 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
 */
#define PVH_FLAG_IOMMU_TABLE (1ULL << 63)

/**
 * This flag is set when the first CPU (non-IOMMU) mapping is created. This is
 * important to keep track of because various accounting statistics are based on
 * the options specified for the first CPU mapping.
 * This flag, and thus the accounting statistics, will persist as long as there
 * are *any* mappings of the page (including IOMMU mappings). This works because
 * the accounting for a page should not need to change until the page is
 * recycled by the VM layer, and we double-check that there are no mappings
 * (CPU or IOMMU) when a page is recycled (see: pmap_verify_free()).
 */
#define PVH_FLAG_CPU (1ULL << 62)

/* This bit is used as a lock when modifying a pv_head_table entry. */
#define PVH_LOCK_BIT 61
#define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)

/**
 * This flag is set when there are any executable mappings to this physical
 * page. This is used to prevent any writable mappings from being created at
 * the same time an executable mapping exists.
 */
#define PVH_FLAG_EXEC (1ULL << 60)

/**
 * Marking a pv_head_table entry with this flag denotes that this page is a
 * kernelcache text or data page that shouldn't have dynamically-created
 * mappings. See PVH_FLAG_LOCKDOWN_MASK for more details.
 */
#define PVH_FLAG_LOCKDOWN_KC (1ULL << 59)

/**
 * This flag is used to mark that a page has been hashed into the hibernation
 * image.
 *
 * The hibernation driver will use this to ensure that all PPL-owned memory is
 * correctly included into the hibernation image (a missing PPL page could be
 * a security concern when coming out of hibernation).
 */
#define PVH_FLAG_HASHED (1ULL << 58)

/**
 * Marking a pv_head_table entry with this flag denotes that this page is a
 * code signature page that shouldn't have dynamically-created mappings.
 * See PVH_FLAG_LOCKDOWN_MASK for more details.
 */
#define PVH_FLAG_LOCKDOWN_CS (1ULL << 57)

/**
 * Marking a pv_head_table entry with this flag denotes that this page is a
 * read-only allocator page that shouldn't have dynamically-created mappings.
 * See PVH_FLAG_LOCKDOWN_MASK for more details.
 */
#define PVH_FLAG_LOCKDOWN_RO (1ULL << 56)

/**
 * Marking a pv_head_table entry with this flag denotes that this page is
 * retired without any mappings and should never be mapped again.
 */
#define PVH_FLAG_RETIRED (1ULL << 55)

/**
 * Flags which disallow a new mapping to a page.
 */
#define PVH_FLAG_NOMAP_MASK (PVH_FLAG_RETIRED)

/**
 * Marking a pv_head_table entry with this flag denotes that this page has
 * been mapped into a non-coherent coprocessor address space and requires a
 * cache flush operation once all mappings have been removed.
 */
#define PVH_FLAG_FLUSH_NEEDED (1ULL << 52)

/**
 * Marking a pv_head_table entry with any bit in this mask denotes that this page
 * has been locked down by the PPL. Locked down pages can't have new mappings
 * created or existing mappings removed, and all existing mappings will have been
 * converted to read-only. This essentially makes the page immutable.
 */
#define PVH_FLAG_LOCKDOWN_MASK (PVH_FLAG_LOCKDOWN_KC | PVH_FLAG_LOCKDOWN_CS | PVH_FLAG_LOCKDOWN_RO)

/**
 * These bits need to be set to safely dereference a pv_head_table
 * entry/pointer.
 *
 * Any change to this #define should also update the copy located in the pmap.py
 * LLDB macros file.
 */
#define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN_MASK | \
    PVH_FLAG_HASHED | PVH_FLAG_FLUSH_NEEDED | PVH_FLAG_RETIRED)

#endif /* defined(__arm64__) */

/* Mask used to clear out the TYPE bits from a pv_head_table entry/pointer. */
#define PVH_LIST_MASK (~PVH_TYPE_MASK)

/*
 * Which 32-bit word in each pv_head_table entry/pointer contains the LOCK bit.
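 *
 * As a sketch of the arithmetic (little-endian, illustrative only): with
 * PVH_LOCK_BIT == 61, the lock lives in bit (61 - 32) == 29 of 32-bit word 1
 * of the 64-bit entry, which is exactly what pvh_lock()/pvh_unlock() below
 * compute with PVH_LOCK_BIT - (PVH_LOCK_WORD * 32).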
 */
#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#endif /* defined(__arm64__) */

/**
 * Assert that a pv_head_table entry is locked. Will panic if the lock isn't
 * acquired.
 *
 * @param index The physical address index to check.
 */
static inline void
pvh_assert_locked(__assert_only unsigned int index)
{
    assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);
}

/**
 * Lock a pv_head_table entry.
 *
 * @param index The physical address index of the pv_head_table entry to lock.
 */
static inline void
pvh_lock(unsigned int index)
{
    pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
        PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
}

/**
 * Unlock a pv_head_table entry.
 *
 * @param index The physical address index of the pv_head_table entry to unlock.
 */
static inline void
pvh_unlock(unsigned int index)
{
    pvh_assert_locked(index);
    pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
        PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
}

/**
 * Check that a pv_head_table entry/pointer is a specific type.
 *
 * @param pvh The pv_head_table entry/pointer to check.
 * @param type The type to check for.
 *
 * @return True if the pv_head_table entry is of the passed in type, false
 *         otherwise.
 */
static inline bool
pvh_test_type(pv_entry_t **pvh, vm_offset_t type)
{
    return ((*(vm_offset_t *)pvh) & PVH_TYPE_MASK) == type;
}

/**
 * Convert a pv_head_table entry/pointer into a page table entry pointer. This
 * should only be done if the type of this entry is PVH_TYPE_PTEP.
 *
 * @param pvh The pv_head_table entry/pointer to convert into a pt_entry_t*.
 *
 * @return A safely dereferenceable pointer to the single mapping of this
 *         physical page, obtained by masking off the TYPE bits and adding any
 *         missing flags to the upper portion of the pointer.
 */
static inline pt_entry_t*
pvh_ptep(pv_entry_t **pvh)
{
    return (pt_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
}

/**
 * Convert a pv_head_table entry/pointer into a PVE list pointer. This
 * should only be done if the type of this entry is PVH_TYPE_PVEP.
 *
 * @param pvh The pv_head_table entry/pointer to convert into a safe to
 *            dereference pv_entry_t*.
 *
 * @return A safely dereferenceable pointer to the first mapping of this
 *         physical page, obtained by masking off the TYPE bits and adding any
 *         missing flags to the upper portion of the pointer.
 */
static inline pv_entry_t*
pvh_pve_list(pv_entry_t **pvh)
{
    return (pv_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
}

/**
 * Return the flags associated with a pv_head_table entry/pointer.
 *
 * @param pvh The pv_head_table entry whose flags to get.
 */
static inline vm_offset_t
pvh_get_flags(pv_entry_t **pvh)
{
    return (*(vm_offset_t *)pvh) & PVH_HIGH_FLAGS;
}

/**
 * Atomically set the flags associated with a pv_head_table entry/pointer.
 *
 * @param pvh The pv_head_table entry whose flags are getting set.
 * @param flags The new flags to set on the entry.
 */
static inline void
pvh_set_flags(pv_entry_t **pvh, vm_offset_t flags)
{
    os_atomic_store((vm_offset_t *)pvh, ((*(vm_offset_t *)pvh) & ~PVH_HIGH_FLAGS) | flags,
        relaxed);
}

/**
 * Update a pv_head_table entry/pointer to be a different type and/or point to
 * a different object.
 *
 * @note The pv_head_table entry MUST already be locked.
 *
 * @note This function will clobber any existing flags stored in the PVH pointer
 *       (except PVH_FLAG_LOCK). It's up to the caller to preserve flags if that
 *       functionality is needed (either by ensuring `pvep` contains those
 *       flags, or by manually setting the flags after this call).
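 *
 * A flag-preserving update is therefore a three-step sequence (illustrative
 * sketch; assumes the entry is already locked and the hypothetical `new_pvep`
 * is the new list head):
 *
 *   const vm_offset_t pvh_flags = pvh_get_flags(pvh);
 *   pvh_update_head(pvh, new_pvep, PVH_TYPE_PVEP);
 *   pvh_set_flags(pvh, pvh_flags);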
 *
 * @param pvh The pv_head_table entry/pointer to update.
 * @param pvep The new entry to use. This could be either a pt_entry_t*,
 *             pv_entry_t*, or pt_desc_t* depending on the type.
 * @param type The type of the new entry.
 */
static inline void
pvh_update_head(pv_entry_t **pvh, void *pvep, unsigned int type)
{
    assert((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK);
    os_atomic_store((vm_offset_t *)pvh, (vm_offset_t)pvep | type | PVH_FLAG_LOCK, relaxed);
}

/**
 * Update a pv_head_table entry/pointer to be a different type and/or point to
 * a different object.
 *
 * @note The pv_head_table entry MUST NOT already be locked.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed (either by ensuring `pvep` contains those flags, or by
 *       manually setting the flags after this call).
 *
 * @param pvh The pv_head_table entry/pointer to update.
 * @param pvep The new entry to use. This could be either a pt_entry_t*,
 *             pv_entry_t*, or pt_desc_t* depending on the type.
 * @param type The type of the new entry.
 */
static inline void
pvh_update_head_unlocked(pv_entry_t **pvh, void *pvep, unsigned int type)
{
    assert(!((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK));
    *(vm_offset_t *)pvh = ((vm_offset_t)pvep | type) & ~PVH_FLAG_LOCK;
}

/**
 * Given a page table entry pointer retrieved from the pv_head_table (from an
 * entry of type PVH_TYPE_PTEP or PVH_TYPE_PVEP), return whether the PTE is
 * an IOMMU mapping.
 *
 * @note The way this function determines whether the passed in pointer is
 *       pointing to an IOMMU PTE, is by checking for a special flag stored in
 *       the lower bits of the pointer. This flag is only set on pointers stored
 *       in the pv_head_table, and as such, this function will only work on
 *       pointers retrieved from the pv_head_table. If a pointer to a PTE was
 *       directly retrieved from an IOMMU's page tables, this function would
 *       always return false for it, even if it actually is an IOMMU PTE.
 *
 * @param ptep A PTE pointer obtained from the pv_head_table to check.
 *
 * @return True if the entry is an IOMMU mapping, false otherwise.
 */
static inline bool
pvh_ptep_is_iommu(const pt_entry_t *ptep)
{
#ifdef PVH_FLAG_IOMMU
    return (vm_offset_t)ptep & PVH_FLAG_IOMMU;
#else /* PVH_FLAG_IOMMU */
#pragma unused(ptep)
    return false;
#endif /* PVH_FLAG_IOMMU */
}

/**
 * Sometimes the PTE pointers retrieved from the pv_head_table (from an entry of
 * type PVH_TYPE_PTEP or PVH_TYPE_PVEP) contain flags themselves. This function
 * strips out those flags and returns a dereferenceable pointer.
 *
 * @param ptep The PTE pointer whose unwanted flags should be stripped out.
 *
 * @return A valid dereferenceable pointer to the page table entry.
 */
static inline const pt_entry_t*
pvh_strip_ptep(const pt_entry_t *ptep)
{
#ifdef PVH_FLAG_IOMMU
    const vm_offset_t pte_va = (vm_offset_t)ptep;
    return (const pt_entry_t*)((pte_va & ~PVH_FLAG_IOMMU) | PVH_FLAG_IOMMU_TABLE);
#else /* PVH_FLAG_IOMMU */
    return ptep;
#endif /* PVH_FLAG_IOMMU */
}

/**
 * PVH_TYPE_PVEP Helper Functions.
 *
 * The following are methods used to manipulate PVE lists. This is the type of
 * pv_head_table entry used when there are multiple mappings to a single
 * physical page.
 */

/**
 * Whether a physical page is using "alternate accounting" (ALTACCT) for its
 * ledger statistics is something that needs to be tracked on a per-mapping
 * basis, not on a per-physical-page basis.
 * Because of that, it's tracked
 * differently depending on whether there's a single mapping to a page
 * (PVH_TYPE_PTEP) or multiple (PVH_TYPE_PVEP). For single mappings, the bit is
 * tracked in the pp_attr_table. But when there are multiple mappings, the least
 * significant bit of the corresponding "pve_pte" pointer in each pv_entry object
 * is used as a marker for pages using alternate accounting.
 *
 * @note See the definition for PP_ATTR_ALTACCT for a more detailed description
 *       of what "alternate accounting" actually means with respect to the
 *       footprint ledger.
 *
 * Since some code (KernelDiskImages, e.g.) might map a physical page as
 * "device" memory (i.e. external) while it's also being used as regular
 * "anonymous" memory (i.e. internal) in user space, we have to manage the
 * "internal" attribute per mapping rather than per physical page.
 * When there are multiple mappings, we use the next least significant bit of
 * the corresponding "pve_pte" pointer for that.
 */
#define PVE_PTEP_ALTACCT ((uintptr_t) 0x1)
#define PVE_PTEP_INTERNAL ((uintptr_t) 0x2)
#define PVE_PTEP_FLAGS (PVE_PTEP_ALTACCT | PVE_PTEP_INTERNAL)

/**
 * Set the ALTACCT bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
pve_set_altacct(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_ALTACCT);
}

/**
 * Set the INTERNAL bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
pve_set_internal(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_INTERNAL);
}

/**
 * Clear the ALTACCT bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
pve_clr_altacct(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_ALTACCT);
}

/**
 * Clear the INTERNAL bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
pve_clr_internal(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_INTERNAL);
}

/**
 * Return the ALTACCT bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline bool
pve_get_altacct(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_ALTACCT;
}

/**
 * Return the INTERNAL bit for a specific PTE pointer.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline bool
pve_get_internal(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_INTERNAL;
}

/**
 * Return the next mapping (pv_entry) in a linked list of mappings.
 * This applies
 * to pv_head_table entries of type PVH_TYPE_PVEP.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 *
 * @return The next virtual mapping for a physical page, or PV_ENTRY_NULL if the
 *         end of the list has been reached.
 */
static inline pv_entry_t *
pve_next(pv_entry_t *pvep)
{
    return pvep->pve_next;
}

/**
 * Return a pointer to the pve_next field in a pv_entry. This value is used
 * when adding and removing entries to a PVE list.
 *
 * @param pvep The pv_entry whose pve_next field is being accessed.
 *
 * @return Pointer to the pve_next field.
 */
static inline pv_entry_t **
pve_next_ptr(pv_entry_t *pvep)
{
    return &pvep->pve_next;
}

/**
 * Return a pointer to the page table entry for this mapping.
 *
 * @param pvep The pv_entry whose pve_ptep field is to be returned.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 *
 * @return Pointer to the page table entry.
 */
static inline pt_entry_t *
pve_get_ptep(pv_entry_t *pvep, unsigned idx)
{
    assert(idx < PTE_PER_PVE);
    return (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_FLAGS);
}

/**
 * Update the page table entry for a specific physical to virtual mapping.
 *
 * @param pvep The pv_entry to update.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 * @param ptep_new The new page table entry.
 */
static inline void
pve_set_ptep(pv_entry_t *pvep, unsigned idx, pt_entry_t *ptep_new)
{
    assert(idx < PTE_PER_PVE);
    pvep->pve_ptep[idx] = ptep_new;
}

/**
 * Initialize all fields in a PVE to NULL.
 *
 * @param pvep The pv_entry to initialize.
 */
static inline void
pve_init(pv_entry_t *pvep)
{
    pvep->pve_next = PV_ENTRY_NULL;
    for (int i = 0; i < PTE_PER_PVE; i++) {
        pvep->pve_ptep[i] = PT_ENTRY_NULL;
    }
}

/**
 * Find a PTE pointer in a PVE and return its index.
 *
 * @param pvep The PVE to search.
 * @param ptep PTE to search for.
 *
 * @return Index of the found entry, or -1 if no entry exists.
 */
static inline int
pve_find_ptep_index(pv_entry_t *pvep, pt_entry_t *ptep)
{
    for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
        if (pve_get_ptep(pvep, i) == ptep) {
            return (int)i;
        }
    }
    return -1;
}

/**
 * Check whether any PTEs are currently associated with this PVE.
 *
 * @param pvep The PVE to search.
 *
 * @return True if no PTEs are currently associated with this PVE, false
 *         otherwise.
 */
static inline bool
pve_is_empty(pv_entry_t *pvep)
{
    for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
        if (pve_get_ptep(pvep, i) != PT_ENTRY_NULL) {
            return false;
        }
    }
    return true;
}

/**
 * Prepend a new pv_entry node to a PVE list.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed (either by ensuring `pvep` contains those flags, or by
 *       manually setting the flags after this call).
 *
 * @param pvh The linked list of mappings to update.
 * @param pvep The new mapping to add to the linked list.
 */
static inline void
pve_add(pv_entry_t **pvh, pv_entry_t *pvep)
{
    assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
    pvep->pve_next = pvh_pve_list(pvh);
    pvh_update_head(pvh, pvep, PVH_TYPE_PVEP);
}

/**
 * Remove an entry from a PVE list of mappings.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed.
 *
 * @param pvh The pv_head_table entry of the PVE list to remove a mapping from.
 *            This is the first entry in the list of pv_entry_t mappings.
 * @param pvepp A pointer to the pv_entry_t* that's being removed.
 *              If this entry is the first in the linked list of mappings, then
 *              this should be identical to the pv_head_table entry. If the
 *              mapping isn't the first, then this is a pointer to the pve_next
 *              field in the previous mapping.
 * @param pvep The entry that should be removed. Should be identical to a
 *             dereference of the pvepp parameter (unless it's the pv_head_table
 *             entry).
 */
static inline void
pve_remove(pv_entry_t **pvh, pv_entry_t **pvepp, pv_entry_t *pvep)
{
    assert(pvh_test_type(pvh, PVH_TYPE_PVEP));

    if (pvepp == pvh) {
        if (pve_next(pvep) == PV_ENTRY_NULL) {
            /* The last mapping to this page is being removed. */
            pvh_update_head(pvh, PV_ENTRY_NULL, PVH_TYPE_NULL);
        } else {
            /**
             * There are still mappings left, make the next one the new head of
             * the list. This effectively removes the first entry from the list.
             */
            pvh_update_head(pvh, pve_next(pvep), PVH_TYPE_PVEP);
        }
    } else {
        /**
         * Move the previous entry's next field to the entry after the one being
         * removed. This will clobber the ALTACCT and INTERNAL bits.
         */
        *pvepp = pve_next(pvep);
    }
}

/**
 * PVH_TYPE_PTDP Types and Helper Functions.
 *
 * The following are types and methods used to manipulate page table descriptor
 * (PTD) objects. This is the type of pv_head_table entry used when a page is
 * being used as a page table.
 */

/**
 * When the pmap layer allocates memory, it always does so in chunks of the VM
 * page size (which are represented by the PAGE_SIZE/PAGE_SHIFT macros). The VM
 * page size might not match up with the hardware page size for a given address
 * space (this is especially true on systems that support more than one page
 * size).
 *
 * The pv_head_table is allocated to have one entry per VM page, not hardware
 * page (which can change depending on the address space). Because of that, a
 * single VM-page-sized region (single pv_head_table entry) can potentially hold
 * up to four page tables. Only one page table descriptor (PTD) is allocated per
 * pv_head_table entry (per VM page), so on some systems, one PTD might have to
 * keep track of up to four different page tables.
 */
#if __ARM_MIXED_PAGE_SIZE__
#define PT_INDEX_MAX (ARM_PGBYTES / 4096)
#elif (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX 1
#elif (ARM_PGSHIFT == 12)
#define PT_INDEX_MAX 4
#else
#error Unsupported ARM_PGSHIFT
#endif /* __ARM_MIXED_PAGE_SIZE__ || ARM_PGSHIFT == 14 || ARM_PGSHIFT == 12 */

/**
 * Page table descriptor (PTD) info structure.
 *
 * Contains information about a page table. These pieces of data are separate
 * from the PTD itself because in address spaces where the VM page size doesn't
 * match the underlying hardware page size, one PTD could represent multiple
 * page tables (and so will need multiple PTD info structures).
 *
 * These fields are also in their own struct so that they can be allocated
 * separately from the associated pt_desc_t object. This allows us to allocate
 * the counts in this structure in a way that ensures they don't fall within the
 * same cache line as the main pt_desc_t object. This is important because the
 * fields in this structure are atomically updated which could cause false
 * sharing cache performance issues with the "va" field in pt_desc_t if all of
 * the fields were within the same structure.
 */
typedef struct {
    /**
     * Pre-defined sentinel values for ptd_info_t.refcnt. If these refcnt values
     * change, make sure to update the showpte LLDB macro to reflect the
     * changes.
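     *
     * For example (illustrative sketch; `ptd` is a pt_desc_t pointer), code
     * that needs to distinguish IOMMU pages from CPU page tables can compare
     * against these sentinels:
     *
     *   if (ptd->ptd_info[0].refcnt >= PT_DESC_IOMMU_GRANTED_REFCOUNT) {
     *       // This PTD tracks an IOMMU page, not a CPU page table.
     *   }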
     */
#define PT_DESC_REFCOUNT 0x4000U
#define PT_DESC_IOMMU_GRANTED_REFCOUNT 0x8000U
#define PT_DESC_IOMMU_ACCEPTED_REFCOUNT 0x8001U

    /*
     * For non-leaf pagetables, should always be PT_DESC_REFCOUNT.
     * For leaf pagetables, should reflect the number of non-empty PTEs.
     * For IOMMU pages, should always be either PT_DESC_IOMMU_GRANTED_REFCOUNT
     * or PT_DESC_IOMMU_ACCEPTED_REFCOUNT.
     */
    unsigned short refcnt;

    /*
     * For non-leaf pagetables, should be 0.
     * For leaf pagetables, should reflect the number of wired entries.
     * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU
     * operations are implicitly wired).
     */
    unsigned short wiredcnt;
} ptd_info_t;

/**
 * Page Table Descriptor (PTD).
 *
 * Provides a per-table data structure and a way of keeping track of all page
 * tables in the system.
 *
 * This structure is also used as a convenient way of keeping track of IOMMU
 * pages (which may or may not be used as page tables). In that case the "iommu"
 * field will point to the owner of the page, ptd_info[0].refcnt will be
 * PT_DESC_IOMMU_GRANTED_REFCOUNT or PT_DESC_IOMMU_ACCEPTED_REFCOUNT, and
 * ptd_info[0].wiredcnt can be used as an arbitrary refcnt controlled by the
 * IOMMU driver.
 */
typedef struct pt_desc {
    /**
     * This queue chain provides a mechanism for keeping a list of pages
     * being used as page tables. This is used to potentially reclaim userspace
     * page tables as a fast way of "allocating" a page.
     *
     * Refer to osfmk/kern/queue.h for more information about queue chains.
     */
    queue_chain_t pt_page;

    /* Each page table is either owned by a pmap or a specific IOMMU. */
    union {
        struct pmap *pmap;
    };

    /**
     * The following fields contain per-page-table properties, and as such,
     * might have multiple elements each. This is due to a single PTD
     * potentially representing multiple page tables (in address spaces where
     * the VM page size differs from the hardware page size). Use the
     * ptd_get_index() function to get the correct index for a specific page
     * table.
     */

    /**
     * The first address of the virtual address space this page table is
     * translating for, or a value set by an IOMMU driver if this PTD is being
     * used to track an IOMMU page.
     */
    vm_offset_t va[PT_INDEX_MAX];

    /**
     * ptd_info_t's are allocated separately so as to reduce false sharing
     * with the va field. This is desirable because ptd_info_t's are updated
     * atomically from all CPUs.
     */
    ptd_info_t *ptd_info;
} pt_desc_t;

/**
 * Convert a pv_head_table entry/pointer into a page table descriptor pointer.
 * This should only be done if the type of this entry is PVH_TYPE_PTDP.
 *
 * @param pvh The pv_head_table entry/pointer to convert into a safe to
 *            dereference pt_desc_t*.
 *
 * @return A safely dereferenceable pointer to the page table descriptor
 *         for this physical page, obtained by masking off the TYPE bits and
 *         adding any missing flags to the upper portion of the pointer.
 */
static inline pt_desc_t*
pvh_ptd(pv_entry_t **pvh)
{
    return (pt_desc_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
}

/**
 * Given an arbitrary page table entry, return the page table descriptor
 * (PTD) object for the page table that contains that entry.
 *
 * @param ptep Pointer to a PTE whose page table descriptor object to return.
 *
 * @return The PTD object for the passed in page table.
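 *
 * A typical lookup chain built on this function (illustrative only; `ptep`
 * must point into a page that's actually being used as a page table, and
 * ptd_get_va() is defined further below):
 *
 *   pt_desc_t *ptdp = ptep_get_ptd(ptep);
 *   struct pmap *owner = ptdp->pmap;
 *   vm_map_address_t va = ptd_get_va(ptdp, ptep);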
 */
static inline pt_desc_t *
ptep_get_ptd(const pt_entry_t *ptep)
{
    assert(ptep != NULL);

    const vm_offset_t pt_base_va = (vm_offset_t)ptep;
    pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop(pt_base_va)));

    if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
        panic("%s: invalid PV head 0x%llx for PTE %p", __func__, (uint64_t)(*pvh), ptep);
    }

    return pvh_ptd(pvh);
}

/**
 * Given an arbitrary page table entry, return the pmap that owns that
 * page table.
 *
 * @note This won't work correctly for page tables owned by IOMMUs, because
 *       those tables aren't owned by any specific pmap.
 *
 * @param ptep Pointer to a page table entry whose owner we're trying to return.
 *
 * @return The pmap that owns the given page table entry.
 */
static inline struct pmap *
ptep_get_pmap(const pt_entry_t *ptep)
{
    return ptep_get_ptd(ptep)->pmap;
}

/**
 * Given an arbitrary translation table entry, get the page table descriptor
 * (PTD) object for the page table pointed to by the TTE.
 *
 * @param tte The translation table entry to parse. For instance, if this is an
 *            L2 TTE, then the PTD for the L3 table this entry points to will be
 *            returned.
 *
 * @return The page table descriptor (PTD) for the page table pointed to by this
 *         TTE.
 */
static inline pt_desc_t *
tte_get_ptd(const tt_entry_t tte)
{
    const vm_offset_t pt_base_va = (vm_offset_t)(tte & ~((tt_entry_t)PAGE_MASK));
    pv_entry_t **pvh = pai_to_pvh(pa_index(pt_base_va));

    if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
        panic("%s: invalid PV head 0x%llx for TTE 0x%llx", __func__, (uint64_t)(*pvh), (uint64_t)tte);
    }

    return pvh_ptd(pvh);
}

/**
 * In address spaces where the VM page size doesn't match the underlying
 * hardware page size, one PTD could represent multiple page tables. This
 * function returns the correct index value depending on which page table is
 * being accessed. That index value can then be used to access the
 * per-page-table properties stored within a PTD.
 *
 * @note See the description above the PT_INDEX_MAX definition for a more
 *       detailed explanation of why multiple page tables can be represented
 *       by a single PTD object in the pv_head_table.
 *
 * @param ptd The page table descriptor that's being accessed.
 * @param ttep Pointer to the translation table entry that's being accessed.
 *
 * @return The correct index value for a specific, hardware-sized page
 *         table.
 */
static inline unsigned
ptd_get_index(__unused const pt_desc_t *ptd, __unused const tt_entry_t *ttep)
{
#if PT_INDEX_MAX == 1
    return 0;
#else
    assert(ptd != NULL);

    const uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
    const vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;

    /**
     * Use the difference between the VM page shift and the hardware page shift
     * to get the index of the correct page table. In practice, this equates to
     * masking out the bottom two bits of the L3 table index in address spaces
     * where the VM page size is greater than the hardware page size. In address
     * spaces where they're identical, the index will always be zero.
     */
    const unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
    assert(ttep_index < PT_INDEX_MAX);

    return ttep_index;
#endif
}

/**
 * In address spaces where the VM page size doesn't match the underlying
 * hardware page size, one PTD could represent multiple page tables. This
 * function returns the correct ptd_info_t structure depending on which page
 * table is being accessed.
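 *
 * For example (illustrative sketch only), after making a PTE valid in a leaf
 * table, that table's non-empty-entry count could be bumped through the
 * ptep_get_info() wrapper defined below:
 *
 *   ptd_info_t *pinfo = ptep_get_info(ptep);
 *   os_atomic_inc(&pinfo->refcnt, relaxed);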
 *
 * @note See the description above the PT_INDEX_MAX definition for a more
 *       detailed explanation of why multiple page tables can be represented
 *       by a single PTD object in the pv_head_table.
 *
 * @param ptd The page table descriptor that's being accessed.
 * @param ttep Pointer to the translation table entry that's being accessed.
 *
 * @return The correct ptd_info_t structure for a specific, hardware-sized page
 *         table.
 */
static inline ptd_info_t *
ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
{
    assert((ptd != NULL) && (ptd->ptd_info[0].refcnt < PT_DESC_IOMMU_GRANTED_REFCOUNT));
    return &ptd->ptd_info[ptd_get_index(ptd, ttep)];
}

/**
 * Given a pointer to a page table entry, return the ptd_info structure
 * for the page table that contains that entry.
 *
 * @param ptep Pointer to a PTE whose ptd_info object to return.
 *
 * @return The ptd_info object for the page table that contains the passed in
 *         page table entry.
 */
static inline ptd_info_t *
ptep_get_info(const pt_entry_t *ptep)
{
    return ptd_get_info(ptep_get_ptd(ptep), ptep);
}

/**
 * Return the virtual address mapped by the passed in leaf page table entry,
 * using an already-retrieved page table descriptor.
 *
 * @param ptdp Pointer to the descriptor for the page table containing ptep.
 * @param ptep Pointer to a PTE to parse.
 */
static inline vm_map_address_t
ptd_get_va(const pt_desc_t *ptdp, const pt_entry_t *ptep)
{
    const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptdp->pmap);

    vm_map_address_t va = ptdp->va[ptd_get_index(ptdp, ptep)];
    vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);

    va += (ptep_index << pt_attr_leaf_shift(pt_attr));

    return va;
}

/**
 * Return the virtual address that is being mapped by the passed in leaf page
 * table entry.
 *
 * @param ptep Pointer to a PTE to parse.
 */
static inline vm_map_address_t
ptep_get_va(const pt_entry_t *ptep)
{
    return ptd_get_va(ptep_get_ptd(ptep), ptep);
}

/**
 * Physical Page Attribute Table (pp_attr_table) defines and helper functions.
 */

/* How many bits to use for flags on a per-VM-page basis. */
typedef uint16_t pp_attr_t;

/* See the definition of pp_attr_table for more information. */
extern volatile pp_attr_t* pp_attr_table;

/**
 * Flags stored in the pp_attr_table on a per-physical-page basis.
 *
 * Please update the pv_walk LLDB macro if these flags are changed or added to.
 */

/**
 * The bottom 6 bits are used to store the default WIMG (cacheability and memory
 * type) setting for this physical page. This can be changed by calling
 * pmap_set_cache_attributes().
 *
 * If a default WIMG setting isn't set for a page, then the default is Normal,
 * Cached memory (VM_WIMG_DEFAULT).
 */
#define PP_ATTR_WIMG_MASK 0x003F
#define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)

/**
 * The reference and modify bits keep track of whether a page has been accessed
 * or modified since the last time the bits were cleared. These bits are used to
 * enforce policy decisions in the VM layer.
 */
#define PP_ATTR_REFERENCED 0x0040
#define PP_ATTR_MODIFIED 0x0080

/**
 * This physical page is being used as anonymous memory that's internally
 * managed by the VM and is not connected to an external pager. This flag is
 * only set/cleared on the first CPU mapping of a page (see PVH_FLAG_CPU). Any
 * subsequent mappings won't set/clear this flag until all mappings are removed
 * and a new CPU mapping is added.
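 *
 * An illustrative test (the ppattr helpers used here are defined further
 * below in this header):
 *
 *   if (ppattr_test_internal(pai)) {
 *       // Page is currently accounted for as internal/anonymous memory.
 *   }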
 */
#define PP_ATTR_INTERNAL 0x0100

/**
 * This flag is used to keep track of pages that are still resident but are not
 * considered dirty and can be reclaimed under memory pressure. These pages do
 * not count as a part of the memory footprint, so the footprint ledger does not
 * need to be updated for these pages. This is hinted to the VM by the
 * `madvise(MADV_FREE_REUSABLE)` system call.
 */
#define PP_ATTR_REUSABLE 0x0200

/**
 * This flag denotes that a page is utilizing "alternate accounting". This means
 * that the pmap doesn't need to keep track of these pages with regards to the
 * footprint ledger because the VM is already accounting for them in a different
 * way. These include IOKit mappings (VM adds their entire virtual size to the
 * footprint), and purgeable pages (VM counts them only when non-volatile and
 * only for one "owner"), among others.
 *
 * Note that alternate accounting status is tracked on a per-mapping basis (not
 * per-page). Because of that the ALTACCT flag in the pp_attr_table is only used
 * when there's a single mapping to a page. When there are multiple mappings,
 * the status of this flag is tracked in the pv_head_table (see PVE_PTEP_ALTACCT
 * above).
 */
#define PP_ATTR_ALTACCT 0x0400

/**
 * This bit was originally used on x86 to keep track of what pages to not
 * encrypt during the hibernation process as a performance optimization when
 * encryption was done in software. This doesn't apply to the ARM
 * hibernation process because all pages are automatically encrypted using
 * hardware acceleration. Despite that, the pmap still keeps track of this flag
 * as a debugging aid on internal builds.
 *
 * TODO: This bit can probably be reclaimed:
 * rdar://70740650 (PMAP Cleanup: Potentially reclaim the PP_ATTR_NOENCRYPT bit on ARM)
 */
#define PP_ATTR_NOENCRYPT 0x0800

/**
 * These bits denote that a physical page is expecting the next access or
 * modification to set the PP_ATTR_REFERENCED and PP_ATTR_MODIFIED flags,
 * respectively.
 */
#define PP_ATTR_REFFAULT 0x1000
#define PP_ATTR_MODFAULT 0x2000

#if XNU_MONITOR
/**
 * Denotes that a page is owned by the PPL. This is modified/checked with the
 * PVH lock held, to avoid ownership-related races. This does not need to be a
 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
 * put the bit.
 */
#define PP_ATTR_MONITOR 0x4000

/**
 * Denotes that a page *cannot* be owned by the PPL. This is required in order
 * to temporarily 'pin' kernel pages that are used to store PPL output
 * parameters. Otherwise a malicious or buggy caller could pass PPL-owned memory
 * for these parameters and in so doing stage a write gadget against the PPL.
 */
#define PP_ATTR_NO_MONITOR 0x8000

/**
 * All of the bits owned by the PPL; kernel requests to set or clear these bits
 * are illegal.
 */
#define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
#endif /* XNU_MONITOR */

/**
 * Atomically set some flags in a pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 * @param bits The flags to set in the entry.
 */
static inline void
ppattr_set_bits(unsigned int pai, pp_attr_t bits)
{
    volatile pp_attr_t *ppattr = &pp_attr_table[pai];
    os_atomic_or(ppattr, bits, acq_rel);
}

/**
 * Atomically clear some flags in a pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 * @param bits The flags to clear in the entry.
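 *
 * Since `bits` is just a mask, multiple flags can be updated in a single
 * atomic operation, e.g. (illustrative):
 *
 *   ppattr_clear_bits(pai, PP_ATTR_REFFAULT | PP_ATTR_MODFAULT);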
 */
static inline void
ppattr_clear_bits(unsigned int pai, pp_attr_t bits)
{
    volatile pp_attr_t *ppattr = &pp_attr_table[pai];
    os_atomic_andnot(ppattr, bits, acq_rel);
}

/**
 * Return true if the pp_attr_table entry contains the passed in bits.
 *
 * @param pai The physical address index for the entry to test.
 * @param bits The flags to check for.
 */
static inline bool
ppattr_test_bits(unsigned int pai, pp_attr_t bits)
{
    const volatile pp_attr_t *ppattr = &pp_attr_table[pai];
    return (*ppattr & bits) == bits;
}

/**
 * Only set some flags in a pp_attr_table entry if the passed in physical
 * address is a kernel-managed address.
 *
 * @param pa The physical address for the entry to update.
 * @param bits The flags to set in the entry.
 */
static inline void
ppattr_pa_set_bits(pmap_paddr_t pa, pp_attr_t bits)
{
    if (pa_valid(pa)) {
        ppattr_set_bits(pa_index(pa), bits);
    }
}

/**
 * Only clear some flags in a pp_attr_table entry if the passed in physical
 * address is a kernel-managed address.
 *
 * @param pa The physical address for the entry to update.
 * @param bits The flags to clear in the entry.
 */
static inline void
ppattr_pa_clear_bits(pmap_paddr_t pa, pp_attr_t bits)
{
    if (pa_valid(pa)) {
        ppattr_clear_bits(pa_index(pa), bits);
    }
}

/**
 * Only test flags in a pp_attr_table entry if the passed in physical address
 * is a kernel-managed page.
 *
 * @param pa The physical address for the entry to test.
 * @param bits The flags to check for.
 *
 * @return False if the PA isn't a kernel-managed page, otherwise true/false
 *         depending on whether the bits are set.
 */
static inline bool
ppattr_pa_test_bits(pmap_paddr_t pa, pp_attr_t bits)
{
    return pa_valid(pa) ? ppattr_test_bits(pa_index(pa), bits) : false;
}

/**
 * Set the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the passed
 * in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_modify(pmap_paddr_t pa)
{
    ppattr_pa_set_bits(pa, PP_ATTR_MODIFIED);
}

/**
 * Clear the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_modify(pmap_paddr_t pa)
{
    ppattr_pa_clear_bits(pa, PP_ATTR_MODIFIED);
}

/**
 * Set the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_reference(pmap_paddr_t pa)
{
    ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
}

/**
 * Clear the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_reference(pmap_paddr_t pa)
{
    ppattr_pa_clear_bits(pa, PP_ATTR_REFERENCED);
}

#if XNU_MONITOR
/**
 * Set the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the passed
 * in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_monitor(pmap_paddr_t pa)
{
    ppattr_pa_set_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Clear the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_monitor(pmap_paddr_t pa)
{
    ppattr_pa_clear_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Only test for the PP_ATTR_MONITOR flag in a pp_attr_table entry if the passed
 * in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to test.
 *
 * @return False if the PA isn't a kernel-managed page, otherwise true/false
 *         depending on whether the PP_ATTR_MONITOR is set.
 */
static inline bool
ppattr_pa_test_monitor(pmap_paddr_t pa)
{
    return ppattr_pa_test_bits(pa, PP_ATTR_MONITOR);
}

/**
 * Set the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_set_no_monitor(pmap_paddr_t pa)
{
    ppattr_pa_set_bits(pa, PP_ATTR_NO_MONITOR);
}

/**
 * Clear the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to update.
 */
static inline void
ppattr_pa_clear_no_monitor(pmap_paddr_t pa)
{
    ppattr_pa_clear_bits(pa, PP_ATTR_NO_MONITOR);
}

/**
 * Only test for the PP_ATTR_NO_MONITOR flag in a pp_attr_table entry if the
 * passed in physical address is a kernel-managed page.
 *
 * @param pa The physical address for the entry to test.
 *
 * @return False if the PA isn't a kernel-managed page, otherwise true/false
 *         depending on whether the PP_ATTR_NO_MONITOR is set.
 */
static inline bool
ppattr_pa_test_no_monitor(pmap_paddr_t pa)
{
    return ppattr_pa_test_bits(pa, PP_ATTR_NO_MONITOR);
}
#endif /* XNU_MONITOR */

/**
 * Set the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_internal(unsigned int pai)
{
    ppattr_set_bits(pai, PP_ATTR_INTERNAL);
}

/**
 * Clear the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_internal(unsigned int pai)
{
    ppattr_clear_bits(pai, PP_ATTR_INTERNAL);
}

/**
 * Return true if the pp_attr_table entry has the PP_ATTR_INTERNAL flag set.
 *
 * @param pai The physical address index for the entry to test.
 */
static inline bool
ppattr_test_internal(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
}

/**
 * Set the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_reusable(unsigned int pai)
{
    ppattr_set_bits(pai, PP_ATTR_REUSABLE);
}

/**
 * Clear the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_reusable(unsigned int pai)
{
    ppattr_clear_bits(pai, PP_ATTR_REUSABLE);
}

/**
 * Return true if the pp_attr_table entry has the PP_ATTR_REUSABLE flag set.
 *
 * @param pai The physical address index for the entry to test.
 */
static inline bool
ppattr_test_reusable(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_REUSABLE);
}

/**
 * Set the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
 *
 * @note This is only valid when the ALTACCT flag is being tracked using the
 *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
 *       PP_ATTR_ALTACCT definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
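 *
 * Callers generally shouldn't use this helper directly for an arbitrary
 * mapping; the ppattr_pve_* wrappers below dispatch between this per-page bit
 * and the per-mapping PVE bit, e.g. (illustrative):
 *
 *   ppattr_pve_set_altacct(pai, pvep, idx); // pvep == PV_ENTRY_NULL for a
 *                                           // single (PVH_TYPE_PTEP) mapping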
 */
static inline void
ppattr_set_altacct(unsigned int pai)
{
    ppattr_set_bits(pai, PP_ATTR_ALTACCT);
}

/**
 * Clear the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
 *
 * @note This is only valid when the ALTACCT flag is being tracked using the
 *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
 *       PP_ATTR_ALTACCT definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_altacct(unsigned int pai)
{
    ppattr_clear_bits(pai, PP_ATTR_ALTACCT);
}

/**
 * Get the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
 *
 * @note This is only valid when the ALTACCT flag is being tracked using the
 *       pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
 *       PP_ATTR_ALTACCT definitions for more information.
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if the passed in page uses alternate accounting, false
 *         otherwise.
 */
static inline bool
ppattr_is_altacct(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_ALTACCT);
}

/**
 * Get the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
 *
 * @note This is only valid when the INTERNAL flag is being tracked using the
 *       pp_attr_table. See the descriptions above the PVE_PTEP_INTERNAL and
 *       PP_ATTR_INTERNAL definitions for more information.
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if the passed in page is accounted for as "internal", false
 *         otherwise.
 */
static inline bool
ppattr_is_internal(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
}

/**
 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for determining whether alternate
 * accounting is set for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to test.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 *
 * @return True if the passed in page uses alternate accounting, false
 *         otherwise.
 */
static inline bool
ppattr_pve_is_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    return (pvep == PV_ENTRY_NULL) ? ppattr_is_altacct(pai) : pve_get_altacct(pvep, idx);
}

/**
 * The "internal" (INTERNAL) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for determining whether "internal"
 * is set for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to test.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 *
 * @return True if the passed in page is "internal", false otherwise.
 */
static inline bool
ppattr_pve_is_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    return (pvep == PV_ENTRY_NULL) ?
           ppattr_is_internal(pai) : pve_get_internal(pvep, idx);
}

/**
 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for setting the alternate accounting
 * status for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
ppattr_pve_set_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    if (pvep == PV_ENTRY_NULL) {
        ppattr_set_altacct(pai);
    } else {
        pve_set_altacct(pvep, idx);
    }
}

/**
 * The "internal" (INTERNAL) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for setting the "internal" status
 * for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
ppattr_pve_set_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    if (pvep == PV_ENTRY_NULL) {
        ppattr_set_internal(pai);
    } else {
        pve_set_internal(pvep, idx);
    }
}

/**
 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for clearing the alternate accounting
 * status for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
ppattr_pve_clr_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    if (pvep == PV_ENTRY_NULL) {
        ppattr_clear_altacct(pai);
    } else {
        pve_clr_altacct(pvep, idx);
    }
}

/**
 * The "internal" (INTERNAL) status for a page is tracked differently
 * depending on whether there are one or multiple mappings to a page. This
 * function abstracts out the difference between single and multiple mappings to
 * a page and provides a single function for clearing the "internal" status
 * for a mapping.
 *
 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
 *       definitions for more information.
 *
 * @param pai The physical address index for the entry to update.
 * @param pvep Pointer to the pv_entry_t object containing that mapping.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 */
static inline void
ppattr_pve_clr_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
{
    if (pvep == PV_ENTRY_NULL) {
        ppattr_clear_internal(pai);
    } else {
        pve_clr_internal(pvep, idx);
    }
}

/**
 * Set the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
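 *
 * An illustrative fault-path sketch (assumes the appropriate locks are held):
 *
 *   if (ppattr_test_reffault(pai)) {
 *       ppattr_set_bits(pai, PP_ATTR_REFERENCED);
 *       ppattr_clear_reffault(pai);
 *   }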
 */
static inline void
ppattr_set_reffault(unsigned int pai)
{
    ppattr_set_bits(pai, PP_ATTR_REFFAULT);
}

/**
 * Clear the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_reffault(unsigned int pai)
{
    ppattr_clear_bits(pai, PP_ATTR_REFFAULT);
}

/**
 * Return true if the pp_attr_table entry has the PP_ATTR_REFFAULT flag set.
 *
 * @param pai The physical address index for the entry to test.
 */
static inline bool
ppattr_test_reffault(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_REFFAULT);
}

/**
 * Set the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_modfault(unsigned int pai)
{
    ppattr_set_bits(pai, PP_ATTR_MODFAULT);
}

/**
 * Clear the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_modfault(unsigned int pai)
{
    ppattr_clear_bits(pai, PP_ATTR_MODFAULT);
}

/**
 * Return true if the pp_attr_table entry has the PP_ATTR_MODFAULT flag set.
 *
 * @param pai The physical address index for the entry to test.
 */
static inline bool
ppattr_test_modfault(unsigned int pai)
{
    return ppattr_test_bits(pai, PP_ATTR_MODFAULT);
}

static inline boolean_t
pmap_is_preemptible(void)
{
    return preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT);
}

/**
 * This helper function ensures that potentially-long-running batched PPL
 * operations are called in preemptible context before entering the PPL, so that
 * the PPL call may periodically exit to allow pending urgent ASTs to be taken.
 */
static inline void
pmap_verify_preemptible(void)
{
    assert(pmap_is_preemptible());
}

/**
 * The minimum number of pages to keep in the PPL page free list.
 *
 * We define our target as 8 pages: enough for 2 page table pages, a PTD page,
 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
 * a single pmap_enter request.
 */
#define PMAP_MIN_FREE_PPL_PAGES 8

/**
 * Flags passed to various page allocation functions, usually accessed through
 * the pmap_pages_alloc_zeroed() API. Each function that can take these flags as
 * a part of its option field, will describe these flags in its function header.
 */

/**
 * Instructs the allocation function to return immediately if no pages are
 * currently available. Without this flag, the function will spin and wait for a
 * page to become available. This flag can be required in some circumstances
 * (for instance, when allocating pages from within the PPL).
 */
#define PMAP_PAGES_ALLOCATE_NOWAIT 0x1

/**
 * Instructs an allocation function to fall back to reclaiming a userspace page
 * table if it failed to allocate a page from the free lists. This can be useful
 * when allocating from within the PPL because refilling the free lists requires
 * exiting and re-entering the PPL (which incurs extra latency).
 *
 * This is a quick way of allocating a page at the expense of having to
 * reallocate the table the next time one of its mappings is accessed.
 */
#define PMAP_PAGE_RECLAIM_NOWAIT 0x2

/**
 * Global variables exported to the rest of the internal pmap implementation.
/**
 * Global variables exported to the rest of the internal pmap implementation.
 */
#if XNU_MONITOR
extern uint64_t pmap_ppl_free_page_count;
extern pmap_paddr_t pmap_stacks_start_pa;
extern pmap_paddr_t pmap_stacks_end_pa;
extern pmap_paddr_t ppl_cpu_save_area_start;
extern pmap_paddr_t ppl_cpu_save_area_end;
#endif /* XNU_MONITOR */

extern unsigned int inuse_pmap_pages_count;
extern vm_object_t pmap_object;
extern uint32_t pv_alloc_initial_target;
extern uint32_t pv_kern_alloc_initial_target;

/**
 * Functions exported to the rest of the internal pmap implementation.
 */
extern void pmap_data_bootstrap(void);
extern void pmap_enqueue_pages(vm_page_t);
extern kern_return_t pmap_pages_alloc_zeroed(pmap_paddr_t *, unsigned, unsigned);
extern void pmap_pages_free(pmap_paddr_t, unsigned);

#if XNU_MONITOR
extern void pmap_mark_page_as_ppl_page_internal(pmap_paddr_t, bool);
extern void pmap_mark_page_as_ppl_page(pmap_paddr_t);
extern void pmap_mark_page_as_kernel_page(pmap_paddr_t);
extern pmap_paddr_t pmap_alloc_page_for_kern(unsigned int);
extern void pmap_alloc_page_for_ppl(unsigned int);
extern uint64_t pmap_release_ppl_pages_to_kernel(void);
extern uint64_t pmap_ledger_validate(const volatile void *);
void pmap_ledger_retain(ledger_t ledger);
void pmap_ledger_release(ledger_t ledger);
extern void pmap_ledger_check_balance(pmap_t pmap);
kern_return_t pmap_alloc_pmap(pmap_t *pmap);
void pmap_free_pmap(pmap_t pmap);
#endif /* XNU_MONITOR */

/**
 * The modes in which a pmap lock can be acquired. Note that shared access
 * doesn't necessarily mean "read-only". As long as the data is updated
 * atomically (to account for multi-CPU accesses), it can still be written with
 * a shared lock held. Care just needs to be taken so as to not introduce any
 * race conditions when there are multiple writers.
 *
 * This is here in pmap_data.h because it's a required parameter for pv_alloc()
 * and pmap_enter_pv(). This header is always included in pmap_internal.h before
 * the rest of the pmap locking code is defined, so there shouldn't be any
 * issues with missing types.
 */
OS_ENUM(pmap_lock_mode, uint8_t,
    PMAP_LOCK_SHARED,
    PMAP_LOCK_EXCLUSIVE);

/**
 * Possible return values for pv_alloc(). See the pv_alloc() function header
 * for a description of each of these values.
 */
typedef enum {
	PV_ALLOC_SUCCESS,
	PV_ALLOC_RETRY,
	PV_ALLOC_FAIL
} pv_alloc_return_t;

extern pv_alloc_return_t pv_alloc(
	pmap_t, unsigned int, pmap_lock_mode_t, unsigned int, pv_entry_t **);
extern void pv_free(pv_entry_t *);
extern void pv_list_free(pv_entry_t *, pv_entry_t *, int);
extern void pmap_compute_pv_targets(void);
extern pv_alloc_return_t pmap_enter_pv(
	pmap_t, pt_entry_t *, int, unsigned int, pmap_lock_mode_t,
	pv_entry_t **, int *new_pve_ptep_idx);
extern void pmap_remove_pv(pmap_t, pt_entry_t *, int, bool, bool *, bool *);

extern void ptd_bootstrap(pt_desc_t *, unsigned int);
extern pt_desc_t *ptd_alloc_unlinked(void);
extern pt_desc_t *ptd_alloc(pmap_t);
extern void ptd_deallocate(pt_desc_t *);
extern void ptd_info_init(
	pt_desc_t *, pmap_t, vm_map_address_t, unsigned int, pt_entry_t *);

extern kern_return_t pmap_ledger_credit(pmap_t, int, ledger_amount_t);
extern kern_return_t pmap_ledger_debit(pmap_t, int, ledger_amount_t);

extern void validate_pmap_internal(const volatile struct pmap *, const char *);
extern void validate_pmap_mutable_internal(const volatile struct pmap *, const char *);
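/*
 * Illustrative sketch (hypothetical caller, not part of this header): handling
 * the pv_alloc() return values; see the pv_alloc() function header for the
 * authoritative semantics. The parameter values here are placeholders.
 *
 *   pv_entry_t *pvep = PV_ENTRY_NULL;
 *   pv_alloc_return_t ret = pv_alloc(pmap, pai, PMAP_LOCK_EXCLUSIVE,
 *       PMAP_PAGES_ALLOCATE_NOWAIT, &pvep);
 *   switch (ret) {
 *   case PV_ALLOC_SUCCESS:
 *       // pvep is valid and can be linked into the page's PV list.
 *       break;
 *   case PV_ALLOC_RETRY:
 *       // Locks may have been dropped to replenish the free lists;
 *       // revalidate the mapping state and retry the operation.
 *       break;
 *   case PV_ALLOC_FAIL:
 *       // No entry available; propagate a resource shortage to the caller.
 *       break;
 *   }
 */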
/**
 * Macro function wrappers around pmap validation so that the calling function
 * can be printed in the panic strings for easier validation failure debugging.
 */
#define validate_pmap(x) validate_pmap_internal(x, __func__)
#define validate_pmap_mutable(x) validate_pmap_mutable_internal(x, __func__)

/**
 * This structure describes a PPL-owned I/O range.
 *
 * @note This doesn't necessarily have to represent "I/O" only; it can also
 *       represent non-kernel-managed DRAM (e.g., iBoot carveouts). Any physical
 *       address region that isn't considered "kernel-managed" is fair game.
 *
 * @note The layout of this structure needs to map 1-to-1 with the
 *       pmap-io-range device tree nodes. Astris (through the LowGlobals) also
 *       depends on the consistency of this structure.
 */
typedef struct pmap_io_range {
	/* Physical address of the PPL-owned I/O range. */
	uint64_t addr;

	/**
	 * Length (in bytes) of the PPL-owned I/O range. Has to be the size of
	 * a page if the range will be referred to by pmap_io_filter_entries.
	 */
	uint64_t len;

	/* Strong DSB required for pages in this range. */
#define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31)

	/* Corresponds to memory carved out by the bootloader. */
#define PMAP_IO_RANGE_CARVEOUT (1UL << 30)

	/* Pages in this range need to be included in the hibernation image. */
#define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29)

	/* Mark the range as "owned" by a given subsystem. */
#define PMAP_IO_RANGE_OWNED (1UL << 28)

	/**
	 * Lower 16 bits treated as pp_attr_t, upper 16 bits contain additional
	 * mapping flags (defined above).
	 */
	uint32_t wimg;

	/**
	 * 4 Character Code (4CC) describing what this range is.
	 *
	 * This has to be unique for each "type" of pages (meaning pages sharing
	 * the same register layout) if it is used for the I/O filter descriptors
	 * below. Otherwise it doesn't matter.
	 */
	uint32_t signature;
} pmap_io_range_t;

/* Reminder: be sure to change all relevant device trees if you change the layout of pmap_io_range_t. */
_Static_assert(sizeof(pmap_io_range_t) == 24, "unexpected size for pmap_io_range_t");

extern pmap_io_range_t *pmap_find_io_attr(pmap_paddr_t);

/**
 * This structure describes a sub-page-size I/O region that is owned by the PPL
 * but that the kernel can write to.
 *
 * @note I/O filter software will use a collection of such data structures to
 *       determine access permissions to a page owned by the PPL.
 *
 * @note The {signature, offset} key is used to index a collection of such data
 *       structures to optimize for space in the case where one page layout is
 *       repeated for many devices, such as the memory controller channels.
 */
typedef struct pmap_io_filter_entry {
	/* 4 Character Code (4CC) describing what this range (page) is. */
	uint32_t signature;

	/* Offset within the page. It has to be within [0, PAGE_SIZE). */
	uint16_t offset;

	/* Length of the range, and (offset + length) has to be within [0, PAGE_SIZE). */
	uint16_t length;
} pmap_io_filter_entry_t;

_Static_assert(sizeof(pmap_io_filter_entry_t) == 8, "unexpected size for pmap_io_filter_entry_t");

extern pmap_io_filter_entry_t *pmap_find_io_filter_entry(
	pmap_paddr_t, uint64_t, const pmap_io_range_t **);

extern void pmap_cpu_data_init_internal(unsigned int);
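/*
 * Illustrative sketch (hypothetical helper, not part of this header): how an
 * I/O filter lookup might decide whether a kernel write of `len` bytes at
 * physical address `pa` falls entirely within one of the writable sub-page
 * regions of a PPL-owned page.
 *
 *   static inline bool
 *   example_io_write_allowed(pmap_paddr_t pa, uint64_t len)
 *   {
 *       const pmap_io_range_t *range = NULL;
 *
 *       // A non-NULL result means a {signature, offset} entry covers the
 *       // requested [pa, pa + len) window within the page.
 *       return pmap_find_io_filter_entry(pa, len, &range) != NULL;
 *   }
 */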
/**
 * Flush a single 16K page from noncoherent coprocessor caches.
 *
 * @note Noncoherent cache flushes are only guaranteed to work if the
 *       participating coprocessor(s) do not have any active VA translations
 *       for the page being flushed. Since coprocessor mappings should always
 *       be controlled by some PPL IOMMU extension, they should always have PV
 *       list entries. This flush should therefore be performed at a point when
 *       the PV list is known to be either empty or at least to not contain any
 *       IOMMU entries. For the purposes of our security model, it is
 *       sufficient to wait for the PV list to become empty, as we really want
 *       to protect PPL-sensitive pages from malicious/accidental coprocessor
 *       cacheline evictions, and the PV list must be empty before a page can
 *       be handed to the PPL.
 *
 * @param paddr The base physical address of the page to flush.
 */
extern void pmap_flush_noncoherent_page(pmap_paddr_t paddr);

#endif /* _ARM_PMAP_PMAP_DATA_H_ */