1 /*
2 * Copyright (c) 2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /**
29 * This header file is used to store the types, prototypes, and inline functions
30 * that define some of the most important data structures used in the pmap. This
31 * header is only meant for sharing types within the pmap; if a type is meant to
32 * be used by the rest of the kernel, then put it into osfmk/arm/pmap.h.
33 */
34 #ifndef _ARM_PMAP_PMAP_DATA_H_
35 #define _ARM_PMAP_PMAP_DATA_H_
36
37 #include <stdint.h>
38
39 #include <kern/ledger.h>
40 #include <mach/vm_types.h>
41 #include <mach_assert.h>
42 #include <vm/vm_page.h>
43
44 #include <arm/cpu_data.h>
45 #include <arm/machine_routines.h>
46 #include <arm64/proc_reg.h>
47
48 /* Temporary include before moving all ledger functions into pmap_data.c */
49 #include <os/refcnt.h>
50
51 /**
52 * These headers are safe to be included in this file since they shouldn't rely
53 * on any of the internal pmap header files (so no circular dependencies).
54 */
55 #include <arm/pmap.h>
56 #include <arm/pmap/pmap_pt_geometry.h>
57
58 /**
59 * These values represent the first and last kernel-managed physical addresses.
60 * We keep track of extra metadata on kernel-managed pages compared to other
61 * pages (usually iBoot carved out memory or I/O).
62 */
63 extern pmap_paddr_t vm_first_phys, vm_last_phys;
64
65 /**
66 * Return whether the given address represents a kernel-managed physical page.
67 *
68 * Whether a page is considered "kernel-managed" is determined by the BootArgs
69 * passed by the bootloader. Typically memory carved out by the bootloader as
70 * well as I/O memory should return false.
71 *
72 * @param pa The physical address to check.
73 */
74 static inline bool
pa_valid(pmap_paddr_t pa)75 pa_valid(pmap_paddr_t pa)
76 {
77 return (pa >= vm_first_phys) && (pa < vm_last_phys);
78 }
79
80 /**
81 * The pmap has a variety of data structures (pv_head_table/pp_attr_table) that
82 * contain an entry for every kernel-managed page in the system. These systems
83 * are indexed with physical address indices ("pai") generated by this function.
84 *
85 * The logic is simple since there should be one entry in each of these data
86 * structures for each kernel-managed physical page in the system. These data
87 * structures are allocated on boot based on the amount of memory available.
88 *
89 * @note PAIs are defined using the VM page size, which might not be identical
90 * to the underlying hardware page size for an arbitrary address space.
91 * This means that the data structures relying on PAIs will contain one
92 * entry for each VM page, not hardware page.
93 *
94 * @note This function is only valid for physical addresses that are
95 * kernel-managed.
96 */
97
98 static inline unsigned int
pa_index(pmap_paddr_t pa)99 pa_index(pmap_paddr_t pa)
100 {
101 return (unsigned int)atop(pa - vm_first_phys);
102 }
103
104 /* See the definition of pv_head_table for more information. */
105 extern pv_entry_t **pv_head_table;
106
107 /* Represents a NULL entry in the pv_head_table. */
108 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
109
110 /**
111 * Given a physical address index, return the corresponding pv_head_table entry.
112 *
113 * @note Despite returning a pointer to a pv_entry_t pointer, the entry might
114 * actually be a different type of pointer (pt_entry_t or pt_desc_t)
115 * depending on the type for this entry. Determine the type using
116 * pvh_test_type().
117 *
118 * @param pai The index returned by pa_index() for the page whose pv_head_table
119 * entry should be retrieved.
120 */
121 static inline pv_entry_t **
pai_to_pvh(unsigned int pai)122 pai_to_pvh(unsigned int pai)
123 {
124 return &pv_head_table[pai];
125 }
126
127 /**
128 * Each pv_head_table entry can be one of four different types:
129 *
130 * - PVH_TYPE_NULL: No mappings to the physical page exist outside of the
131 * physical aperture. Physical aperture mappings are not
132 * tracked in the pv_head_table.
133 *
134 * - PVH_TYPE_PVEP: There are multiple mappings to the physical page.
135 * These entries are linked lists of pv_entry_t objects (which
136 * each contain a pointer to the associated PTE and a pointer
137 * to the next entry in the list).
138 *
139 * - PVH_TYPE_PTEP: There is a single mapping to the physical page. Once more
140 * mappings are created, this entry will get upgraded to an
141 * entry of type PVH_TYPE_PVEP. These entries are pointers
142 * directly to the page table entry that contain the mapping
143 * (pt_entry_t*).
144 *
145 * - PVH_TYPE_PTDP: The physical page is being used as a page table. These
146 * entries are pointers to page table descriptor structures
147 * (pt_desc_t) which contain metadata related to each page
148 * table.
149 *
150 * The type is stored in the bottom two bits of each pv_head_table entry. That
151 * type needs to be checked before dereferencing the pointer to determine which
152 * pointer type to dereference as.
153 */
154 __enum_closed_decl(pvh_type_t, uint8_t, {
155 PVH_TYPE_NULL = 0b00,
156 PVH_TYPE_PVEP = 0b01,
157 PVH_TYPE_PTEP = 0b10,
158 PVH_TYPE_PTDP = 0b11,
159 });
160
161 #define PVH_TYPE_MASK (0x3UL)
162
163 #if defined(__arm64__)
164
165 /**
166 * PV_HEAD_TABLE Flags.
167 *
168 * All flags listed below are stored in the pv_head_table entry/pointer
169 * (per-physical-page) unless otherwise noted.
170 *
171 * Please update the pv_walk LLDB macro if these flags are changed or added to.
172 */
173
174 /**
175 * This flag is set for every mapping created by an IOMMU.
176 *
177 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
178 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
179 */
180 #define PVH_FLAG_IOMMU 0x4UL
181
182 /**
183 * This flag is only valid when PVH_FLAG_IOMMU is set. For an IOMMU mapping, if
184 * this bit is set, then the PTE pointer points directly into the IOMMU page
185 * table for this mapping. If this bit is cleared, then the "PTE pointer" is
186 * actually a pointer to the IOMMU descriptor object that owns this mapping.
187 *
188 * There are cases where it's not easy to tie an IOMMU mapping directly to a
189 * specific page table, so this allows us to at least get a pointer to which
190 * IOMMU created this mapping which is useful for debugging purposes.
191 *
192 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
193 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
194 */
195 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
196
197 /**
198 * This flag is set when the first CPU (non-IOMMU) mapping is created. This is
199 * important to keep track of because various accounting statistics are based on
200 * the options specified for the first CPU mapping. This flag, and thus the
 * accounting statistics, will persist as long as there are *any* mappings of the
202 * page (including IOMMU mappings). This works because the accounting for a page
203 * should not need to change until the page is recycled by the VM layer, and we
204 * double-check that there are no mappings (CPU or IOMMU) when a page is
205 * recycled (see: pmap_verify_free()).
206 */
207 #define PVH_FLAG_CPU (1ULL << 62)
208
209 /* This bit is used as a lock when modifying a pv_head_table entry. */
210 #define PVH_LOCK_BIT 61
211 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
212
213 /**
214 * This flag is set when there are any executable mappings to this physical
215 * page. This is used to prevent any writable mappings from being created at
216 * the same time an executable mapping exists.
217 */
218 #define PVH_FLAG_EXEC (1ULL << 60)
219
220 /**
221 * Marking a pv_head_table entry with this flag denotes that this page is a
222 * kernelcache text or data page that shouldn't have dynamically-created
223 * mappings. See PVH_FLAG_LOCKDOWN_MASK for more details.
224 */
225 #define PVH_FLAG_LOCKDOWN_KC (1ULL << 59)
226
227 /**
228 * This flag is used to mark that a page has been hashed into the hibernation
229 * image.
230 *
231 * The hibernation driver will use this to ensure that all PPL-owned memory is
232 * correctly included into the hibernation image (a missing PPL page could be
233 * a security concern when coming out of hibernation).
234 */
235 #define PVH_FLAG_HASHED (1ULL << 58)
236
237 /**
238 * Marking a pv_head_table entry with this flag denotes that this page is a
239 * code signature page that shouldn't have dynamically-created mappings.
240 * See PVH_FLAG_LOCKDOWN_MASK for more details.
241 */
242 #define PVH_FLAG_LOCKDOWN_CS (1ULL << 57)
243
244 /**
245 * Marking a pv_head_table entry with this flag denotes that this page is a
246 * read-only allocator page that shouldn't have dynamically-created mappings.
247 * See PVH_FLAG_LOCKDOWN_MASK for more details.
248 */
249 #define PVH_FLAG_LOCKDOWN_RO (1ULL << 56)
250
251 /**
252 * Marking a pv_head_table entry with this flag denotes that this page is
253 * retired without any mappings and never should be mapped again.
254 */
255 #define PVH_FLAG_RETIRED (1ULL << 55)
256
257 /**
258 * Flags which disallow a new mapping to a page.
259 */
260 #define PVH_FLAG_NOMAP_MASK (PVH_FLAG_RETIRED)
261
262 /**
263 * Marking a pv_head_table entry with this flag denotes that this page has
264 * been mapped into a non-coherent coprocessor address space and requires a
265 * cache flush operation once all mappings have been removed.
266 */
267 #define PVH_FLAG_FLUSH_NEEDED (1ULL << 54)
268
269 /**
270 * Marking a pv_head_table entry with any bit in this mask denotes that this page
271 * has been locked down by the PPL. Locked down pages can't have new mappings
272 * created or existing mappings removed, and all existing mappings will have been
273 * converted to read-only. This essentially makes the page immutable.
274 */
275 #define PVH_FLAG_LOCKDOWN_MASK (PVH_FLAG_LOCKDOWN_KC | PVH_FLAG_LOCKDOWN_CS | PVH_FLAG_LOCKDOWN_RO)
276
277
278 /**
279 * These bits need to be set to safely dereference a pv_head_table
280 * entry/pointer.
281 *
282 * Any change to this #define should also update the copy located in the pmap.py
283 * LLDB macros file.
284 */
285 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN_MASK | \
286 PVH_FLAG_HASHED | PVH_FLAG_FLUSH_NEEDED | PVH_FLAG_RETIRED)
287
288 #endif /* defined(__arm64__) */
289
290 /* Mask used to clear out the TYPE bits from a pv_head_table entry/pointer. */
291 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
292
293 /* Which 32-bit word in each pv_head_table entry/pointer contains the LOCK bit. */
294 #if defined(__arm64__)
295 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
296 #endif /* defined(__arm64__) */
297
298 /**
299 * Assert that a pv_head_table entry is locked. Will panic if the lock isn't
300 * acquired.
301 *
302 * @param index The physical address index to check.
303 */
304 static inline void
pvh_assert_locked(__assert_only unsigned int index)305 pvh_assert_locked(__assert_only unsigned int index)
306 {
307 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);
308 }
309
310
311 /**
312 * Lock a pv_head_table entry.
313 *
314 * @param index The physical address index of the pv_head_table entry to lock.
315 */
316 static inline void
pvh_lock(unsigned int index)317 pvh_lock(unsigned int index)
318 {
319 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
320 PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
321 }
322
323 /**
324 * Unlock a pv_head_table entry.
325 *
326 * @param index The physical address index of the pv_head_table entry to unlock.
327 */
328 static inline void
pvh_unlock(unsigned int index)329 pvh_unlock(unsigned int index)
330 {
331 pvh_assert_locked(index);
332
333 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
334 PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
335 }
336
337 /**
338 * Check that a pv_head_table entry/pointer is a specific type.
339 *
340 * @param pvh The pv_head_table entry/pointer to check.
341 * @param type The type to check for.
342 *
343 * @return True if the pv_head_table entry is of the passed in type, false
344 * otherwise.
345 */
346 static inline bool
pvh_test_type(pv_entry_t ** pvh,pvh_type_t type)347 pvh_test_type(pv_entry_t **pvh, pvh_type_t type)
348 {
349 return ((*(vm_offset_t *)pvh) & PVH_TYPE_MASK) == type;
350 }
351
352 /**
353 * Convert a pv_head_table entry/pointer into a page table entry pointer. This
354 * should only be done if the type of this entry is PVH_TYPE_PTEP.
355 *
356 * @param pvh The pv_head_table entry/pointer to convert into a pt_entry_t*.
357 *
 * @return Return back a safe to dereference pointer to the single mapping of this
359 * physical page by masking off the TYPE bits and adding any missing
360 * flags to the upper portion of the pointer.
361 */
362 static inline pt_entry_t*
pvh_ptep(pv_entry_t ** pvh)363 pvh_ptep(pv_entry_t **pvh)
364 {
365 return (pt_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
366 }
367
368 /**
369 * Convert a pv_head_table entry/pointer into a PVE list pointer. This
370 * should only be done if the type of this entry is PVH_TYPE_PVEP.
371 *
372 * @param pvh The pv_head_table entry/pointer to convert into a safe to
373 * dereference pv_entry_t*.
374 *
 * @return Return back a safe to dereference pointer to the first mapping of this
376 * physical page by masking off the TYPE bits and adding any missing
377 * flags to the upper portion of the pointer.
378 */
379 static inline pv_entry_t*
pvh_pve_list(pv_entry_t ** pvh)380 pvh_pve_list(pv_entry_t **pvh)
381 {
382 return (pv_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
383 }
384
385 /**
386 * Return the flags associated with a pv_head_table entry/pointer.
387 *
388 * @param pvh The pv_head_table entry whose flags to get.
389 */
390 static inline vm_offset_t
pvh_get_flags(pv_entry_t ** pvh)391 pvh_get_flags(pv_entry_t **pvh)
392 {
393 return (*(vm_offset_t *)pvh) & PVH_HIGH_FLAGS;
394 }
395
396 /**
397 * Atomically set the flags associated with a pv_head_table entry/pointer.
398 *
399 * @param pvh The pv_head_table entry whose flags are getting set.
400 */
401 static inline void
pvh_set_flags(pv_entry_t ** pvh,vm_offset_t flags)402 pvh_set_flags(pv_entry_t **pvh, vm_offset_t flags)
403 {
404 os_atomic_store((vm_offset_t *)pvh, ((*(vm_offset_t *)pvh) & ~PVH_HIGH_FLAGS) | flags, relaxed);
405 }
406
407 /**
408 * Update a pv_head_table entry/pointer to be a different type and/or point to
409 * a different object.
410 *
411 * @note The pv_head_table entry MUST already be locked.
412 *
413 * @note This function will clobber any existing flags stored in the PVH pointer
414 * (except PVH_FLAG_LOCK). It's up to the caller to preserve flags if that
415 * functionality is needed (either by ensuring `pvep` contains those
416 * flags, or by manually setting the flags after this call).
417 *
418 * @param pvh The pv_head_table entry/pointer to update.
419 * @param pvep The new entry to use. This could be either a pt_entry_t*,
420 * pv_entry_t*, or pt_desc_t* depending on the type.
421 * @param type The type of the new entry.
422 */
423 static inline void
pvh_update_head(pv_entry_t ** pvh,void * pvep,unsigned int type)424 pvh_update_head(pv_entry_t **pvh, void *pvep, unsigned int type)
425 {
426 assert((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK);
427 os_atomic_store((vm_offset_t *)pvh, (vm_offset_t)pvep | type | PVH_FLAG_LOCK, relaxed);
428 }
429
430 /**
431 * Update a pv_head_table entry/pointer to be a different type and/or point to
432 * a different object.
433 *
434 * @note The pv_head_table entry CAN'T already be locked.
435 *
436 * @note This function will clobber any existing flags stored in the PVH
437 * pointer. It's up to the caller to preserve flags if that functionality
438 * is needed (either by ensuring `pvep` contains those flags, or by
439 * manually setting the flags after this call).
440 *
441 * @param pvh The pv_head_table entry/pointer to update.
442 * @param pvep The new entry to use. This could be either a pt_entry_t*,
443 * pv_entry_t*, or pt_desc_t* depending on the type.
444 * @param type The type of the new entry.
445 */
446 static inline void
pvh_update_head_unlocked(pv_entry_t ** pvh,void * pvep,unsigned int type)447 pvh_update_head_unlocked(pv_entry_t **pvh, void *pvep, unsigned int type)
448 {
449 assert(!((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK));
450 *(vm_offset_t *)pvh = ((vm_offset_t)pvep | type) & ~PVH_FLAG_LOCK;
451 }
452
453 /**
454 * Given a page table entry pointer retrieved from the pv_head_table (from an
455 * entry of type PVH_TYPE_PTEP or PVH_TYPE_PVEP), return back whether the PTE is
456 * an IOMMU mapping.
457 *
458 * @note The way this function determines whether the passed in pointer is
459 * pointing to an IOMMU PTE, is by checking for a special flag stored in
460 * the lower bits of the pointer. This flag is only set on pointers stored
461 * in the pv_head_table, and as such, this function will only work on
462 * pointers retrieved from the pv_head_table. If a pointer to a PTE was
463 * directly retrieved from an IOMMU's page tables, this function would
464 * always return false despite actually being an IOMMU PTE.
465 *
466 * @param ptep A PTE pointer obtained from the pv_head_table to check.
467 *
468 * @return True if the entry is an IOMMU mapping, false otherwise.
469 */
470 static inline bool
pvh_ptep_is_iommu(const pt_entry_t * ptep)471 pvh_ptep_is_iommu(const pt_entry_t *ptep)
472 {
473 #ifdef PVH_FLAG_IOMMU
474 return (vm_offset_t)ptep & PVH_FLAG_IOMMU;
475 #else /* PVH_FLAG_IOMMU */
476 #pragma unused(ptep)
477 return false;
478 #endif /* PVH_FLAG_IOMMU */
479 }
480
481 /**
482 * Sometimes the PTE pointers retrieved from the pv_head_table (from an entry of
483 * type PVH_TYPE_PTEP or PVH_TYPE_PVEP) contain flags themselves. This function
484 * strips out those flags and returns back a dereferencable pointer.
485 *
486 * @param ptep The PTE pointer to strip out the unwanted flags.
487 *
488 * @return A valid dereferencable pointer to the page table entry.
489 */
490 static inline const pt_entry_t*
pvh_strip_ptep(const pt_entry_t * ptep)491 pvh_strip_ptep(const pt_entry_t *ptep)
492 {
493 #ifdef PVH_FLAG_IOMMU
494 const vm_offset_t pte_va = (vm_offset_t)ptep;
495 return (const pt_entry_t*)((pte_va & ~PVH_FLAG_IOMMU) | PVH_FLAG_IOMMU_TABLE);
496 #else /* PVH_FLAG_IOMMU */
497 return ptep;
498 #endif /* PVH_FLAG_IOMMU */
499 }
500
501 /**
502 * PVH_TYPE_PVEP Helper Functions.
503 *
504 * The following are methods used to manipulate PVE lists. This is the type of
505 * pv_head_table entry used when there are multiple mappings to a single
506 * physical page.
507 */
508
509 /**
510 * Whether a physical page is using "alternate accounting" (ALTACCT) for its
511 * ledger statistics is something that needs to be tracked on a per-mapping
512 * basis, not on a per-physical-page basis. Because of that, it's tracked
513 * differently depending on whether there's a single mapping to a page
514 * (PVH_TYPE_PTEP) or multiple (PVH_TYPE_PVEP). For single mappings, the bit is
515 * tracked in the pp_attr_table. But when there are multiple mappings, the least
516 * significant bit of the corresponding "pve_pte" pointer in each pv_entry object
517 * is used as a marker for pages using alternate accounting.
518 *
519 * @note See the definition for PP_ATTR_ALTACCT for a more detailed description
520 * of what "alternate accounting" actually means in respect to the
521 * footprint ledger.
522 *
 * Since some code (KernelDiskImages, e.g.) might map a physical page as
524 * "device" memory (i.e. external) while it's also being used as regular
525 * "anonymous" memory (i.e. internal) in user space, we have to manage the
526 * "internal" attribute per mapping rather than per physical page.
527 * When there are multiple mappings, we use the next least significant bit of
528 * the corresponding "pve_pte" pointer for that.
529 */
530 #define PVE_PTEP_ALTACCT ((uintptr_t) 0x1)
531 #define PVE_PTEP_INTERNAL ((uintptr_t) 0x2)
532 #define PVE_PTEP_FLAGS (PVE_PTEP_ALTACCT | PVE_PTEP_INTERNAL)
533
534 /**
535 * Set the ALTACCT bit for a specific PTE pointer.
536 *
537 * @param pvep A pointer to the current pv_entry mapping in the linked list of
538 * mappings.
539 * @param idx Index of the chosen PTE pointer inside the PVE.
540 */
541 static inline void
pve_set_altacct(pv_entry_t * pvep,unsigned idx)542 pve_set_altacct(pv_entry_t *pvep, unsigned idx)
543 {
544 assert(idx < PTE_PER_PVE);
545 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_ALTACCT);
546 }
547 /**
548 * Set the INTERNAL bit for a specific PTE pointer.
549 *
550 * @param pvep A pointer to the current pv_entry mapping in the linked list of
551 * mappings.
552 * @param idx Index of the chosen PTE pointer inside the PVE.
553 */
554 static inline void
pve_set_internal(pv_entry_t * pvep,unsigned idx)555 pve_set_internal(pv_entry_t *pvep, unsigned idx)
556 {
557 assert(idx < PTE_PER_PVE);
558 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_INTERNAL);
559 }
560
561 /**
562 * Clear the ALTACCT bit for a specific PTE pointer.
563 *
564 * @param pvep A pointer to the current pv_entry mapping in the linked list of
565 * mappings.
566 * @param idx Index of the chosen PTE pointer inside the PVE.
567 */
568 static inline void
pve_clr_altacct(pv_entry_t * pvep,unsigned idx)569 pve_clr_altacct(pv_entry_t *pvep, unsigned idx)
570 {
571 assert(idx < PTE_PER_PVE);
572 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_ALTACCT);
573 }
574 /**
575 * Clear the INTERNAL bit for a specific PTE pointer.
576 *
577 * @param pvep A pointer to the current pv_entry mapping in the linked list of
578 * mappings.
579 * @param idx Index of the chosen PTE pointer inside the PVE.
580 */
581 static inline void
pve_clr_internal(pv_entry_t * pvep,unsigned idx)582 pve_clr_internal(pv_entry_t *pvep, unsigned idx)
583 {
584 assert(idx < PTE_PER_PVE);
585 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_INTERNAL);
586 }
587
588 /**
589 * Return the ALTACCT bit for a specific PTE pointer.
590 *
591 * @param pvep A pointer to the current pv_entry mapping in the linked list of
592 * mappings.
593 * @param idx Index of the chosen PTE pointer inside the PVE.
594 */
595 static inline bool
pve_get_altacct(pv_entry_t * pvep,unsigned idx)596 pve_get_altacct(pv_entry_t *pvep, unsigned idx)
597 {
598 assert(idx < PTE_PER_PVE);
599 return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_ALTACCT;
600 }
601 /**
602 * Return the INTERNAL bit for a specific PTE pointer.
603 *
604 * @param pvep A pointer to the current pv_entry mapping in the linked list of
605 * mappings.
606 * @param idx Index of the chosen PTE pointer inside the PVE.
607 */
608 static inline bool
pve_get_internal(pv_entry_t * pvep,unsigned idx)609 pve_get_internal(pv_entry_t *pvep, unsigned idx)
610 {
611 assert(idx < PTE_PER_PVE);
612 return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_INTERNAL;
613 }
614
615 /**
616 * Return the next mapping (pv_entry) in a linked list of mappings. This applies
617 * to pv_head_table entries of type PVH_TYPE_PVEP.
618 *
619 * @param pvep A pointer to the current pv_entry mapping in the linked list of
620 * mappings.
621 *
622 * @return The next virtual mapping for a physical page, or PV_ENTRY_NULL if the
623 * end of the list has been reached.
624 */
625 static inline pv_entry_t *
pve_next(pv_entry_t * pvep)626 pve_next(pv_entry_t *pvep)
627 {
628 return pvep->pve_next;
629 }
630
631 /**
632 * Return a pointer to the pve_next field in a pv_entry. This value is used
633 * when adding and removing entries to a PVE list.
634 *
635 * @param pvep The pv_entry whose pve_next field is being accessed.
636 *
637 * @return Pointer to the pve_next field.
638 */
639 static inline pv_entry_t **
pve_next_ptr(pv_entry_t * pvep)640 pve_next_ptr(pv_entry_t *pvep)
641 {
642 return &pvep->pve_next;
643 }
644
645 /**
646 * Return a pointer to the page table entry for this mapping.
647 *
648 * @param pvep The pv_entry whose pve_ptep field is to be returned.
649 * @param idx Index of the chosen PTE pointer inside the PVE.
650 *
651 * @return Pointer to the page table entry.
652 */
653 static inline pt_entry_t *
pve_get_ptep(pv_entry_t * pvep,unsigned idx)654 pve_get_ptep(pv_entry_t *pvep, unsigned idx)
655 {
656 assert(idx < PTE_PER_PVE);
657 return (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_FLAGS);
658 }
659
660 /**
661 * Update the page table entry for a specific physical to virtual mapping.
662 *
663 * @param pvep The pv_entry to update.
664 * @param idx Index of the chosen PTE pointer inside the PVE.
665 * @param ptep_new The new page table entry.
666 */
667 static inline void
pve_set_ptep(pv_entry_t * pvep,unsigned idx,pt_entry_t * ptep_new)668 pve_set_ptep(pv_entry_t *pvep, unsigned idx, pt_entry_t *ptep_new)
669 {
670 assert(idx < PTE_PER_PVE);
671 pvep->pve_ptep[idx] = ptep_new;
672 }
673
674 /**
675 * Initialize all fields in a PVE to NULL.
676 *
677 * @param pvep The pv_entry to initialize.
678 */
679 static inline void
pve_init(pv_entry_t * pvep)680 pve_init(pv_entry_t *pvep)
681 {
682 pvep->pve_next = PV_ENTRY_NULL;
683 for (int i = 0; i < PTE_PER_PVE; i++) {
684 pvep->pve_ptep[i] = PT_ENTRY_NULL;
685 }
686 }
687
688 /**
689 * Find PTE pointer in PVE and return its index.
690 *
691 * @param pvep The PVE to search.
692 * @param ptep PTE to search for.
693 *
694 * @return Index of the found entry, or -1 if no entry exists.
695 */
696 static inline int
pve_find_ptep_index(pv_entry_t * pvep,pt_entry_t * ptep)697 pve_find_ptep_index(pv_entry_t *pvep, pt_entry_t *ptep)
698 {
699 for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
700 if (pve_get_ptep(pvep, i) == ptep) {
701 return (int)i;
702 }
703 }
704
705 return -1;
706 }
707
708 /**
709 * Checks if no PTEs are currently associated with this PVE.
710 *
711 * @param pvep The PVE to search.
712 *
713 * @return True if no PTEs are currently associated with this PVE, or false.
714 */
715 static inline bool
pve_is_empty(pv_entry_t * pvep)716 pve_is_empty(pv_entry_t *pvep)
717 {
718 for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
719 if (pve_get_ptep(pvep, i) != PT_ENTRY_NULL) {
720 return false;
721 }
722 }
723
724 return true;
725 }
726
727 /**
728 * Prepend a new pv_entry node to a PVE list.
729 *
730 * @note This function will clobber any existing flags stored in the PVH
731 * pointer. It's up to the caller to preserve flags if that functionality
732 * is needed (either by ensuring `pvep` contains those flags, or by
733 * manually setting the flags after this call).
734 *
735 * @param pvh The linked list of mappings to update.
736 * @param pvep The new mapping to add to the linked list.
737 */
738 static inline void
pve_add(pv_entry_t ** pvh,pv_entry_t * pvep)739 pve_add(pv_entry_t **pvh, pv_entry_t *pvep)
740 {
741 assert(pvh_test_type(pvh, PVH_TYPE_PVEP));
742
743 pvep->pve_next = pvh_pve_list(pvh);
744 pvh_update_head(pvh, pvep, PVH_TYPE_PVEP);
745 }
746
747 /**
748 * Remove an entry from a PVE list of mappings.
749 *
750 * @note This function will clobber any existing flags stored in the PVH
751 * pointer. It's up to the caller to preserve flags if that functionality
752 * is needed.
753 *
754 * @param pvh The pv_head_table entry of the PVE list to remove a mapping from.
755 * This is the first entry in the list of pv_entry_t mappings.
756 * @param pvepp A pointer to the pv_entry_t* that's being removed. If this entry
757 * is the first in the linked list of mappings, then this should be
758 * identical to the pv_head_table entry. If the mapping isn't the
759 * first, then this is a pointer to the pve_next field in the
760 * previous mapping.
761 * @param pvep The entry that should be removed. Should be identical to a
762 * dereference of the pvepp parameter (unless it's the pv_head_table
763 * entry).
764 */
static inline void
pve_remove(pv_entry_t **pvh, pv_entry_t **pvepp, pv_entry_t *pvep)
{
	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));

	/* Removing the head of the list: the pv_head_table entry itself must
	 * be rewritten. pvh_update_head() asserts the PVH lock is held. */
	if (pvepp == pvh) {
		if (pve_next(pvep) == PV_ENTRY_NULL) {
			/* The last mapping to this page is being removed. */
			pvh_update_head(pvh, PV_ENTRY_NULL, PVH_TYPE_NULL);
		} else {
			/**
			 * There are still mappings left, make the next one the new head of
			 * the list. This effectively removes the first entry from the list.
			 */
			pvh_update_head(pvh, pve_next(pvep), PVH_TYPE_PVEP);
		}
	} else {
		/**
		 * Move the previous entry's next field to the entry after the one being
		 * removed. This will clobber the ALTACCT and INTERNAL bits.
		 */
		*pvepp = pve_next(pvep);
	}
}
789
790 /**
791 * PVH_TYPE_PTDP Types and Helper Functions.
792 *
793 * The following are types and methods used to manipulate page table descriptor
794 * (PTD) objects. This is the type of pv_head_table entry used when a page is
795 * being used as a page table.
796 */
797
798 /**
799 * When the pmap layer allocates memory, it always does so in chunks of the VM
800 * page size (which are represented by the PAGE_SIZE/PAGE_SHIFT macros). The VM
801 * page size might not match up with the hardware page size for a given address
802 * space (this is especially true on systems that support more than one page
803 * size).
804 *
805 * The pv_head_table is allocated to have one entry per VM page, not hardware
806 * page (which can change depending on the address space). Because of that, a
807 * single VM-page-sized region (single pv_head_table entry) can potentially hold
808 * up to four page tables. Only one page table descriptor (PTD) is allocated per
809 * pv_head_table entry (per VM page), so on some systems, one PTD might have to
810 * keep track of up to four different page tables.
811 */
812
813 #if __ARM_MIXED_PAGE_SIZE__
814 #define PT_INDEX_MAX (ARM_PGBYTES / 4096)
815 #elif (ARM_PGSHIFT == 14)
816 #define PT_INDEX_MAX 1
817 #elif (ARM_PGSHIFT == 12)
818 #define PT_INDEX_MAX 4
819 #else
820 #error Unsupported ARM_PGSHIFT
821 #endif /* __ARM_MIXED_PAGE_SIZE__ || ARM_PGSHIFT == 14 || ARM_PGSHIFT == 12 */
822
823
824 /**
825 * Page table descriptor (PTD) info structure.
826 *
827 * Contains information about a page table. These pieces of data are separate
828 * from the PTD itself because in address spaces where the VM page size doesn't
829 * match the underlying hardware page size, one PTD could represent multiple
830 * page tables (and so will need multiple PTD info structures).
831 *
832 * These fields are also in their own struct so that they can be allocated
833 * separately from the associated pt_desc_t object. This allows us to allocate
834 * the counts in this structure in a way that ensures they don't fall within the
835 * same cache line as the main pt_desc_t object. This is important because the
836 * fields in this structure are atomically updated which could cause false
837 * sharing cache performance issues with the "va" field in pt_desc_t if all of
838 * the fields were within the same structure.
839 */
typedef struct {
	/**
	 * Pre-defined sentinel values for ptd_info_t.refcnt. If these refcnt values
	 * change, make sure to update the showpte LLDB macro to reflect the
	 * changes.
	 */
#define PT_DESC_REFCOUNT 0x4000U
#define PT_DESC_IOMMU_GRANTED_REFCOUNT 0x8000U
#define PT_DESC_IOMMU_ACCEPTED_REFCOUNT 0x8001U

	/*
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT.
	 * For leaf pagetables, should reflect the number of non-empty PTEs.
	 * For IOMMU pages, should always be either PT_DESC_IOMMU_GRANTED_REFCOUNT
	 * or PT_DESC_IOMMU_ACCEPTED_REFCOUNT.
	 *
	 * Note: the IOMMU sentinels compare greater than any valid PTE count,
	 * which is how ptd_get_info() asserts it is not handed an IOMMU page.
	 */
	unsigned short refcnt;

	/*
	 * For non-leaf pagetables, should be 0.
	 * For leaf pagetables, should reflect the number of wired entries.
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU
	 * operations are implicitly wired).
	 */
	unsigned short wiredcnt;
} ptd_info_t;
866
867 /**
868 * Page Table Descriptor (PTD).
869 *
870 * Provides a per-table data structure and a way of keeping track of all page
871 * tables in the system.
872 *
873 * This structure is also used as a convenient way of keeping track of IOMMU
874 * pages (which may or may not be used as page tables). In that case the "iommu"
875 * field will point to the owner of the page, ptd_info[0].refcnt will be
876 * PT_DESC_IOMMU_GRANTED_REFCOUNT or PT_DESC_IOMMU_ACCEPTED_REFCOUNT, and
877 * ptd_info[0].wiredcnt can be used as an arbitrary refcnt controlled by the
878 * IOMMU driver.
879 */
typedef struct pt_desc {
	/**
	 * This queue chain provides a mechanism for keeping a list of pages
	 * being used as page tables. This is used to potentially reclaim userspace
	 * page tables as a fast way of "allocating" a page.
	 *
	 * Refer to osfmk/kern/queue.h for more information about queue chains.
	 */
	queue_chain_t pt_page;

	/* Each page table is either owned by a pmap or a specific IOMMU. */
	union {
		/* Owning pmap, when this PTD tracks a regular page table. */
		struct pmap *pmap;
		/*
		 * NOTE(review): the struct-level comment refers to an "iommu"
		 * member of this union for IOMMU-owned pages; it is not present
		 * in this configuration — confirm against the full source.
		 */
	};

	/**
	 * The following fields contain per-page-table properties, and as such,
	 * might have multiple elements each. This is due to a single PTD
	 * potentially representing multiple page tables (in address spaces where
	 * the VM page size differs from the hardware page size). Use the
	 * ptd_get_index() function to get the correct index for a specific page
	 * table.
	 */

	/**
	 * The first address of the virtual address space this page table is
	 * translating for, or a value set by an IOMMU driver if this PTD is being
	 * used to track an IOMMU page.
	 */
	vm_offset_t va[PT_INDEX_MAX];

	/**
	 * ptd_info_t's are allocated separately so as to reduce false sharing
	 * with the va field. This is desirable because ptd_info_t's are updated
	 * atomically from all CPUs.
	 */
	ptd_info_t *ptd_info;
} pt_desc_t;
918
919 /**
920 * Convert a pv_head_table entry/pointer into a page table descriptor pointer.
921 * This should only be done if the type of this entry is PVH_TYPE_PTDP.
922 *
923 * @param pvh The pv_head_table entry/pointer to convert into a safe to
924 * dereference pt_desc_t*.
925 *
926 * @return Return back a safe to derefence pointer to the page table descriptor
927 * for this physical page by masking off the TYPE bits and adding any
928 * missing flags to the upper portion of the pointer.
929 */
930 static inline pt_desc_t*
pvh_ptd(pv_entry_t ** pvh)931 pvh_ptd(pv_entry_t **pvh)
932 {
933 return (pt_desc_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
934 }
935
936 /**
937 * Given an arbitrary page table entry, return back the page table descriptor
938 * (PTD) object for the page table that contains that entry.
939 *
940 * @param ptep Pointer to a PTE whose page table descriptor object to return.
941 *
942 * @return The PTD object for the passed in page table.
943 */
944 static inline pt_desc_t *
ptep_get_ptd(const pt_entry_t * ptep)945 ptep_get_ptd(const pt_entry_t *ptep)
946 {
947 assert(ptep != NULL);
948
949 const vm_offset_t pt_base_va = (vm_offset_t)ptep;
950 pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop(pt_base_va)));
951
952 if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
953 panic("%s: invalid PV head 0x%llx for PTE %p", __func__, (uint64_t)(*pvh), ptep);
954 }
955
956 return pvh_ptd(pvh);
957 }
958
959 /**
960 * Given an arbitrary page table entry, return back the pmap that owns that
961 * page table.
962 *
963 * @note This won't work correctly for page tables owned by IOMMUs, because
964 * those table aren't owned by any specific pmap.
965 *
966 * @param ptep Pointer to a page table entry whose owner we're trying to return.
967 *
968 * @return The pmap that owns the given page table entry.
969 */
970 static inline struct pmap *
ptep_get_pmap(const pt_entry_t * ptep)971 ptep_get_pmap(const pt_entry_t *ptep)
972 {
973 return ptep_get_ptd(ptep)->pmap;
974 }
975
976
977 /**
978 * Given an arbitrary translation table entry, get the page table descriptor
979 * (PTD) object for the page table pointed to by the TTE.
980 *
981 * @param tte The translation table entry to parse. For instance, if this is an
982 * L2 TTE, then the PTD for the L3 table this entry points to will be
983 * returned.
984 *
985 * @return The page table descriptor (PTD) for the page table pointed to by this
986 * TTE.
987 */
988 static inline pt_desc_t *
tte_get_ptd(const tt_entry_t tte)989 tte_get_ptd(const tt_entry_t tte)
990 {
991 const vm_offset_t pt_base_va = (vm_offset_t)(tte & ~((tt_entry_t)PAGE_MASK));
992 pv_entry_t **pvh = pai_to_pvh(pa_index(pt_base_va));
993
994 if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
995 panic("%s: invalid PV head 0x%llx for TTE 0x%llx", __func__, (uint64_t)(*pvh), (uint64_t)tte);
996 }
997
998 return pvh_ptd(pvh);
999 }
1000
1001 /**
1002 * In address spaces where the VM page size doesn't match the underlying
1003 * hardware page size, one PTD could represent multiple page tables. This
1004 * function returns the correct index value depending on which page table is
1005 * being accessed. That index value can then be used to access the
1006 * per-page-table properties stored within a PTD.
1007 *
1008 * @note See the description above the PT_INDEX_MAX definition for a more
1009 * detailed explanation of why multiple page tables can be represented
1010 * by a single PTD object in the pv_head_table.
1011 *
1012 * @param ptd The page table descriptor that's being accessed.
1013 * @param ttep Pointer to the translation table entry that's being accessed.
1014 *
1015 * @return The correct index value for a specific, hardware-sized page
1016 * table.
1017 */
1018 static inline unsigned
ptd_get_index(__unused const pt_desc_t * ptd,__unused const tt_entry_t * ttep)1019 ptd_get_index(__unused const pt_desc_t *ptd, __unused const tt_entry_t *ttep)
1020 {
1021 #if PT_INDEX_MAX == 1
1022 return 0;
1023 #else
1024 assert(ptd != NULL);
1025
1026 const uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
1027 const vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
1028
1029 /**
1030 * Use the difference between the VM page shift and the hardware page shift
1031 * to get the index of the correct page table. In practice, this equates to
1032 * masking out the bottom two bits of the L3 table index in address spaces
1033 * where the VM page size is greater than the hardware page size. In address
1034 * spaces where they're identical, the index will always be zero.
1035 */
1036 const unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
1037 assert(ttep_index < PT_INDEX_MAX);
1038
1039 return ttep_index;
1040 #endif
1041 }
1042
1043 /**
1044 * In address spaces where the VM page size doesn't match the underlying
1045 * hardware page size, one PTD could represent multiple page tables. This
1046 * function returns the correct ptd_info_t structure depending on which page
1047 * table is being accessed.
1048 *
1049 * @note See the description above the PT_INDEX_MAX definition for a more
1050 * detailed explanation of why multiple page tables can be represented
1051 * by a single PTD object in the pv_head_table.
1052 *
1053 * @param ptd The page table descriptor that's being accessed.
1054 * @param ttep Pointer to the translation table entry that's being accessed.
1055 *
1056 * @return The correct ptd_info_t structure for a specific, hardware-sized page
1057 * table.
1058 */
1059 static inline ptd_info_t *
ptd_get_info(pt_desc_t * ptd,const tt_entry_t * ttep)1060 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1061 {
1062 assert((ptd != NULL) && (ptd->ptd_info[0].refcnt < PT_DESC_IOMMU_GRANTED_REFCOUNT));
1063
1064 return &ptd->ptd_info[ptd_get_index(ptd, ttep)];
1065 }
1066
1067 /**
1068 * Given a pointer to a page table entry, return back the ptd_info structure
1069 * for the page table that contains that entry.
1070 *
1071 * @param ptep Pointer to a PTE whose ptd_info object to return.
1072 *
1073 * @return The ptd_info object for the page table that contains the passed in
1074 * page table entry.
1075 */
1076 static inline ptd_info_t *
ptep_get_info(const pt_entry_t * ptep)1077 ptep_get_info(const pt_entry_t *ptep)
1078 {
1079 return ptd_get_info(ptep_get_ptd(ptep), ptep);
1080 }
1081
1082 /**
1083 * Return the virtual address mapped by the passed in leaf page table entry,
1084 * using an already-retrieved pagetable descriptor.
1085 *
1086 * @param ptdp pointer to the descriptor for the pagetable containing ptep
1087 * @param ptep Pointer to a PTE to parse
1088 */
1089 static inline vm_map_address_t
ptd_get_va(const pt_desc_t * ptdp,const pt_entry_t * ptep)1090 ptd_get_va(const pt_desc_t *ptdp, const pt_entry_t *ptep)
1091 {
1092 const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptdp->pmap);
1093
1094 vm_map_address_t va = ptdp->va[ptd_get_index(ptdp, ptep)];
1095 vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1096
1097 va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1098
1099 return va;
1100 }
1101
1102 /**
1103 * Return the virtual address that is being mapped by the passed in leaf page
1104 * table entry.
1105 *
1106 * @param ptep Pointer to a PTE to parse.
1107 */
1108 static inline vm_map_address_t
ptep_get_va(const pt_entry_t * ptep)1109 ptep_get_va(const pt_entry_t *ptep)
1110 {
1111 return ptd_get_va(ptep_get_ptd(ptep), ptep);
1112 }
1113
1114 /**
1115 * Physical Page Attribute Table (pp_attr_table) defines and helper functions.
1116 */
1117
1118 /* How many bits to use for flags on a per-VM-page basis. */
1119 typedef uint16_t pp_attr_t;
1120
1121 /* See the definition of pp_attr_table for more information. */
1122 extern volatile pp_attr_t* pp_attr_table;
1123
1124 /**
1125 * Flags stored in the pp_attr_table on a per-physical-page basis.
1126 *
1127 * Please update the pv_walk LLDB macro if these flags are changed or added to.
1128 */
1129
1130 /**
1131 * The bottom 6-bits are used to store the default WIMG (cacheability and memory
1132 * type) setting for this physical page. This can be changed by calling
1133 * pmap_set_cache_attributes().
1134 *
1135 * If a default WIMG setting isn't set for a page, then the default is Normal,
1136 * Cached memory (VM_WIMG_DEFAULT).
1137 */
1138 #define PP_ATTR_WIMG_MASK 0x003F
1139 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1140
1141 /**
1142 * The reference and modify bits keep track of whether a page has been accessed
1143 * or modified since the last time the bits were cleared. These bits are used to
1144 * enforce policy decisions in the VM layer.
1145 */
1146 #define PP_ATTR_REFERENCED 0x0040
1147 #define PP_ATTR_MODIFIED 0x0080
1148
1149 /**
1150 * This physical page is being used as anonymous memory that's internally
1151 * managed by the VM and is not connected to an external pager. This flag is
1152 * only set/cleared on the first CPU mapping of a page (see PVH_FLAG_CPU). Any
1153 * subsequent mappings won't set/clear this flag until all mappings are removed
1154 * and a new CPU mapping is added.
1155 */
1156 #define PP_ATTR_INTERNAL 0x0100
1157
1158 /**
1159 * This flag is used to keep track of pages that are still resident but are not
1160 * considered dirty and can be reclaimed under memory pressure. These pages do
1161 * not count as a part of the memory footprint, so the footprint ledger does not
1162 * need to be updated for these pages. This is hinted to the VM by the
1163 * `madvise(MADV_FREE_REUSABLE)` system call.
1164 */
1165 #define PP_ATTR_REUSABLE 0x0200
1166
1167 /**
1168 * This flag denotes that a page is utilizing "alternate accounting". This means
1169 * that the pmap doesn't need to keep track of these pages with regards to the
1170 * footprint ledger because the VM is already accounting for them in a different
1171 * way. These include IOKit mappings (VM adds their entire virtual size to the
1172 * footprint), and purgeable pages (VM counts them only when non-volatile and
1173 * only for one "owner"), among others.
1174 *
1175 * Note that alternate accounting status is tracked on a per-mapping basis (not
1176 * per-page). Because of that the ALTACCT flag in the pp_attr_table is only used
1177 * when there's a single mapping to a page. When there are multiple mappings,
1178 * the status of this flag is tracked in the pv_head_table (see PVE_PTEP_ALTACCT
1179 * above).
1180 */
1181 #define PP_ATTR_ALTACCT 0x0400
1182
1183 /**
1184 * This bit was originally used on x86 to keep track of what pages to not
1185 * encrypt during the hibernation process as a performance optimization when
1186 * encryption was done in software. This doesn't apply to the ARM
1187 * hibernation process because all pages are automatically encrypted using
1188 * hardware acceleration. Despite that, the pmap still keeps track of this flag
1189 * as a debugging aid on internal builds.
1190 *
1191 * TODO: This bit can probably be reclaimed:
1192 * rdar://70740650 (PMAP Cleanup: Potentially reclaim the PP_ATTR_NOENCRYPT bit on ARM)
1193 */
1194 #define PP_ATTR_NOENCRYPT 0x0800
1195
1196 /**
1197 * These bits denote that a physical page is expecting the next access or
1198 * modification to set the PP_ATTR_REFERENCED and PP_ATTR_MODIFIED flags
1199 * respectively.
1200 */
1201 #define PP_ATTR_REFFAULT 0x1000
1202 #define PP_ATTR_MODFAULT 0x2000
1203
1204 #if XNU_MONITOR
1205 /**
1206 * Denotes that a page is owned by the PPL. This is modified/checked with the
1207 * PVH lock held, to avoid ownership related races. This does not need to be a
1208 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1209 * put the bit.
1210 */
1211 #define PP_ATTR_MONITOR 0x4000
1212
1213 /**
1214 * Denotes that a page *cannot* be owned by the PPL. This is required in order
1215 * to temporarily 'pin' kernel pages that are used to store PPL output
1216 * parameters. Otherwise a malicious or buggy caller could pass PPL-owned memory
1217 * for these parameters and in so doing stage a write gadget against the PPL.
1218 */
1219 #define PP_ATTR_NO_MONITOR 0x8000
1220
1221 /**
1222 * All of the bits owned by the PPL; kernel requests to set or clear these bits
1223 * are illegal.
1224 */
1225 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1226 #endif /* XNU_MONITOR */
1227
1228 /**
1229 * Atomically set some flags in a pp_attr_table entry.
1230 *
1231 * @param pai The physical address index for the entry to update.
1232 * @param bits The flags to set in the entry.
1233 */
1234 static inline void
ppattr_set_bits(unsigned int pai,pp_attr_t bits)1235 ppattr_set_bits(unsigned int pai, pp_attr_t bits)
1236 {
1237 volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1238 os_atomic_or(ppattr, bits, acq_rel);
1239 }
1240
1241 /**
1242 * Atomically clear some flags in a pp_attr_table entry.
1243 *
1244 * @param pai The physical address index for the entry to update.
1245 * @param bits The flags to clear in the entry.
1246 */
1247 static inline void
ppattr_clear_bits(unsigned int pai,pp_attr_t bits)1248 ppattr_clear_bits(unsigned int pai, pp_attr_t bits)
1249 {
1250 volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1251 os_atomic_andnot(ppattr, bits, acq_rel);
1252 }
1253
1254 /**
1255 * Return true if the pp_attr_table entry contains the passed in bits.
1256 *
1257 * @param pai The physical address index for the entry to test.
1258 * @param bits The flags to check for.
1259 */
1260 static inline bool
ppattr_test_bits(unsigned int pai,pp_attr_t bits)1261 ppattr_test_bits(unsigned int pai, pp_attr_t bits)
1262 {
1263 const volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1264 return (*ppattr & bits) == bits;
1265 }
1266
1267 /**
1268 * Only set some flags in a pp_attr_table entry if the passed in physical
1269 * address is a kernel-managed address.
1270 *
1271 * @param pa The physical address for the entry to update.
1272 * @param bits The flags to set in the entry.
1273 */
1274 static inline void
ppattr_pa_set_bits(pmap_paddr_t pa,pp_attr_t bits)1275 ppattr_pa_set_bits(pmap_paddr_t pa, pp_attr_t bits)
1276 {
1277 if (pa_valid(pa)) {
1278 ppattr_set_bits(pa_index(pa), bits);
1279 }
1280 }
1281
1282 /**
1283 * Only clear some flags in a pp_attr_table entry if the passed in physical
1284 * address is a kernel-managed address.
1285 *
1286 * @param pa The physical address for the entry to update.
1287 * @param bits The flags to clear in the entry.
1288 */
1289 static inline void
ppattr_pa_clear_bits(pmap_paddr_t pa,pp_attr_t bits)1290 ppattr_pa_clear_bits(pmap_paddr_t pa, pp_attr_t bits)
1291 {
1292 if (pa_valid(pa)) {
1293 ppattr_clear_bits(pa_index(pa), bits);
1294 }
1295 }
1296
1297 /**
1298 * Only test flags in a pp_attr_table entry if the passed in physical address
1299 * is a kernel-managed page.
1300 *
1301 * @param pa The physical address for the entry to test.
1302 * @param bits The flags to check for.
1303 *
1304 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1305 * depending on whether the bits are set.
1306 */
1307 static inline bool
ppattr_pa_test_bits(pmap_paddr_t pa,pp_attr_t bits)1308 ppattr_pa_test_bits(pmap_paddr_t pa, pp_attr_t bits)
1309 {
1310 return pa_valid(pa) ? ppattr_test_bits(pa_index(pa), bits) : false;
1311 }
1312
1313 /**
1314 * Set the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the passed
1315 * in physical address is a kernel-managed page.
1316 *
1317 * @param pa The physical address for the entry to update.
1318 */
static inline void
ppattr_pa_set_modify(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_set_bits). */
	ppattr_pa_set_bits(pa, PP_ATTR_MODIFIED);
}
1324
1325 /**
1326 * Clear the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the
1327 * passed in physical address is a kernel-managed page.
1328 *
1329 * @param pa The physical address for the entry to update.
1330 */
static inline void
ppattr_pa_clear_modify(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_clear_bits). */
	ppattr_pa_clear_bits(pa, PP_ATTR_MODIFIED);
}
1336
1337 /**
1338 * Set the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1339 * passed in physical address is a kernel-managed page.
1340 *
1341 * @param pa The physical address for the entry to update.
1342 */
static inline void
ppattr_pa_set_reference(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_set_bits). */
	ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
}
1348
1349 /**
1350 * Clear the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1351 * passed in physical address is a kernel-managed page.
1352 *
1353 * @param pa The physical address for the entry to update.
1354 */
static inline void
ppattr_pa_clear_reference(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_clear_bits). */
	ppattr_pa_clear_bits(pa, PP_ATTR_REFERENCED);
}
1360
1361 #if XNU_MONITOR
1362
1363 /**
1364 * Set the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the passed
1365 * in physical address is a kernel-managed page.
1366 *
1367 * @param pa The physical address for the entry to update.
1368 */
static inline void
ppattr_pa_set_monitor(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_set_bits). */
	ppattr_pa_set_bits(pa, PP_ATTR_MONITOR);
}
1374
1375 /**
1376 * Clear the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the
1377 * passed in physical address is a kernel-managed page.
1378 *
1379 * @param pa The physical address for the entry to update.
1380 */
static inline void
ppattr_pa_clear_monitor(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_clear_bits). */
	ppattr_pa_clear_bits(pa, PP_ATTR_MONITOR);
}
1386
1387 /**
1388 * Only test for the PP_ATTR_MONITOR flag in a pp_attr_table entry if the passed
1389 * in physical address is a kernel-managed page.
1390 *
1391 * @param pa The physical address for the entry to test.
1392 *
1393 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1394 * depending on whether the PP_ATTR_MONITOR is set.
1395 */
static inline bool
ppattr_pa_test_monitor(pmap_paddr_t pa)
{
	/* Returns false for non-kernel-managed pages (see ppattr_pa_test_bits). */
	return ppattr_pa_test_bits(pa, PP_ATTR_MONITOR);
}
1401
1402 /**
1403 * Set the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1404 * passed in physical address is a kernel-managed page.
1405 *
1406 * @param pa The physical address for the entry to update.
1407 */
static inline void
ppattr_pa_set_no_monitor(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_set_bits). */
	ppattr_pa_set_bits(pa, PP_ATTR_NO_MONITOR);
}
1413
1414 /**
1415 * Clear the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1416 * passed in physical address is a kernel-managed page.
1417 *
1418 * @param pa The physical address for the entry to update.
1419 */
static inline void
ppattr_pa_clear_no_monitor(pmap_paddr_t pa)
{
	/* No-op if `pa` is not kernel-managed (see ppattr_pa_clear_bits). */
	ppattr_pa_clear_bits(pa, PP_ATTR_NO_MONITOR);
}
1425
1426 /**
1427 * Only test for the PP_ATTR_NO_MONITOR flag in a pp_attr_table entry if the
1428 * passed in physical address is a kernel-managed page.
1429 *
1430 * @param pa The physical address for the entry to test.
1431 *
1432 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1433 * depending on whether the PP_ATTR_NO_MONITOR is set.
1434 */
static inline bool
ppattr_pa_test_no_monitor(pmap_paddr_t pa)
{
	/* Returns false for non-kernel-managed pages (see ppattr_pa_test_bits). */
	return ppattr_pa_test_bits(pa, PP_ATTR_NO_MONITOR);
}
1440
1441 #endif /* XNU_MONITOR */
1442
1443 /**
1444 * Set the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1445 *
1446 * @param pai The physical address index for the entry to update.
1447 */
static inline void
ppattr_set_internal(unsigned int pai)
{
	/* Atomic RMW on the entry; caller supplies a valid page index. */
	ppattr_set_bits(pai, PP_ATTR_INTERNAL);
}
1453
1454 /**
1455 * Clear the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1456 *
1457 * @param pai The physical address index for the entry to update.
1458 */
static inline void
ppattr_clear_internal(unsigned int pai)
{
	/* Atomic RMW on the entry; caller supplies a valid page index. */
	ppattr_clear_bits(pai, PP_ATTR_INTERNAL);
}
1464
1465 /**
1466 * Return true if the pp_attr_table entry has the PP_ATTR_INTERNAL flag set.
1467 *
1468 * @param pai The physical address index for the entry to test.
1469 */
static inline bool
ppattr_test_internal(unsigned int pai)
{
	/* Plain (non-atomic) read of the entry's INTERNAL bit. */
	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
}
1475
1476 /**
1477 * Set the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1478 *
1479 * @param pai The physical address index for the entry to update.
1480 */
static inline void
ppattr_set_reusable(unsigned int pai)
{
	/* Atomic RMW on the entry; caller supplies a valid page index. */
	ppattr_set_bits(pai, PP_ATTR_REUSABLE);
}
1486
1487 /**
1488 * Clear the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1489 *
1490 * @param pai The physical address index for the entry to update.
1491 */
static inline void
ppattr_clear_reusable(unsigned int pai)
{
	/* Atomic RMW on the entry; caller supplies a valid page index. */
	ppattr_clear_bits(pai, PP_ATTR_REUSABLE);
}
1497
1498 /**
1499 * Return true if the pp_attr_table entry has the PP_ATTR_REUSABLE flag set.
1500 *
1501 * @param pai The physical address index for the entry to test.
1502 */
static inline bool
ppattr_test_reusable(unsigned int pai)
{
	/* Plain (non-atomic) read of the entry's REUSABLE bit. */
	return ppattr_test_bits(pai, PP_ATTR_REUSABLE);
}
1508
1509 /**
1510 * Set the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1511 *
1512 * @note This is only valid when the ALTACCT flag is being tracked using the
1513 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1514 * PP_ATTR_ALTACCT definitions for more information.
1515 *
1516 * @param pai The physical address index for the entry to update.
1517 */
static inline void
ppattr_set_altacct(unsigned int pai)
{
	/* Valid only for singly-mapped pages; see PVE_PTEP_ALTACCT. */
	ppattr_set_bits(pai, PP_ATTR_ALTACCT);
}
1523
1524 /**
1525 * Clear the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1526 *
1527 * @note This is only valid when the ALTACCT flag is being tracked using the
1528 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1529 * PP_ATTR_ALTACCT definitions for more information.
1530 *
1531 * @param pai The physical address index for the entry to update.
1532 */
static inline void
ppattr_clear_altacct(unsigned int pai)
{
	/* Valid only for singly-mapped pages; see PVE_PTEP_ALTACCT. */
	ppattr_clear_bits(pai, PP_ATTR_ALTACCT);
}
1538
1539 /**
1540 * Get the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1541 *
1542 * @note This is only valid when the ALTACCT flag is being tracked using the
1543 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1544 * PP_ATTR_ALTACCT definitions for more information.
1545 *
1546 * @param pai The physical address index for the entry to test.
1547 *
1548 * @return True if the passed in page uses alternate accounting, false
1549 * otherwise.
1550 */
static inline bool
ppattr_is_altacct(unsigned int pai)
{
	/* Valid only for singly-mapped pages; see PVE_PTEP_ALTACCT. */
	return ppattr_test_bits(pai, PP_ATTR_ALTACCT);
}
1556 /**
1557 * Get the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1558 *
1559 * @note This is only valid when the INTERNAL flag is being tracked using the
1560 * pp_attr_table. See the descriptions above the PVE_PTEP_INTERNAL and
1561 * PP_ATTR_INTERNAL definitions for more information.
1562 *
1563 * @param pai The physical address index for the entry to test.
1564 *
1565 * @return True if the passed in page is accounted for as "internal", false
1566 * otherwise.
1567 */
static inline bool
ppattr_is_internal(unsigned int pai)
{
	/* Valid only for singly-mapped pages; see PVE_PTEP_INTERNAL. */
	return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
}
1573
1574 /**
1575 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1576 * depending on whether there are one or multiple mappings to a page. This
1577 * function abstracts out the difference between single and multiple mappings to
1578 * a page and provides a single function for determining whether alternate
1579 * accounting is set for a mapping.
1580 *
1581 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1582 * definitions for more information.
1583 *
1584 * @param pai The physical address index for the entry to test.
1585 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1586 * @param idx Index of the chosen PTE pointer inside the PVE.
1587 *
1588 * @return True if the passed in page uses alternate accounting, false
1589 * otherwise.
1590 */
1591 static inline bool
ppattr_pve_is_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1592 ppattr_pve_is_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1593 {
1594 return (pvep == PV_ENTRY_NULL) ? ppattr_is_altacct(pai) : pve_get_altacct(pvep, idx);
1595 }
1596 /**
1597 * The "internal" (INTERNAL) status for a page is tracked differently
1598 * depending on whether there are one or multiple mappings to a page. This
1599 * function abstracts out the difference between single and multiple mappings to
1600 * a page and provides a single function for determining whether "internal"
1601 * is set for a mapping.
1602 *
1603 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1604 * definitions for more information.
1605 *
1606 * @param pai The physical address index for the entry to test.
1607 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1608 * @param idx Index of the chosen PTE pointer inside the PVE.
1609 *
1610 * @return True if the passed in page is "internal", false otherwise.
1611 */
1612 static inline bool
ppattr_pve_is_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1613 ppattr_pve_is_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1614 {
1615 return (pvep == PV_ENTRY_NULL) ? ppattr_is_internal(pai) : pve_get_internal(pvep, idx);
1616 }
1617
1618 /**
1619 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1620 * depending on whether there are one or multiple mappings to a page. This
1621 * function abstracts out the difference between single and multiple mappings to
1622 * a page and provides a single function for setting the alternate accounting status
1623 * for a mapping.
1624 *
1625 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1626 * definitions for more information.
1627 *
1628 * @param pai The physical address index for the entry to update.
1629 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1630 * @param idx Index of the chosen PTE pointer inside the PVE.
1631 */
1632 static inline void
ppattr_pve_set_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1633 ppattr_pve_set_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1634 {
1635 if (pvep == PV_ENTRY_NULL) {
1636 ppattr_set_altacct(pai);
1637 } else {
1638 pve_set_altacct(pvep, idx);
1639 }
1640 }
1641 /**
1642 * The "internal" (INTERNAL) status for a page is tracked differently
1643 * depending on whether there are one or multiple mappings to a page. This
1644 * function abstracts out the difference between single and multiple mappings to
1645 * a page and provides a single function for setting the "internal" status
1646 * for a mapping.
1647 *
1648 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1649 * definitions for more information.
1650 *
1651 * @param pai The physical address index for the entry to update.
1652 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1653 * @param idx Index of the chosen PTE pointer inside the PVE.
1654 */
1655 static inline void
ppattr_pve_set_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1656 ppattr_pve_set_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1657 {
1658 if (pvep == PV_ENTRY_NULL) {
1659 ppattr_set_internal(pai);
1660 } else {
1661 pve_set_internal(pvep, idx);
1662 }
1663 }
1664
1665 /**
1666 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1667 * depending on whether there are one or multiple mappings to a page. This
1668 * function abstracts out the difference between single and multiple mappings to
1669 * a page and provides a single function for clearing the alternate accounting status
1670 * for a mapping.
1671 *
1672 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1673 * definitions for more information.
1674 *
1675 * @param pai The physical address index for the entry to update.
1676 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1677 * @param idx Index of the chosen PTE pointer inside the PVE.
1678 */
1679 static inline void
ppattr_pve_clr_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1680 ppattr_pve_clr_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1681 {
1682 if (pvep == PV_ENTRY_NULL) {
1683 ppattr_clear_altacct(pai);
1684 } else {
1685 pve_clr_altacct(pvep, idx);
1686 }
1687 }
1688 /**
1689 * The "internal" (INTERNAL) status for a page is tracked differently
1690 * depending on whether there are one or multiple mappings to a page. This
1691 * function abstracts out the difference between single and multiple mappings to
1692 * a page and provides a single function for clearing the "internal" status
1693 * for a mapping.
1694 *
1695 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1696 * definitions for more information.
1697 *
1698 * @param pai The physical address index for the entry to update.
1699 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1700 * @param idx Index of the chosen PTE pointer inside the PVE.
1701 */
1702 static inline void
ppattr_pve_clr_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1703 ppattr_pve_clr_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1704 {
1705 if (pvep == PV_ENTRY_NULL) {
1706 ppattr_clear_internal(pai);
1707 } else {
1708 pve_clr_internal(pvep, idx);
1709 }
1710 }
1711
/**
 * Set the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @note Thin wrapper around ppattr_set_bits(); the flag's meaning is
 *       documented at the PP_ATTR_REFFAULT definition.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_reffault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_REFFAULT);
}
1722
/**
 * Clear the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
 *
 * @note Thin wrapper around ppattr_clear_bits(); the flag's meaning is
 *       documented at the PP_ATTR_REFFAULT definition.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_reffault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_REFFAULT);
}
1733
/**
 * Return true if the pp_attr_table entry has the PP_ATTR_REFFAULT flag set.
 *
 * @note Thin wrapper around ppattr_test_bits().
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_REFFAULT is set for this entry, false otherwise.
 */
static inline bool
ppattr_test_reffault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_REFFAULT);
}
1744
/**
 * Set the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @note Thin wrapper around ppattr_set_bits(); the flag's meaning is
 *       documented at the PP_ATTR_MODFAULT definition.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_set_modfault(unsigned int pai)
{
	ppattr_set_bits(pai, PP_ATTR_MODFAULT);
}
1755
/**
 * Clear the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
 *
 * @note Thin wrapper around ppattr_clear_bits(); the flag's meaning is
 *       documented at the PP_ATTR_MODFAULT definition.
 *
 * @param pai The physical address index for the entry to update.
 */
static inline void
ppattr_clear_modfault(unsigned int pai)
{
	ppattr_clear_bits(pai, PP_ATTR_MODFAULT);
}
1766
/**
 * Return true if the pp_attr_table entry has the PP_ATTR_MODFAULT flag set.
 *
 * @note Thin wrapper around ppattr_test_bits().
 *
 * @param pai The physical address index for the entry to test.
 *
 * @return True if PP_ATTR_MODFAULT is set for this entry, false otherwise.
 */
static inline bool
ppattr_test_modfault(unsigned int pai)
{
	return ppattr_test_bits(pai, PP_ATTR_MODFAULT);
}
1777
/**
 * Check whether the current context should be considered preemptible by the
 * pmap's preemptibility assertions.
 *
 * @return TRUE if preemption is currently enabled, or if the system has not
 *         yet reached STARTUP_SUB_EARLY_BOOT (presumably preemptibility
 *         requirements don't apply that early in boot — NOTE(review):
 *         confirm rationale against callers of pmap_verify_preemptible()).
 */
static inline boolean_t
pmap_is_preemptible(void)
{
	return preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT);
}
1783
/**
 * This helper function ensures that potentially-long-running batched PPL operations are
 * called in preemptible context before entering the PPL, so that the PPL call may
 * periodically exit to allow pending urgent ASTs to be taken.
 *
 * @note Enforcement is assert()-based, so it only takes effect on
 *       configurations where assertions are compiled in; otherwise this is
 *       a no-op.
 */
static inline void
pmap_verify_preemptible(void)
{
	assert(pmap_is_preemptible());
}
1794
1795 /**
1796 * The minimum number of pages to keep in the PPL page free list.
1797 *
1798 * We define our target as 8 pages: enough for 2 page table pages, a PTD page,
1799 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
1800 * a single pmap_enter request.
1801 */
1802 #define PMAP_MIN_FREE_PPL_PAGES 8
1803
1804 /**
1805 * Flags passed to various page allocation functions, usually accessed through
1806 * the pmap_pages_alloc_zeroed() API. Each function that can take these flags as
1807 * a part of its option field, will describe these flags in its function header.
1808 */
1809
1810 /**
1811 * Instruct the allocation function to return immediately if no pages are
1812 * current available. Without this flag, the function will spin and wait for a
1813 * page to become available. This flag can be required in some circumstances
1814 * (for instance, when allocating pages from within the PPL).
1815 */
1816 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1817
1818 /**
 * Instructs an allocation function to fall back to reclaiming a userspace page
1820 * table if it failed to allocate a page from the free lists. This can be useful
1821 * when allocating from within the PPL because refilling the free lists requires
1822 * exiting and re-entering the PPL (which incurs extra latency).
1823 *
1824 * This is a quick way of allocating a page at the expense of having to
1825 * reallocate the table the next time one of its mappings is accessed.
1826 */
1827 #define PMAP_PAGE_RECLAIM_NOWAIT 0x2
1828
1829 /**
1830 * Global variables exported to the rest of the internal pmap implementation.
1831 */
1832 #if XNU_MONITOR
1833 extern uint64_t pmap_ppl_free_page_count;
1834 extern pmap_paddr_t pmap_stacks_start_pa;
1835 extern pmap_paddr_t pmap_stacks_end_pa;
1836 extern pmap_paddr_t ppl_cpu_save_area_start;
1837 extern pmap_paddr_t ppl_cpu_save_area_end;
1838 #endif /* XNU_MONITOR */
1839 extern unsigned int inuse_pmap_pages_count;
1840 extern vm_object_t pmap_object;
1841 extern uint32_t pv_alloc_initial_target;
1842 extern uint32_t pv_kern_alloc_initial_target;
1843
1844 /**
1845 * Functions exported to the rest of the internal pmap implementation.
1846 */
1847 extern void pmap_data_bootstrap(void);
1848 extern void pmap_enqueue_pages(vm_page_t);
1849 extern kern_return_t pmap_pages_alloc_zeroed(pmap_paddr_t *, unsigned, unsigned);
1850 extern void pmap_pages_free(pmap_paddr_t, unsigned);
1851
1852 #if XNU_MONITOR
1853
1854 extern void pmap_mark_page_as_ppl_page_internal(pmap_paddr_t, bool);
1855 extern void pmap_mark_page_as_ppl_page(pmap_paddr_t);
1856 extern void pmap_mark_page_as_kernel_page(pmap_paddr_t);
1857 extern pmap_paddr_t pmap_alloc_page_for_kern(unsigned int);
1858 extern void pmap_alloc_page_for_ppl(unsigned int);
1859 extern uint64_t pmap_release_ppl_pages_to_kernel(void);
1860
1861 extern uint64_t pmap_ledger_validate(const volatile void *);
1862 void pmap_ledger_retain(ledger_t ledger);
1863 void pmap_ledger_release(ledger_t ledger);
1864 extern void pmap_ledger_check_balance(pmap_t pmap);
1865
1866 kern_return_t pmap_alloc_pmap(pmap_t *pmap);
1867 void pmap_free_pmap(pmap_t pmap);
1868
1869 #endif /* XNU_MONITOR */
1870
1871 /**
1872 * The modes in which a pmap lock can be acquired. Note that shared access
1873 * doesn't necessarily mean "read-only". As long as data is atomically updated
1874 * correctly (to account for multi-cpu accesses) data can still get written with
1875 * a shared lock held. Care just needs to be taken so as to not introduce any
1876 * race conditions when there are multiple writers.
1877 *
1878 * This is here in pmap_data.h because it's a needed parameter for pv_alloc()
1879 * and pmap_enter_pv(). This header is always included in pmap_internal.h before
1880 * the rest of the pmap locking code is defined so there shouldn't be any issues
1881 * with missing types.
1882 */
1883 OS_ENUM(pmap_lock_mode, uint8_t,
1884 PMAP_LOCK_SHARED,
1885 PMAP_LOCK_EXCLUSIVE);
1886
1887 /**
1888 * Possible return values for pv_alloc(). See the pv_alloc() function header for
1889 * a description of each of these values.
1890 */
typedef enum {
	PV_ALLOC_SUCCESS, /* A PV entry was allocated. */
	PV_ALLOC_RETRY,   /* Caller should retry the operation — see pv_alloc(). */
	PV_ALLOC_FAIL     /* No PV entry could be allocated. */
} pv_alloc_return_t;
1896
1897 extern pv_alloc_return_t pv_alloc(
1898 pmap_t, unsigned int, pmap_lock_mode_t, unsigned int, pv_entry_t **);
1899 extern void pv_free(pv_entry_t *);
1900 extern void pv_list_free(pv_entry_t *, pv_entry_t *, int);
1901 extern void pmap_compute_pv_targets(void);
1902 extern pv_alloc_return_t pmap_enter_pv(
1903 pmap_t, pt_entry_t *, int, unsigned int, pmap_lock_mode_t, pv_entry_t **, int *new_pve_ptep_idx);
1904 extern void pmap_remove_pv(pmap_t, pt_entry_t *, int, bool, bool *, bool *);
1905
1906 extern void ptd_bootstrap(pt_desc_t *, unsigned int);
1907 extern pt_desc_t *ptd_alloc_unlinked(void);
1908 extern pt_desc_t *ptd_alloc(pmap_t);
1909 extern void ptd_deallocate(pt_desc_t *);
1910 extern void ptd_info_init(
1911 pt_desc_t *, pmap_t, vm_map_address_t, unsigned int, pt_entry_t *);
1912
1913 extern kern_return_t pmap_ledger_credit(pmap_t, int, ledger_amount_t);
1914 extern kern_return_t pmap_ledger_debit(pmap_t, int, ledger_amount_t);
1915
1916 extern void validate_pmap_internal(const volatile struct pmap *, const char *);
1917 extern void validate_pmap_mutable_internal(const volatile struct pmap *, const char *);
1918
1919 /**
1920 * Macro function wrappers around pmap validation so that the calling function
1921 * can be printed in the panic strings for easier validation failure debugging.
1922 */
1923 #define validate_pmap(x) validate_pmap_internal(x, __func__)
1924 #define validate_pmap_mutable(x) validate_pmap_mutable_internal(x, __func__)
1925
1926 /**
1927 * This structure describes a PPL-owned I/O range.
1928 *
1929 * @note This doesn't necessarily have to represent "I/O" only, this can also
1930 * represent non-kernel-managed DRAM (e.g., iBoot carveouts). Any physical
1931 * address region that isn't considered "kernel-managed" is fair game.
1932 *
1933 * @note The layout of this structure needs to map 1-to-1 with the pmap-io-range
1934 * device tree nodes. Astris (through the LowGlobals) also depends on the
1935 * consistency of this structure.
1936 */
typedef struct pmap_io_range {
	/* Physical address of the PPL-owned I/O range. */
	uint64_t addr;

	/**
	 * Length (in bytes) of the PPL-owned I/O range. Has to be the size
	 * of a page if the range will be referred to by pmap_io_filter_entries.
	 */
	uint64_t len;

	/* Strong DSB required for pages in this range. */
#define PMAP_IO_RANGE_STRONG_SYNC (1U << 31)

	/* Corresponds to memory carved out by bootloader. */
#define PMAP_IO_RANGE_CARVEOUT (1U << 30)

	/* Pages in this range need to be included in the hibernation image */
#define PMAP_IO_RANGE_NEEDS_HIBERNATING (1U << 29)

	/* Mark the range as 'owned' by a given subsystem */
#define PMAP_IO_RANGE_OWNED (1U << 28)

	/**
	 * Denotes a range that is *not* to be treated as an I/O range that
	 * needs to be mapped, but only to decorate arbitrary physical
	 * memory ranges (including of managed memory) with extra
	 * flags. I.e. this allows tagging of "ordinary" managed memory
	 * pages with flags like `PMAP_IO_RANGE_PROHIBIT_HIB_WRITE`, or
	 * informing the SPTM that some (nominally) managed memory pages are
	 * unavailable for some reason.
	 *
	 * Notably, `pmap_find_io_attr()`, and anything else that uses
	 * `pmap_io_range`s for denoting to-be-mapped I/O ranges, ignores
	 * entries with this flag.
	 */
#define PMAP_IO_RANGE_NOT_IO (1U << 27)

	/* Pages in this range may never be written during hibernation restore. */
#define PMAP_IO_RANGE_PROHIBIT_HIB_WRITE (1U << 26)

	/**
	 * Lower 16 bits treated as pp_attr_t, upper 16 bits contain additional
	 * mapping flags (defined above).
	 */
	uint32_t wimg;

	/**
	 * 4 Character Code (4CC) describing what this range is.
	 *
	 * This has to be unique for each "type" of pages, meaning pages sharing
	 * the same register layout, if it is used for the I/O filter descriptors
	 * below. Otherwise it doesn't matter.
	 */
	uint32_t signature;
} pmap_io_range_t;
1992
1993 /* Reminder: be sure to change all relevant device trees if you change the layout of pmap_io_range_t */
1994 _Static_assert(sizeof(pmap_io_range_t) == 24, "unexpected size for pmap_io_range_t");
1995
1996 extern pmap_io_range_t* pmap_find_io_attr(pmap_paddr_t);
1997
1998 /**
1999 * This structure describes a sub-page-size I/O region owned by PPL but the kernel can write to.
2000 *
2001 * @note I/O filter software will use a collection of such data structures to determine access
2002 * permissions to a page owned by PPL.
2003 *
2004 * @note The {signature, offset} key is used to index a collection of such data structures to
2005 * optimize for space in the case where one page layout is repeated for many devices, such
2006 * as the memory controller channels.
2007 */
typedef struct pmap_io_filter_entry {
	/* 4 Character Code (4CC) describing what this range (page) is. */
	uint32_t signature;

	/* Offset within the page. It has to be within [0, PAGE_SIZE). */
	uint16_t offset;

	/* Length of the range; (offset + length) has to be within [0, PAGE_SIZE). */
	uint16_t length;
} pmap_io_filter_entry_t;
2018
2019 _Static_assert(sizeof(pmap_io_filter_entry_t) == 8, "unexpected size for pmap_io_filter_entry_t");
2020
2021 extern pmap_io_filter_entry_t *pmap_find_io_filter_entry(pmap_paddr_t, uint64_t, const pmap_io_range_t **);
2022
2023 extern void pmap_cpu_data_init_internal(unsigned int);
2024
2025 /**
2026 * Flush a single 16K page from noncoherent coprocessor caches.
2027 *
 * @note Noncoherent cache flushes are only guaranteed to work if the participating coprocessor(s)
2029 * do not have any active VA translations for the page being flushed. Since coprocessor
2030 * mappings should always be controlled by some PPL IOMMU extension, they should always
2031 * have PV list entries. This flush should therefore be performed at a point when the PV
2032 * list is known to be either empty or at least to not contain any IOMMU entries. For
2033 * the purposes of our security model, it is sufficient to wait for the PV list to become
2034 * empty, as we really want to protect PPL-sensitive pages from malicious/accidental
2035 * coprocessor cacheline evictions, and the PV list must be empty before a page can be
2036 * handed to the PPL.
2037 *
2038 * @param paddr The base physical address of the page to flush.
2039 */
2040 extern void pmap_flush_noncoherent_page(pmap_paddr_t paddr);
2041
2042 #if DEBUG || DEVELOPMENT
2043 extern unsigned int pmap_wcrt_on_non_dram_count_get(void);
2044 extern void pmap_wcrt_on_non_dram_count_increment_atomic(void);
2045 #endif /* DEBUG || DEVELOPMENT */
2046 #endif /* _ARM_PMAP_PMAP_DATA_H_ */
2047