1 /*
2 * Copyright (c) 2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /**
29 * This header file is used to store the types, prototypes, and inline functions
30 * that define some of the most important data structures used in the pmap. This
31 * header is only meant for sharing types within the pmap; if a type is meant to
32 * be used by the rest of the kernel, then put it into osfmk/arm/pmap.h.
33 */
34 #ifndef _ARM_PMAP_PMAP_DATA_H_
35 #define _ARM_PMAP_PMAP_DATA_H_
36
37 #include <stdint.h>
38
39 #include <kern/ledger.h>
40 #include <mach/vm_types.h>
41 #include <mach_assert.h>
42 #include <vm/vm_page.h>
43
44 #include <arm/cpu_data.h>
45 #include <arm/machine_routines.h>
46 #include <arm64/proc_reg.h>
47
48 /* Temporary include before moving all ledger functions into pmap_data.c */
49 #include <os/refcnt.h>
50
51 /**
52 * These headers are safe to be included in this file since they shouldn't rely
53 * on any of the internal pmap header files (so no circular dependencies).
54 */
55 #include <arm/pmap.h>
56 #include <arm/pmap/pmap_pt_geometry.h>
57
58 /**
59 * These values represent the first and last kernel-managed physical addresses.
60 * We keep track of extra metadata on kernel-managed pages compared to other
61 * pages (usually iBoot carved out memory or I/O).
62 */
63 extern pmap_paddr_t vm_first_phys, vm_last_phys;
64
65 /**
66 * Return whether the given address represents a kernel-managed physical page.
67 *
68 * Whether a page is considered "kernel-managed" is determined by the BootArgs
69 * passed by the bootloader. Typically memory carved out by the bootloader as
70 * well as I/O memory should return false.
71 *
72 * @param pa The physical address to check.
73 */
74 static inline bool
pa_valid(pmap_paddr_t pa)75 pa_valid(pmap_paddr_t pa)
76 {
77 return (pa >= vm_first_phys) && (pa < vm_last_phys);
78 }
79
80 /**
81 * The pmap has a variety of data structures (pv_head_table/pp_attr_table) that
82 * contain an entry for every kernel-managed page in the system. These systems
83 * are indexed with physical address indices ("pai") generated by this function.
84 *
85 * The logic is simple since there should be one entry in each of these data
86 * structures for each kernel-managed physical page in the system. These data
87 * structures are allocated on boot based on the amount of memory available.
88 *
89 * @note PAIs are defined using the VM page size, which might not be identical
90 * to the underlying hardware page size for an arbitrary address space.
91 * This means that the data structures relying on PAIs will contain one
92 * entry for each VM page, not hardware page.
93 *
94 * @note This function is only valid for physical addresses that are
95 * kernel-managed.
96 */
97
98 static inline unsigned int
pa_index(pmap_paddr_t pa)99 pa_index(pmap_paddr_t pa)
100 {
101 return (unsigned int)atop(pa - vm_first_phys);
102 }
103
104 /* See the definition of pv_head_table for more information. */
105 extern pv_entry_t **pv_head_table;
106
107 /* Represents a NULL entry in the pv_head_table. */
108 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
109
110 /**
111 * Given a physical address index, return the corresponding pv_head_table entry.
112 *
113 * @note Despite returning a pointer to a pv_entry_t pointer, the entry might
114 * actually be a different type of pointer (pt_entry_t or pt_desc_t)
115 * depending on the type for this entry. Determine the type using
116 * pvh_test_type().
117 *
118 * @param pai The index returned by pa_index() for the page whose pv_head_table
119 * entry should be retrieved.
120 */
121 static inline pv_entry_t **
pai_to_pvh(unsigned int pai)122 pai_to_pvh(unsigned int pai)
123 {
124 return &pv_head_table[pai];
125 }
126
127 /**
128 * Each pv_head_table entry can be one of four different types:
129 *
130 * - PVH_TYPE_NULL: No mappings to the physical page exist outside of the
131 * physical aperture. Physical aperture mappings are not
132 * tracked in the pv_head_table.
133 *
134 * - PVH_TYPE_PVEP: There are multiple mappings to the physical page.
135 * These entries are linked lists of pv_entry_t objects (which
136 * each contain a pointer to the associated PTE and a pointer
137 * to the next entry in the list).
138 *
139 * - PVH_TYPE_PTEP: There is a single mapping to the physical page. Once more
140 * mappings are created, this entry will get upgraded to an
141 * entry of type PVH_TYPE_PVEP. These entries are pointers
142 * directly to the page table entry that contain the mapping
143 * (pt_entry_t*).
144 *
145 * - PVH_TYPE_PTDP: The physical page is being used as a page table. These
146 * entries are pointers to page table descriptor structures
147 * (pt_desc_t) which contain metadata related to each page
148 * table.
149 *
150 * The type is stored in the bottom two bits of each pv_head_table entry. That
151 * type needs to be checked before dereferencing the pointer to determine which
152 * pointer type to dereference as.
153 */
154 #define PVH_TYPE_NULL 0x0UL
155 #define PVH_TYPE_PVEP 0x1UL
156 #define PVH_TYPE_PTEP 0x2UL
157 #define PVH_TYPE_PTDP 0x3UL
158
159 #define PVH_TYPE_MASK (0x3UL)
160
161 #if defined(__arm64__)
162
163 /**
164 * PV_HEAD_TABLE Flags.
165 *
166 * All flags listed below are stored in the pv_head_table entry/pointer
167 * (per-physical-page) unless otherwise noted.
168 *
169 * Please update the pv_walk LLDB macro if these flags are changed or added to.
170 */
171
172 /**
173 * This flag is set for every mapping created by an IOMMU.
174 *
175 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
176 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
177 */
178 #define PVH_FLAG_IOMMU 0x4UL
179
180 /**
181 * This flag is only valid when PVH_FLAG_IOMMU is set. For an IOMMU mapping, if
182 * this bit is set, then the PTE pointer points directly into the IOMMU page
183 * table for this mapping. If this bit is cleared, then the "PTE pointer" is
184 * actually a pointer to the IOMMU descriptor object that owns this mapping.
185 *
186 * There are cases where it's not easy to tie an IOMMU mapping directly to a
187 * specific page table, so this allows us to at least get a pointer to which
188 * IOMMU created this mapping which is useful for debugging purposes.
189 *
190 * Stored in each PTE pointer (for PVH_TYPE_PVEP lists), or in the pv_head_table
191 * entry/pointer for single-PTE entries (PVH_TYPE_PTEP).
192 */
193 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63)
194
195 /**
196 * This flag is set when the first CPU (non-IOMMU) mapping is created. This is
197 * important to keep track of because various accounting statistics are based on
198 * the options specified for the first CPU mapping. This flag, and thus the
199 * accounting statistics, will persist as long as there *any* mappings of the
200 * page (including IOMMU mappings). This works because the accounting for a page
201 * should not need to change until the page is recycled by the VM layer, and we
202 * double-check that there are no mappings (CPU or IOMMU) when a page is
203 * recycled (see: pmap_verify_free()).
204 */
205 #define PVH_FLAG_CPU (1ULL << 62)
206
207 /* This bit is used as a lock when modifying a pv_head_table entry. */
208 #define PVH_LOCK_BIT 61
209 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
210
211 /**
212 * This flag is set when there are any executable mappings to this physical
213 * page. This is used to prevent any writable mappings from being created at
214 * the same time an executable mapping exists.
215 */
216 #define PVH_FLAG_EXEC (1ULL << 60)
217
218 /**
219 * Marking a pv_head_table entry with this flag denotes that this page is a
220 * kernelcache text or data page that shouldn't have dynamically-created
221 * mappings. See PVH_FLAG_LOCKDOWN_MASK for more details.
222 */
223 #define PVH_FLAG_LOCKDOWN_KC (1ULL << 59)
224
225 /**
226 * This flag is used to mark that a page has been hashed into the hibernation
227 * image.
228 *
229 * The hibernation driver will use this to ensure that all PPL-owned memory is
230 * correctly included into the hibernation image (a missing PPL page could be
231 * a security concern when coming out of hibernation).
232 */
233 #define PVH_FLAG_HASHED (1ULL << 58)
234
235 /**
236 * Marking a pv_head_table entry with this flag denotes that this page is a
237 * code signature page that shouldn't have dynamically-created mappings.
238 * See PVH_FLAG_LOCKDOWN_MASK for more details.
239 */
240 #define PVH_FLAG_LOCKDOWN_CS (1ULL << 57)
241
242 /**
243 * Marking a pv_head_table entry with this flag denotes that this page is a
244 * read-only allocator page that shouldn't have dynamically-created mappings.
245 * See PVH_FLAG_LOCKDOWN_MASK for more details.
246 */
247 #define PVH_FLAG_LOCKDOWN_RO (1ULL << 56)
248
249 #define PVH_FLAG_RETIRED 0
250
251 #define PVH_FLAG_TAGS 0
252 #define PVH_FLAG_TAGGGED 0
253
254 /**
255 * Flags which disallow a new mapping to a page.
256 */
257 #define PVH_FLAG_NOMAP_MASK (PVH_FLAG_RETIRED | PVH_FLAG_TAGS)
258
259 /**
260 * Marking a pv_head_table entry with this flag denotes that this page has
261 * been mapped into a non-coherent coprocessor address space and requires a
262 * cache flush operation once all mappings have been removed.
263 */
264 #define PVH_FLAG_FLUSH_NEEDED (1ULL << 52)
265
266 /**
267 * Marking a pv_head_table entry with any bit in this mask denotes that this page
268 * has been locked down by the PPL. Locked down pages can't have new mappings
269 * created or existing mappings removed, and all existing mappings will have been
270 * converted to read-only. This essentially makes the page immutable.
271 */
272 #define PVH_FLAG_LOCKDOWN_MASK (PVH_FLAG_LOCKDOWN_KC | PVH_FLAG_LOCKDOWN_CS | PVH_FLAG_LOCKDOWN_RO)
273
274
275 /**
276 * These bits need to be set to safely dereference a pv_head_table
277 * entry/pointer.
278 *
279 * Any change to this #define should also update the copy located in the pmap.py
280 * LLDB macros file.
281 */
282
283 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN_MASK | \
284 PVH_FLAG_HASHED | PVH_FLAG_FLUSH_NEEDED | PVH_FLAG_RETIRED)
285
286
287 #endif /* defined(__arm64__) */
288
289 /* Mask used to clear out the TYPE bits from a pv_head_table entry/pointer. */
290 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
291
292 /* Which 32-bit word in each pv_head_table entry/pointer contains the LOCK bit. */
293 #if defined(__arm64__)
294 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
295 #endif /* defined(__arm64__) */
296
297 /**
298 * Assert that a pv_head_table entry is locked. Will panic if the lock isn't
299 * acquired.
300 *
301 * @param index The physical address index to check.
302 */
303 static inline void
pvh_assert_locked(__assert_only unsigned int index)304 pvh_assert_locked(__assert_only unsigned int index)
305 {
306 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);
307 }
308
309
310 /**
311 * Lock a pv_head_table entry.
312 *
313 * @param index The physical address index of the pv_head_table entry to lock.
314 */
315 static inline void
pvh_lock(unsigned int index)316 pvh_lock(unsigned int index)
317 {
318 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
319 PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
320 }
321
322 /**
323 * Unlock a pv_head_table entry.
324 *
325 * @param index The physical address index of the pv_head_table entry to unlock.
326 */
327 static inline void
pvh_unlock(unsigned int index)328 pvh_unlock(unsigned int index)
329 {
330 pvh_assert_locked(index);
331
332 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD,
333 PVH_LOCK_BIT - (PVH_LOCK_WORD * 32));
334 }
335
336 /**
337 * Check that a pv_head_table entry/pointer is a specific type.
338 *
339 * @param pvh The pv_head_table entry/pointer to check.
340 * @param type The type to check for.
341 *
342 * @return True if the pv_head_table entry is of the passed in type, false
343 * otherwise.
344 */
345 static inline bool
pvh_test_type(pv_entry_t ** pvh,vm_offset_t type)346 pvh_test_type(pv_entry_t **pvh, vm_offset_t type)
347 {
348 return ((*(vm_offset_t *)pvh) & PVH_TYPE_MASK) == type;
349 }
350
351 /**
352 * Convert a pv_head_table entry/pointer into a page table entry pointer. This
353 * should only be done if the type of this entry is PVH_TYPE_PTEP.
354 *
355 * @param pvh The pv_head_table entry/pointer to convert into a pt_entry_t*.
356 *
357 * @return Return back a safe to derefence pointer to the single mapping of this
358 * physical page by masking off the TYPE bits and adding any missing
359 * flags to the upper portion of the pointer.
360 */
361 static inline pt_entry_t*
pvh_ptep(pv_entry_t ** pvh)362 pvh_ptep(pv_entry_t **pvh)
363 {
364 return (pt_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
365 }
366
367 /**
368 * Convert a pv_head_table entry/pointer into a PVE list pointer. This
369 * should only be done if the type of this entry is PVH_TYPE_PVEP.
370 *
371 * @param pvh The pv_head_table entry/pointer to convert into a safe to
372 * dereference pv_entry_t*.
373 *
374 * @return Return back a safe to derefence pointer to the first mapping of this
375 * physical page by masking off the TYPE bits and adding any missing
376 * flags to the upper portion of the pointer.
377 */
378 static inline pv_entry_t*
pvh_pve_list(pv_entry_t ** pvh)379 pvh_pve_list(pv_entry_t **pvh)
380 {
381 return (pv_entry_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
382 }
383
384 /**
385 * Return the flags associated with a pv_head_table entry/pointer.
386 *
387 * @param pvh The pv_head_table entry whose flags to get.
388 */
389 static inline vm_offset_t
pvh_get_flags(pv_entry_t ** pvh)390 pvh_get_flags(pv_entry_t **pvh)
391 {
392 return (*(vm_offset_t *)pvh) & PVH_HIGH_FLAGS;
393 }
394
/**
 * Atomically set the flags associated with a pv_head_table entry/pointer.
 *
 * All existing PVH_HIGH_FLAGS bits are cleared and replaced by `flags`; the
 * type and pointer bits of the entry are preserved.
 *
 * @note Only the final store is atomic — the load/modify/store sequence as a
 *       whole is not. Presumably the caller holds the PVH lock to serialize
 *       concurrent updates to this entry — TODO(review): confirm at call sites.
 *
 * @param pvh The pv_head_table entry whose flags are getting set.
 * @param flags The new set of PVH_HIGH_FLAGS bits to store.
 */
static inline void
pvh_set_flags(pv_entry_t **pvh, vm_offset_t flags)
{
	os_atomic_store((vm_offset_t *)pvh, ((*(vm_offset_t *)pvh) & ~PVH_HIGH_FLAGS) | flags, relaxed);
}
405
/**
 * Update a pv_head_table entry/pointer to be a different type and/or point to
 * a different object.
 *
 * @note The pv_head_table entry MUST already be locked.
 *
 * @note This function will clobber any existing flags stored in the PVH pointer
 *       (except PVH_FLAG_LOCK). It's up to the caller to preserve flags if that
 *       functionality is needed (either by ensuring `pvep` contains those
 *       flags, or by manually setting the flags after this call).
 *
 * @param pvh The pv_head_table entry/pointer to update.
 * @param pvep The new entry to use. This could be either a pt_entry_t*,
 *             pv_entry_t*, or pt_desc_t* depending on the type.
 * @param type The type of the new entry.
 */
static inline void
pvh_update_head(pv_entry_t **pvh, void *pvep, unsigned int type)
{
	/* The caller must already hold the lock (see pvh_lock()). */
	assert((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK);
	/* Re-assert the lock bit into the new value so the entry stays locked. */
	os_atomic_store((vm_offset_t *)pvh, (vm_offset_t)pvep | type | PVH_FLAG_LOCK, relaxed);
}
428
/**
 * Update a pv_head_table entry/pointer to be a different type and/or point to
 * a different object.
 *
 * @note The pv_head_table entry CAN'T already be locked.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed (either by ensuring `pvep` contains those flags, or by
 *       manually setting the flags after this call).
 *
 * @param pvh The pv_head_table entry/pointer to update.
 * @param pvep The new entry to use. This could be either a pt_entry_t*,
 *             pv_entry_t*, or pt_desc_t* depending on the type.
 * @param type The type of the new entry.
 */
static inline void
pvh_update_head_unlocked(pv_entry_t **pvh, void *pvep, unsigned int type)
{
	/* This variant must only be used when the entry is not locked. */
	assert(!((*(vm_offset_t *)pvh) & PVH_FLAG_LOCK));
	/*
	 * Plain (non-atomic) store, explicitly keeping the lock bit clear.
	 * NOTE(review): presumably only safe when no other agent can be
	 * concurrently accessing this entry — confirm at call sites.
	 */
	*(vm_offset_t *)pvh = ((vm_offset_t)pvep | type) & ~PVH_FLAG_LOCK;
}
451
452 /**
453 * Given a page table entry pointer retrieved from the pv_head_table (from an
454 * entry of type PVH_TYPE_PTEP or PVH_TYPE_PVEP), return back whether the PTE is
455 * an IOMMU mapping.
456 *
457 * @note The way this function determines whether the passed in pointer is
458 * pointing to an IOMMU PTE, is by checking for a special flag stored in
459 * the lower bits of the pointer. This flag is only set on pointers stored
460 * in the pv_head_table, and as such, this function will only work on
461 * pointers retrieved from the pv_head_table. If a pointer to a PTE was
462 * directly retrieved from an IOMMU's page tables, this function would
463 * always return false despite actually being an IOMMU PTE.
464 *
465 * @param ptep A PTE pointer obtained from the pv_head_table to check.
466 *
467 * @return True if the entry is an IOMMU mapping, false otherwise.
468 */
469 static inline bool
pvh_ptep_is_iommu(const pt_entry_t * ptep)470 pvh_ptep_is_iommu(const pt_entry_t *ptep)
471 {
472 #ifdef PVH_FLAG_IOMMU
473 return (vm_offset_t)ptep & PVH_FLAG_IOMMU;
474 #else /* PVH_FLAG_IOMMU */
475 #pragma unused(ptep)
476 return false;
477 #endif /* PVH_FLAG_IOMMU */
478 }
479
480 /**
481 * Sometimes the PTE pointers retrieved from the pv_head_table (from an entry of
482 * type PVH_TYPE_PTEP or PVH_TYPE_PVEP) contain flags themselves. This function
483 * strips out those flags and returns back a dereferencable pointer.
484 *
485 * @param ptep The PTE pointer to strip out the unwanted flags.
486 *
487 * @return A valid dereferencable pointer to the page table entry.
488 */
489 static inline const pt_entry_t*
pvh_strip_ptep(const pt_entry_t * ptep)490 pvh_strip_ptep(const pt_entry_t *ptep)
491 {
492 #ifdef PVH_FLAG_IOMMU
493 const vm_offset_t pte_va = (vm_offset_t)ptep;
494 return (const pt_entry_t*)((pte_va & ~PVH_FLAG_IOMMU) | PVH_FLAG_IOMMU_TABLE);
495 #else /* PVH_FLAG_IOMMU */
496 return ptep;
497 #endif /* PVH_FLAG_IOMMU */
498 }
499
500 /**
501 * PVH_TYPE_PVEP Helper Functions.
502 *
503 * The following are methods used to manipulate PVE lists. This is the type of
504 * pv_head_table entry used when there are multiple mappings to a single
505 * physical page.
506 */
507
508 /**
509 * Whether a physical page is using "alternate accounting" (ALTACCT) for its
510 * ledger statistics is something that needs to be tracked on a per-mapping
511 * basis, not on a per-physical-page basis. Because of that, it's tracked
512 * differently depending on whether there's a single mapping to a page
513 * (PVH_TYPE_PTEP) or multiple (PVH_TYPE_PVEP). For single mappings, the bit is
514 * tracked in the pp_attr_table. But when there are multiple mappings, the least
515 * significant bit of the corresponding "pve_pte" pointer in each pv_entry object
516 * is used as a marker for pages using alternate accounting.
517 *
518 * @note See the definition for PP_ATTR_ALTACCT for a more detailed description
519 * of what "alternate accounting" actually means in respect to the
520 * footprint ledger.
521 *
 * Since some code (KernelDiskImages, e.g.) might map a physical page as
523 * "device" memory (i.e. external) while it's also being used as regular
524 * "anonymous" memory (i.e. internal) in user space, we have to manage the
525 * "internal" attribute per mapping rather than per physical page.
526 * When there are multiple mappings, we use the next least significant bit of
527 * the corresponding "pve_pte" pointer for that.
528 */
529 #define PVE_PTEP_ALTACCT ((uintptr_t) 0x1)
530 #define PVE_PTEP_INTERNAL ((uintptr_t) 0x2)
531 #define PVE_PTEP_FLAGS (PVE_PTEP_ALTACCT | PVE_PTEP_INTERNAL)
532
533 /**
534 * Set the ALTACCT bit for a specific PTE pointer.
535 *
536 * @param pvep A pointer to the current pv_entry mapping in the linked list of
537 * mappings.
538 * @param idx Index of the chosen PTE pointer inside the PVE.
539 */
540 static inline void
pve_set_altacct(pv_entry_t * pvep,unsigned idx)541 pve_set_altacct(pv_entry_t *pvep, unsigned idx)
542 {
543 assert(idx < PTE_PER_PVE);
544 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_ALTACCT);
545 }
546 /**
547 * Set the INTERNAL bit for a specific PTE pointer.
548 *
549 * @param pvep A pointer to the current pv_entry mapping in the linked list of
550 * mappings.
551 * @param idx Index of the chosen PTE pointer inside the PVE.
552 */
553 static inline void
pve_set_internal(pv_entry_t * pvep,unsigned idx)554 pve_set_internal(pv_entry_t *pvep, unsigned idx)
555 {
556 assert(idx < PTE_PER_PVE);
557 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] | PVE_PTEP_INTERNAL);
558 }
559
560 /**
561 * Clear the ALTACCT bit for a specific PTE pointer.
562 *
563 * @param pvep A pointer to the current pv_entry mapping in the linked list of
564 * mappings.
565 * @param idx Index of the chosen PTE pointer inside the PVE.
566 */
567 static inline void
pve_clr_altacct(pv_entry_t * pvep,unsigned idx)568 pve_clr_altacct(pv_entry_t *pvep, unsigned idx)
569 {
570 assert(idx < PTE_PER_PVE);
571 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_ALTACCT);
572 }
573 /**
574 * Clear the INTERNAL bit for a specific PTE pointer.
575 *
576 * @param pvep A pointer to the current pv_entry mapping in the linked list of
577 * mappings.
578 * @param idx Index of the chosen PTE pointer inside the PVE.
579 */
580 static inline void
pve_clr_internal(pv_entry_t * pvep,unsigned idx)581 pve_clr_internal(pv_entry_t *pvep, unsigned idx)
582 {
583 assert(idx < PTE_PER_PVE);
584 pvep->pve_ptep[idx] = (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_INTERNAL);
585 }
586
587 /**
588 * Return the ALTACCT bit for a specific PTE pointer.
589 *
590 * @param pvep A pointer to the current pv_entry mapping in the linked list of
591 * mappings.
592 * @param idx Index of the chosen PTE pointer inside the PVE.
593 */
594 static inline bool
pve_get_altacct(pv_entry_t * pvep,unsigned idx)595 pve_get_altacct(pv_entry_t *pvep, unsigned idx)
596 {
597 assert(idx < PTE_PER_PVE);
598 return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_ALTACCT;
599 }
600 /**
601 * Return the INTERNAL bit for a specific PTE pointer.
602 *
603 * @param pvep A pointer to the current pv_entry mapping in the linked list of
604 * mappings.
605 * @param idx Index of the chosen PTE pointer inside the PVE.
606 */
607 static inline bool
pve_get_internal(pv_entry_t * pvep,unsigned idx)608 pve_get_internal(pv_entry_t *pvep, unsigned idx)
609 {
610 assert(idx < PTE_PER_PVE);
611 return (uintptr_t)pvep->pve_ptep[idx] & PVE_PTEP_INTERNAL;
612 }
613
/**
 * Return the next mapping (pv_entry) in a linked list of mappings. This applies
 * to pv_head_table entries of type PVH_TYPE_PVEP.
 *
 * @param pvep A pointer to the current pv_entry mapping in the linked list of
 *             mappings.
 *
 * @return The next virtual mapping for a physical page, or PV_ENTRY_NULL if the
 *         end of the list has been reached.
 */
static inline pv_entry_t *
pve_next(pv_entry_t *pvep)
{
	return pvep->pve_next;
}
629
/**
 * Return a pointer to the pve_next field in a pv_entry. This value is used
 * when adding and removing entries to a PVE list (see pve_remove(), which
 * takes such a pointer as its `pvepp` argument).
 *
 * @param pvep The pv_entry whose pve_next field is being accessed.
 *
 * @return Pointer to the pve_next field.
 */
static inline pv_entry_t **
pve_next_ptr(pv_entry_t *pvep)
{
	return &pvep->pve_next;
}
643
644 /**
645 * Return a pointer to the page table entry for this mapping.
646 *
647 * @param pvep The pv_entry whose pve_ptep field is to be returned.
648 * @param idx Index of the chosen PTE pointer inside the PVE.
649 *
650 * @return Pointer to the page table entry.
651 */
652 static inline pt_entry_t *
pve_get_ptep(pv_entry_t * pvep,unsigned idx)653 pve_get_ptep(pv_entry_t *pvep, unsigned idx)
654 {
655 assert(idx < PTE_PER_PVE);
656 return (pt_entry_t *)((uintptr_t)pvep->pve_ptep[idx] & ~PVE_PTEP_FLAGS);
657 }
658
/**
 * Update the page table entry for a specific physical to virtual mapping.
 *
 * @note The pointer is stored as-is: this overwrites any ALTACCT/INTERNAL
 *       flag bits previously stored in this slot. If those flags must be
 *       preserved, the caller has to fold them into `ptep_new` or re-set
 *       them after this call.
 *
 * @param pvep The pv_entry to update.
 * @param idx Index of the chosen PTE pointer inside the PVE.
 * @param ptep_new The new page table entry.
 */
static inline void
pve_set_ptep(pv_entry_t *pvep, unsigned idx, pt_entry_t *ptep_new)
{
	assert(idx < PTE_PER_PVE);
	pvep->pve_ptep[idx] = ptep_new;
}
672
673 /**
674 * Initialize all fields in a PVE to NULL.
675 *
676 * @param pvep The pv_entry to initialize.
677 */
678 static inline void
pve_init(pv_entry_t * pvep)679 pve_init(pv_entry_t *pvep)
680 {
681 pvep->pve_next = PV_ENTRY_NULL;
682 for (int i = 0; i < PTE_PER_PVE; i++) {
683 pvep->pve_ptep[i] = PT_ENTRY_NULL;
684 }
685 }
686
687 /**
688 * Find PTE pointer in PVE and return its index.
689 *
690 * @param pvep The PVE to search.
691 * @param ptep PTE to search for.
692 *
693 * @return Index of the found entry, or -1 if no entry exists.
694 */
695 static inline int
pve_find_ptep_index(pv_entry_t * pvep,pt_entry_t * ptep)696 pve_find_ptep_index(pv_entry_t *pvep, pt_entry_t *ptep)
697 {
698 for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
699 if (pve_get_ptep(pvep, i) == ptep) {
700 return (int)i;
701 }
702 }
703
704 return -1;
705 }
706
707 /**
708 * Checks if no PTEs are currently associated with this PVE.
709 *
710 * @param pvep The PVE to search.
711 *
712 * @return True if no PTEs are currently associated with this PVE, or false.
713 */
714 static inline bool
pve_is_empty(pv_entry_t * pvep)715 pve_is_empty(pv_entry_t *pvep)
716 {
717 for (unsigned int i = 0; i < PTE_PER_PVE; i++) {
718 if (pve_get_ptep(pvep, i) != PT_ENTRY_NULL) {
719 return false;
720 }
721 }
722
723 return true;
724 }
725
/**
 * Prepend a new pv_entry node to a PVE list.
 *
 * @note The pv_head_table entry must be locked by the caller:
 *       pvh_update_head() asserts PVH_FLAG_LOCK is set on the entry.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed (either by ensuring `pvep` contains those flags, or by
 *       manually setting the flags after this call).
 *
 * @param pvh The linked list of mappings to update.
 * @param pvep The new mapping to add to the linked list.
 */
static inline void
pve_add(pv_entry_t **pvh, pv_entry_t *pvep)
{
	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));

	/* Link the new node in front of the current head... */
	pvep->pve_next = pvh_pve_list(pvh);
	/* ...then publish it as the new head (type stays PVH_TYPE_PVEP). */
	pvh_update_head(pvh, pvep, PVH_TYPE_PVEP);
}
745
/**
 * Remove an entry from a PVE list of mappings.
 *
 * @note The pv_head_table entry must be locked by the caller:
 *       pvh_update_head() (used on the head-removal paths) asserts
 *       PVH_FLAG_LOCK is set.
 *
 * @note This function will clobber any existing flags stored in the PVH
 *       pointer. It's up to the caller to preserve flags if that functionality
 *       is needed.
 *
 * @note The removed node itself is not freed or modified; disposing of it is
 *       the caller's responsibility.
 *
 * @param pvh The pv_head_table entry of the PVE list to remove a mapping from.
 *            This is the first entry in the list of pv_entry_t mappings.
 * @param pvepp A pointer to the pv_entry_t* that's being removed. If this entry
 *              is the first in the linked list of mappings, then this should be
 *              identical to the pv_head_table entry. If the mapping isn't the
 *              first, then this is a pointer to the pve_next field in the
 *              previous mapping.
 * @param pvep The entry that should be removed. Should be identical to a
 *             dereference of the pvepp parameter (unless it's the pv_head_table
 *             entry).
 */
static inline void
pve_remove(pv_entry_t **pvh, pv_entry_t **pvepp, pv_entry_t *pvep)
{
	assert(pvh_test_type(pvh, PVH_TYPE_PVEP));

	if (pvepp == pvh) {
		/* The node being removed is the head of the list. */
		if (pve_next(pvep) == PV_ENTRY_NULL) {
			/* The last mapping to this page is being removed. */
			pvh_update_head(pvh, PV_ENTRY_NULL, PVH_TYPE_NULL);
		} else {
			/**
			 * There are still mappings left, make the next one the new head of
			 * the list. This effectively removes the first entry from the list.
			 */
			pvh_update_head(pvh, pve_next(pvep), PVH_TYPE_PVEP);
		}
	} else {
		/**
		 * Move the previous entry's next field to the entry after the one being
		 * removed. This will clobber the ALTACCT and INTERNAL bits.
		 */
		*pvepp = pve_next(pvep);
	}
}
788
789 /**
790 * PVH_TYPE_PTDP Types and Helper Functions.
791 *
792 * The following are types and methods used to manipulate page table descriptor
793 * (PTD) objects. This is the type of pv_head_table entry used when a page is
794 * being used as a page table.
795 */
796
797 /**
798 * When the pmap layer allocates memory, it always does so in chunks of the VM
799 * page size (which are represented by the PAGE_SIZE/PAGE_SHIFT macros). The VM
800 * page size might not match up with the hardware page size for a given address
801 * space (this is especially true on systems that support more than one page
802 * size).
803 *
804 * The pv_head_table is allocated to have one entry per VM page, not hardware
805 * page (which can change depending on the address space). Because of that, a
806 * single VM-page-sized region (single pv_head_table entry) can potentially hold
807 * up to four page tables. Only one page table descriptor (PTD) is allocated per
808 * pv_head_table entry (per VM page), so on some systems, one PTD might have to
809 * keep track of up to four different page tables.
810 */
811
812 #if __ARM_MIXED_PAGE_SIZE__
813 #define PT_INDEX_MAX (ARM_PGBYTES / 4096)
814 #elif (ARM_PGSHIFT == 14)
815 #define PT_INDEX_MAX 1
816 #elif (ARM_PGSHIFT == 12)
817 #define PT_INDEX_MAX 4
818 #else
819 #error Unsupported ARM_PGSHIFT
820 #endif /* __ARM_MIXED_PAGE_SIZE__ || ARM_PGSHIFT == 14 || ARM_PGSHIFT == 12 */
821
822
823 /**
824 * Page table descriptor (PTD) info structure.
825 *
826 * Contains information about a page table. These pieces of data are separate
827 * from the PTD itself because in address spaces where the VM page size doesn't
828 * match the underlying hardware page size, one PTD could represent multiple
829 * page tables (and so will need multiple PTD info structures).
830 *
831 * These fields are also in their own struct so that they can be allocated
832 * separately from the associated pt_desc_t object. This allows us to allocate
833 * the counts in this structure in a way that ensures they don't fall within the
834 * same cache line as the main pt_desc_t object. This is important because the
835 * fields in this structure are atomically updated which could cause false
836 * sharing cache performance issues with the "va" field in pt_desc_t if all of
837 * the fields were within the same structure.
838 */
typedef struct {
	/**
	 * Pre-defined sentinel values for ptd_info_t.refcnt. If these refcnt values
	 * change, make sure to update the showpte LLDB macro to reflect the
	 * changes.
	 */
#define PT_DESC_REFCOUNT 0x4000U
#define PT_DESC_IOMMU_GRANTED_REFCOUNT 0x8000U
#define PT_DESC_IOMMU_ACCEPTED_REFCOUNT 0x8001U

	/*
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT.
	 * For leaf pagetables, should reflect the number of non-empty PTEs.
	 * For IOMMU pages, should always be either PT_DESC_IOMMU_GRANTED_REFCOUNT
	 * or PT_DESC_IOMMU_ACCEPTED_REFCOUNT.
	 *
	 * Updated atomically (see the struct-level comment above about false
	 * sharing with pt_desc_t).
	 */
	unsigned short refcnt;

	/*
	 * For non-leaf pagetables, should be 0.
	 * For leaf pagetables, should reflect the number of wired entries.
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU
	 * operations are implicitly wired).
	 */
	unsigned short wiredcnt;
} ptd_info_t;
865
/**
 * Page Table Descriptor (PTD).
 *
 * Provides a per-table data structure and a way of keeping track of all page
 * tables in the system.
 *
 * This structure is also used as a convenient way of keeping track of IOMMU
 * pages (which may or may not be used as page tables). In that case the "iommu"
 * field will point to the owner of the page, ptd_info[0].refcnt will be
 * PT_DESC_IOMMU_GRANTED_REFCOUNT or PT_DESC_IOMMU_ACCEPTED_REFCOUNT, and
 * ptd_info[0].wiredcnt can be used as an arbitrary refcnt controlled by the
 * IOMMU driver.
 */
typedef struct pt_desc {
	/**
	 * This queue chain provides a mechanism for keeping a list of pages
	 * being used as page tables. This is used to potentially reclaim userspace
	 * page tables as a fast way of "allocating" a page.
	 *
	 * Refer to osfmk/kern/queue.h for more information about queue chains.
	 */
	queue_chain_t pt_page;

	/* Each page table is either owned by a pmap or a specific IOMMU. */
	union {
		struct pmap *pmap;
	};

	/**
	 * The following fields contain per-page-table properties, and as such,
	 * might have multiple elements each. This is due to a single PTD
	 * potentially representing multiple page tables (in address spaces where
	 * the VM page size differs from the hardware page size). Use the
	 * ptd_get_index() function to get the correct index for a specific page
	 * table.
	 */

	/**
	 * The first address of the virtual address space this page table is
	 * translating for, or a value set by an IOMMU driver if this PTD is being
	 * used to track an IOMMU page.
	 */
	vm_offset_t va[PT_INDEX_MAX];

	/**
	 * ptd_info_t's are allocated separately so as to reduce false sharing
	 * with the va field. This is desirable because ptd_info_t's are updated
	 * atomically from all CPUs.
	 */
	ptd_info_t *ptd_info;
} pt_desc_t;
917
918 /**
919 * Convert a pv_head_table entry/pointer into a page table descriptor pointer.
920 * This should only be done if the type of this entry is PVH_TYPE_PTDP.
921 *
922 * @param pvh The pv_head_table entry/pointer to convert into a safe to
923 * dereference pt_desc_t*.
924 *
925 * @return Return back a safe to derefence pointer to the page table descriptor
926 * for this physical page by masking off the TYPE bits and adding any
927 * missing flags to the upper portion of the pointer.
928 */
929 static inline pt_desc_t*
pvh_ptd(pv_entry_t ** pvh)930 pvh_ptd(pv_entry_t **pvh)
931 {
932 return (pt_desc_t *)(((*(vm_offset_t *)pvh) & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
933 }
934
935 /**
936 * Given an arbitrary page table entry, return back the page table descriptor
937 * (PTD) object for the page table that contains that entry.
938 *
939 * @param ptep Pointer to a PTE whose page table descriptor object to return.
940 *
941 * @return The PTD object for the passed in page table.
942 */
943 static inline pt_desc_t *
ptep_get_ptd(const pt_entry_t * ptep)944 ptep_get_ptd(const pt_entry_t *ptep)
945 {
946 assert(ptep != NULL);
947
948 const vm_offset_t pt_base_va = (vm_offset_t)ptep;
949 pv_entry_t **pvh = pai_to_pvh(pa_index(ml_static_vtop(pt_base_va)));
950
951 if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
952 panic("%s: invalid PV head 0x%llx for PTE %p", __func__, (uint64_t)(*pvh), ptep);
953 }
954
955 return pvh_ptd(pvh);
956 }
957
958 /**
959 * Given an arbitrary page table entry, return back the pmap that owns that
960 * page table.
961 *
962 * @note This won't work correctly for page tables owned by IOMMUs, because
963 * those table aren't owned by any specific pmap.
964 *
965 * @param ptep Pointer to a page table entry whose owner we're trying to return.
966 *
967 * @return The pmap that owns the given page table entry.
968 */
969 static inline struct pmap *
ptep_get_pmap(const pt_entry_t * ptep)970 ptep_get_pmap(const pt_entry_t *ptep)
971 {
972 return ptep_get_ptd(ptep)->pmap;
973 }
974
975
976 /**
977 * Given an arbitrary translation table entry, get the page table descriptor
978 * (PTD) object for the page table pointed to by the TTE.
979 *
980 * @param tte The translation table entry to parse. For instance, if this is an
981 * L2 TTE, then the PTD for the L3 table this entry points to will be
982 * returned.
983 *
984 * @return The page table descriptor (PTD) for the page table pointed to by this
985 * TTE.
986 */
987 static inline pt_desc_t *
tte_get_ptd(const tt_entry_t tte)988 tte_get_ptd(const tt_entry_t tte)
989 {
990 const vm_offset_t pt_base_va = (vm_offset_t)(tte & ~((tt_entry_t)PAGE_MASK));
991 pv_entry_t **pvh = pai_to_pvh(pa_index(pt_base_va));
992
993 if (__improbable(!pvh_test_type(pvh, PVH_TYPE_PTDP))) {
994 panic("%s: invalid PV head 0x%llx for TTE 0x%llx", __func__, (uint64_t)(*pvh), (uint64_t)tte);
995 }
996
997 return pvh_ptd(pvh);
998 }
999
1000 /**
1001 * In address spaces where the VM page size doesn't match the underlying
1002 * hardware page size, one PTD could represent multiple page tables. This
1003 * function returns the correct index value depending on which page table is
1004 * being accessed. That index value can then be used to access the
1005 * per-page-table properties stored within a PTD.
1006 *
1007 * @note See the description above the PT_INDEX_MAX definition for a more
1008 * detailed explanation of why multiple page tables can be represented
1009 * by a single PTD object in the pv_head_table.
1010 *
1011 * @param ptd The page table descriptor that's being accessed.
1012 * @param ttep Pointer to the translation table entry that's being accessed.
1013 *
1014 * @return The correct index value for a specific, hardware-sized page
1015 * table.
1016 */
1017 static inline unsigned
ptd_get_index(__unused const pt_desc_t * ptd,__unused const tt_entry_t * ttep)1018 ptd_get_index(__unused const pt_desc_t *ptd, __unused const tt_entry_t *ttep)
1019 {
1020 #if PT_INDEX_MAX == 1
1021 return 0;
1022 #else
1023 assert(ptd != NULL);
1024
1025 const uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
1026 const vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
1027
1028 /**
1029 * Use the difference between the VM page shift and the hardware page shift
1030 * to get the index of the correct page table. In practice, this equates to
1031 * masking out the bottom two bits of the L3 table index in address spaces
1032 * where the VM page size is greater than the hardware page size. In address
1033 * spaces where they're identical, the index will always be zero.
1034 */
1035 const unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
1036 assert(ttep_index < PT_INDEX_MAX);
1037
1038 return ttep_index;
1039 #endif
1040 }
1041
1042 /**
1043 * In address spaces where the VM page size doesn't match the underlying
1044 * hardware page size, one PTD could represent multiple page tables. This
1045 * function returns the correct ptd_info_t structure depending on which page
1046 * table is being accessed.
1047 *
1048 * @note See the description above the PT_INDEX_MAX definition for a more
1049 * detailed explanation of why multiple page tables can be represented
1050 * by a single PTD object in the pv_head_table.
1051 *
1052 * @param ptd The page table descriptor that's being accessed.
1053 * @param ttep Pointer to the translation table entry that's being accessed.
1054 *
1055 * @return The correct ptd_info_t structure for a specific, hardware-sized page
1056 * table.
1057 */
1058 static inline ptd_info_t *
ptd_get_info(pt_desc_t * ptd,const tt_entry_t * ttep)1059 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1060 {
1061 assert((ptd != NULL) && (ptd->ptd_info[0].refcnt < PT_DESC_IOMMU_GRANTED_REFCOUNT));
1062
1063 return &ptd->ptd_info[ptd_get_index(ptd, ttep)];
1064 }
1065
1066 /**
1067 * Given a pointer to a page table entry, return back the ptd_info structure
1068 * for the page table that contains that entry.
1069 *
1070 * @param ptep Pointer to a PTE whose ptd_info object to return.
1071 *
1072 * @return The ptd_info object for the page table that contains the passed in
1073 * page table entry.
1074 */
1075 static inline ptd_info_t *
ptep_get_info(const pt_entry_t * ptep)1076 ptep_get_info(const pt_entry_t *ptep)
1077 {
1078 return ptd_get_info(ptep_get_ptd(ptep), ptep);
1079 }
1080
1081 /**
1082 * Return the virtual address mapped by the passed in leaf page table entry,
1083 * using an already-retrieved pagetable descriptor.
1084 *
1085 * @param ptdp pointer to the descriptor for the pagetable containing ptep
1086 * @param ptep Pointer to a PTE to parse
1087 */
1088 static inline vm_map_address_t
ptd_get_va(const pt_desc_t * ptdp,const pt_entry_t * ptep)1089 ptd_get_va(const pt_desc_t *ptdp, const pt_entry_t *ptep)
1090 {
1091 const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptdp->pmap);
1092
1093 vm_map_address_t va = ptdp->va[ptd_get_index(ptdp, ptep)];
1094 vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1095
1096 va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1097
1098 return va;
1099 }
1100
1101 /**
1102 * Return the virtual address that is being mapped by the passed in leaf page
1103 * table entry.
1104 *
1105 * @param ptep Pointer to a PTE to parse.
1106 */
1107 static inline vm_map_address_t
ptep_get_va(const pt_entry_t * ptep)1108 ptep_get_va(const pt_entry_t *ptep)
1109 {
1110 return ptd_get_va(ptep_get_ptd(ptep), ptep);
1111 }
1112
1113 /**
1114 * Physical Page Attribute Table (pp_attr_table) defines and helper functions.
1115 */
1116
/* Type used to store the per-VM-page attribute flags (16 bits per page). */
typedef uint16_t pp_attr_t;
1119
1120 /* See the definition of pp_attr_table for more information. */
1121 extern volatile pp_attr_t* pp_attr_table;
1122
1123 /**
1124 * Flags stored in the pp_attr_table on a per-physical-page basis.
1125 *
1126 * Please update the pv_walk LLDB macro if these flags are changed or added to.
1127 */
1128
1129 /**
1130 * The bottom 6-bits are used to store the default WIMG (cacheability and memory
1131 * type) setting for this physical page. This can be changed by calling
1132 * pmap_set_cache_attributes().
1133 *
1134 * If a default WIMG setting isn't set for a page, then the default is Normal,
1135 * Cached memory (VM_WIMG_DEFAULT).
1136 */
1137 #define PP_ATTR_WIMG_MASK 0x003F
1138 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1139
1140 /**
1141 * The reference and modify bits keep track of whether a page has been accessed
1142 * or modified since the last time the bits were cleared. These bits are used to
1143 * enforce policy decisions in the VM layer.
1144 */
1145 #define PP_ATTR_REFERENCED 0x0040
1146 #define PP_ATTR_MODIFIED 0x0080
1147
1148 /**
1149 * This physical page is being used as anonymous memory that's internally
1150 * managed by the VM and is not connected to an external pager. This flag is
1151 * only set/cleared on the first CPU mapping of a page (see PVH_FLAG_CPU). Any
1152 * subsequent mappings won't set/clear this flag until all mappings are removed
1153 * and a new CPU mapping is added.
1154 */
1155 #define PP_ATTR_INTERNAL 0x0100
1156
1157 /**
1158 * This flag is used to keep track of pages that are still resident but are not
1159 * considered dirty and can be reclaimed under memory pressure. These pages do
1160 * not count as a part of the memory footprint, so the footprint ledger does not
1161 * need to be updated for these pages. This is hinted to the VM by the
1162 * `madvise(MADV_FREE_REUSABLE)` system call.
1163 */
1164 #define PP_ATTR_REUSABLE 0x0200
1165
1166 /**
1167 * This flag denotes that a page is utilizing "alternate accounting". This means
1168 * that the pmap doesn't need to keep track of these pages with regards to the
1169 * footprint ledger because the VM is already accounting for them in a different
1170 * way. These include IOKit mappings (VM adds their entire virtual size to the
1171 * footprint), and purgeable pages (VM counts them only when non-volatile and
1172 * only for one "owner"), among others.
1173 *
1174 * Note that alternate accounting status is tracked on a per-mapping basis (not
1175 * per-page). Because of that the ALTACCT flag in the pp_attr_table is only used
1176 * when there's a single mapping to a page. When there are multiple mappings,
1177 * the status of this flag is tracked in the pv_head_table (see PVE_PTEP_ALTACCT
1178 * above).
1179 */
1180 #define PP_ATTR_ALTACCT 0x0400
1181
1182 /**
1183 * This bit was originally used on x86 to keep track of what pages to not
1184 * encrypt during the hibernation process as a performance optimization when
1185 * encryption was done in software. This doesn't apply to the ARM
1186 * hibernation process because all pages are automatically encrypted using
1187 * hardware acceleration. Despite that, the pmap still keeps track of this flag
1188 * as a debugging aid on internal builds.
1189 *
1190 * TODO: This bit can probably be reclaimed:
1191 * rdar://70740650 (PMAP Cleanup: Potentially reclaim the PP_ATTR_NOENCRYPT bit on ARM)
1192 */
1193 #define PP_ATTR_NOENCRYPT 0x0800
1194
1195 /**
1196 * These bits denote that a physical page is expecting the next access or
1197 * modification to set the PP_ATTR_REFERENCED and PP_ATTR_MODIFIED flags
1198 * respectively.
1199 */
1200 #define PP_ATTR_REFFAULT 0x1000
1201 #define PP_ATTR_MODFAULT 0x2000
1202
1203 #if XNU_MONITOR
1204 /**
1205 * Denotes that a page is owned by the PPL. This is modified/checked with the
1206 * PVH lock held, to avoid ownership related races. This does not need to be a
1207 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1208 * put the bit.
1209 */
1210 #define PP_ATTR_MONITOR 0x4000
1211
1212 /**
1213 * Denotes that a page *cannot* be owned by the PPL. This is required in order
1214 * to temporarily 'pin' kernel pages that are used to store PPL output
1215 * parameters. Otherwise a malicious or buggy caller could pass PPL-owned memory
1216 * for these parameters and in so doing stage a write gadget against the PPL.
1217 */
1218 #define PP_ATTR_NO_MONITOR 0x8000
1219
1220 /**
1221 * All of the bits owned by the PPL; kernel requests to set or clear these bits
1222 * are illegal.
1223 */
1224 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1225 #endif /* XNU_MONITOR */
1226
1227 /**
1228 * Atomically set some flags in a pp_attr_table entry.
1229 *
1230 * @param pai The physical address index for the entry to update.
1231 * @param bits The flags to set in the entry.
1232 */
1233 static inline void
ppattr_set_bits(unsigned int pai,pp_attr_t bits)1234 ppattr_set_bits(unsigned int pai, pp_attr_t bits)
1235 {
1236 volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1237 os_atomic_or(ppattr, bits, acq_rel);
1238 }
1239
1240 /**
1241 * Atomically clear some flags in a pp_attr_table entry.
1242 *
1243 * @param pai The physical address index for the entry to update.
1244 * @param bits The flags to clear in the entry.
1245 */
1246 static inline void
ppattr_clear_bits(unsigned int pai,pp_attr_t bits)1247 ppattr_clear_bits(unsigned int pai, pp_attr_t bits)
1248 {
1249 volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1250 os_atomic_andnot(ppattr, bits, acq_rel);
1251 }
1252
1253 /**
1254 * Return true if the pp_attr_table entry contains the passed in bits.
1255 *
1256 * @param pai The physical address index for the entry to test.
1257 * @param bits The flags to check for.
1258 */
1259 static inline bool
ppattr_test_bits(unsigned int pai,pp_attr_t bits)1260 ppattr_test_bits(unsigned int pai, pp_attr_t bits)
1261 {
1262 const volatile pp_attr_t *ppattr = &pp_attr_table[pai];
1263 return (*ppattr & bits) == bits;
1264 }
1265
1266 /**
1267 * Only set some flags in a pp_attr_table entry if the passed in physical
1268 * address is a kernel-managed address.
1269 *
1270 * @param pa The physical address for the entry to update.
1271 * @param bits The flags to set in the entry.
1272 */
1273 static inline void
ppattr_pa_set_bits(pmap_paddr_t pa,pp_attr_t bits)1274 ppattr_pa_set_bits(pmap_paddr_t pa, pp_attr_t bits)
1275 {
1276 if (pa_valid(pa)) {
1277 ppattr_set_bits(pa_index(pa), bits);
1278 }
1279 }
1280
1281 /**
1282 * Only clear some flags in a pp_attr_table entry if the passed in physical
1283 * address is a kernel-managed address.
1284 *
1285 * @param pa The physical address for the entry to update.
1286 * @param bits The flags to clear in the entry.
1287 */
1288 static inline void
ppattr_pa_clear_bits(pmap_paddr_t pa,pp_attr_t bits)1289 ppattr_pa_clear_bits(pmap_paddr_t pa, pp_attr_t bits)
1290 {
1291 if (pa_valid(pa)) {
1292 ppattr_clear_bits(pa_index(pa), bits);
1293 }
1294 }
1295
1296 /**
1297 * Only test flags in a pp_attr_table entry if the passed in physical address
1298 * is a kernel-managed page.
1299 *
1300 * @param pa The physical address for the entry to test.
1301 * @param bits The flags to check for.
1302 *
1303 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1304 * depending on whether the bits are set.
1305 */
1306 static inline bool
ppattr_pa_test_bits(pmap_paddr_t pa,pp_attr_t bits)1307 ppattr_pa_test_bits(pmap_paddr_t pa, pp_attr_t bits)
1308 {
1309 return pa_valid(pa) ? ppattr_test_bits(pa_index(pa), bits) : false;
1310 }
1311
1312 /**
1313 * Set the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the passed
1314 * in physical address is a kernel-managed page.
1315 *
1316 * @param pa The physical address for the entry to update.
1317 */
1318 static inline void
ppattr_pa_set_modify(pmap_paddr_t pa)1319 ppattr_pa_set_modify(pmap_paddr_t pa)
1320 {
1321 ppattr_pa_set_bits(pa, PP_ATTR_MODIFIED);
1322 }
1323
1324 /**
1325 * Clear the PP_ATTR_MODIFIED flag on a specific pp_attr_table entry if the
1326 * passed in physical address is a kernel-managed page.
1327 *
1328 * @param pa The physical address for the entry to update.
1329 */
1330 static inline void
ppattr_pa_clear_modify(pmap_paddr_t pa)1331 ppattr_pa_clear_modify(pmap_paddr_t pa)
1332 {
1333 ppattr_pa_clear_bits(pa, PP_ATTR_MODIFIED);
1334 }
1335
1336 /**
1337 * Set the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1338 * passed in physical address is a kernel-managed page.
1339 *
1340 * @param pa The physical address for the entry to update.
1341 */
1342 static inline void
ppattr_pa_set_reference(pmap_paddr_t pa)1343 ppattr_pa_set_reference(pmap_paddr_t pa)
1344 {
1345 ppattr_pa_set_bits(pa, PP_ATTR_REFERENCED);
1346 }
1347
1348 /**
1349 * Clear the PP_ATTR_REFERENCED flag on a specific pp_attr_table entry if the
1350 * passed in physical address is a kernel-managed page.
1351 *
1352 * @param pa The physical address for the entry to update.
1353 */
1354 static inline void
ppattr_pa_clear_reference(pmap_paddr_t pa)1355 ppattr_pa_clear_reference(pmap_paddr_t pa)
1356 {
1357 ppattr_pa_clear_bits(pa, PP_ATTR_REFERENCED);
1358 }
1359
1360 #if XNU_MONITOR
1361
1362 /**
1363 * Set the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the passed
1364 * in physical address is a kernel-managed page.
1365 *
1366 * @param pa The physical address for the entry to update.
1367 */
1368 static inline void
ppattr_pa_set_monitor(pmap_paddr_t pa)1369 ppattr_pa_set_monitor(pmap_paddr_t pa)
1370 {
1371 ppattr_pa_set_bits(pa, PP_ATTR_MONITOR);
1372 }
1373
1374 /**
1375 * Clear the PP_ATTR_MONITOR flag on a specific pp_attr_table entry if the
1376 * passed in physical address is a kernel-managed page.
1377 *
1378 * @param pa The physical address for the entry to update.
1379 */
1380 static inline void
ppattr_pa_clear_monitor(pmap_paddr_t pa)1381 ppattr_pa_clear_monitor(pmap_paddr_t pa)
1382 {
1383 ppattr_pa_clear_bits(pa, PP_ATTR_MONITOR);
1384 }
1385
1386 /**
1387 * Only test for the PP_ATTR_MONITOR flag in a pp_attr_table entry if the passed
1388 * in physical address is a kernel-managed page.
1389 *
1390 * @param pa The physical address for the entry to test.
1391 *
1392 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1393 * depending on whether the PP_ATTR_MONITOR is set.
1394 */
1395 static inline bool
ppattr_pa_test_monitor(pmap_paddr_t pa)1396 ppattr_pa_test_monitor(pmap_paddr_t pa)
1397 {
1398 return ppattr_pa_test_bits(pa, PP_ATTR_MONITOR);
1399 }
1400
1401 /**
1402 * Set the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1403 * passed in physical address is a kernel-managed page.
1404 *
1405 * @param pa The physical address for the entry to update.
1406 */
1407 static inline void
ppattr_pa_set_no_monitor(pmap_paddr_t pa)1408 ppattr_pa_set_no_monitor(pmap_paddr_t pa)
1409 {
1410 ppattr_pa_set_bits(pa, PP_ATTR_NO_MONITOR);
1411 }
1412
1413 /**
1414 * Clear the PP_ATTR_NO_MONITOR flag on a specific pp_attr_table entry if the
1415 * passed in physical address is a kernel-managed page.
1416 *
1417 * @param pa The physical address for the entry to update.
1418 */
1419 static inline void
ppattr_pa_clear_no_monitor(pmap_paddr_t pa)1420 ppattr_pa_clear_no_monitor(pmap_paddr_t pa)
1421 {
1422 ppattr_pa_clear_bits(pa, PP_ATTR_NO_MONITOR);
1423 }
1424
1425 /**
1426 * Only test for the PP_ATTR_NO_MONITOR flag in a pp_attr_table entry if the
1427 * passed in physical address is a kernel-managed page.
1428 *
1429 * @param pa The physical address for the entry to test.
1430 *
1431 * @return False if the PA isn't a kernel-managed page, otherwise true/false
1432 * depending on whether the PP_ATTR_NO_MONITOR is set.
1433 */
1434 static inline bool
ppattr_pa_test_no_monitor(pmap_paddr_t pa)1435 ppattr_pa_test_no_monitor(pmap_paddr_t pa)
1436 {
1437 return ppattr_pa_test_bits(pa, PP_ATTR_NO_MONITOR);
1438 }
1439
1440 #endif /* XNU_MONITOR */
1441
1442 /**
1443 * Set the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1444 *
1445 * @param pai The physical address index for the entry to update.
1446 */
1447 static inline void
ppattr_set_internal(unsigned int pai)1448 ppattr_set_internal(unsigned int pai)
1449 {
1450 ppattr_set_bits(pai, PP_ATTR_INTERNAL);
1451 }
1452
1453 /**
1454 * Clear the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1455 *
1456 * @param pai The physical address index for the entry to update.
1457 */
1458 static inline void
ppattr_clear_internal(unsigned int pai)1459 ppattr_clear_internal(unsigned int pai)
1460 {
1461 ppattr_clear_bits(pai, PP_ATTR_INTERNAL);
1462 }
1463
1464 /**
1465 * Return true if the pp_attr_table entry has the PP_ATTR_INTERNAL flag set.
1466 *
1467 * @param pai The physical address index for the entry to test.
1468 */
1469 static inline bool
ppattr_test_internal(unsigned int pai)1470 ppattr_test_internal(unsigned int pai)
1471 {
1472 return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1473 }
1474
1475 /**
1476 * Set the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1477 *
1478 * @param pai The physical address index for the entry to update.
1479 */
1480 static inline void
ppattr_set_reusable(unsigned int pai)1481 ppattr_set_reusable(unsigned int pai)
1482 {
1483 ppattr_set_bits(pai, PP_ATTR_REUSABLE);
1484 }
1485
1486 /**
1487 * Clear the PP_ATTR_REUSABLE flag on a specific pp_attr_table entry.
1488 *
1489 * @param pai The physical address index for the entry to update.
1490 */
1491 static inline void
ppattr_clear_reusable(unsigned int pai)1492 ppattr_clear_reusable(unsigned int pai)
1493 {
1494 ppattr_clear_bits(pai, PP_ATTR_REUSABLE);
1495 }
1496
1497 /**
1498 * Return true if the pp_attr_table entry has the PP_ATTR_REUSABLE flag set.
1499 *
1500 * @param pai The physical address index for the entry to test.
1501 */
1502 static inline bool
ppattr_test_reusable(unsigned int pai)1503 ppattr_test_reusable(unsigned int pai)
1504 {
1505 return ppattr_test_bits(pai, PP_ATTR_REUSABLE);
1506 }
1507
1508 /**
1509 * Set the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1510 *
1511 * @note This is only valid when the ALTACCT flag is being tracked using the
1512 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1513 * PP_ATTR_ALTACCT definitions for more information.
1514 *
1515 * @param pai The physical address index for the entry to update.
1516 */
1517 static inline void
ppattr_set_altacct(unsigned int pai)1518 ppattr_set_altacct(unsigned int pai)
1519 {
1520 ppattr_set_bits(pai, PP_ATTR_ALTACCT);
1521 }
1522
1523 /**
1524 * Clear the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1525 *
1526 * @note This is only valid when the ALTACCT flag is being tracked using the
1527 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1528 * PP_ATTR_ALTACCT definitions for more information.
1529 *
1530 * @param pai The physical address index for the entry to update.
1531 */
1532 static inline void
ppattr_clear_altacct(unsigned int pai)1533 ppattr_clear_altacct(unsigned int pai)
1534 {
1535 ppattr_clear_bits(pai, PP_ATTR_ALTACCT);
1536 }
1537
1538 /**
1539 * Get the PP_ATTR_ALTACCT flag on a specific pp_attr_table entry.
1540 *
1541 * @note This is only valid when the ALTACCT flag is being tracked using the
1542 * pp_attr_table. See the descriptions above the PVE_PTEP_ALTACCT and
1543 * PP_ATTR_ALTACCT definitions for more information.
1544 *
1545 * @param pai The physical address index for the entry to test.
1546 *
1547 * @return True if the passed in page uses alternate accounting, false
1548 * otherwise.
1549 */
1550 static inline bool
ppattr_is_altacct(unsigned int pai)1551 ppattr_is_altacct(unsigned int pai)
1552 {
1553 return ppattr_test_bits(pai, PP_ATTR_ALTACCT);
1554 }
1555 /**
1556 * Get the PP_ATTR_INTERNAL flag on a specific pp_attr_table entry.
1557 *
1558 * @note This is only valid when the INTERNAL flag is being tracked using the
1559 * pp_attr_table. See the descriptions above the PVE_PTEP_INTERNAL and
1560 * PP_ATTR_INTERNAL definitions for more information.
1561 *
1562 * @param pai The physical address index for the entry to test.
1563 *
1564 * @return True if the passed in page is accounted for as "internal", false
1565 * otherwise.
1566 */
1567 static inline bool
ppattr_is_internal(unsigned int pai)1568 ppattr_is_internal(unsigned int pai)
1569 {
1570 return ppattr_test_bits(pai, PP_ATTR_INTERNAL);
1571 }
1572
1573 /**
1574 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1575 * depending on whether there are one or multiple mappings to a page. This
1576 * function abstracts out the difference between single and multiple mappings to
1577 * a page and provides a single function for determining whether alternate
1578 * accounting is set for a mapping.
1579 *
1580 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1581 * definitions for more information.
1582 *
1583 * @param pai The physical address index for the entry to test.
1584 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1585 * @param idx Index of the chosen PTE pointer inside the PVE.
1586 *
1587 * @return True if the passed in page uses alternate accounting, false
1588 * otherwise.
1589 */
1590 static inline bool
ppattr_pve_is_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1591 ppattr_pve_is_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1592 {
1593 return (pvep == PV_ENTRY_NULL) ? ppattr_is_altacct(pai) : pve_get_altacct(pvep, idx);
1594 }
1595 /**
1596 * The "internal" (INTERNAL) status for a page is tracked differently
1597 * depending on whether there are one or multiple mappings to a page. This
1598 * function abstracts out the difference between single and multiple mappings to
1599 * a page and provides a single function for determining whether "internal"
1600 * is set for a mapping.
1601 *
1602 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1603 * definitions for more information.
1604 *
1605 * @param pai The physical address index for the entry to test.
1606 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1607 * @param idx Index of the chosen PTE pointer inside the PVE.
1608 *
1609 * @return True if the passed in page is "internal", false otherwise.
1610 */
1611 static inline bool
ppattr_pve_is_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1612 ppattr_pve_is_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1613 {
1614 return (pvep == PV_ENTRY_NULL) ? ppattr_is_internal(pai) : pve_get_internal(pvep, idx);
1615 }
1616
1617 /**
1618 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1619 * depending on whether there are one or multiple mappings to a page. This
1620 * function abstracts out the difference between single and multiple mappings to
1621 * a page and provides a single function for setting the alternate accounting status
1622 * for a mapping.
1623 *
1624 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1625 * definitions for more information.
1626 *
1627 * @param pai The physical address index for the entry to update.
1628 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1629 * @param idx Index of the chosen PTE pointer inside the PVE.
1630 */
1631 static inline void
ppattr_pve_set_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1632 ppattr_pve_set_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1633 {
1634 if (pvep == PV_ENTRY_NULL) {
1635 ppattr_set_altacct(pai);
1636 } else {
1637 pve_set_altacct(pvep, idx);
1638 }
1639 }
1640 /**
1641 * The "internal" (INTERNAL) status for a page is tracked differently
1642 * depending on whether there are one or multiple mappings to a page. This
1643 * function abstracts out the difference between single and multiple mappings to
1644 * a page and provides a single function for setting the "internal" status
1645 * for a mapping.
1646 *
1647 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1648 * definitions for more information.
1649 *
1650 * @param pai The physical address index for the entry to update.
1651 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1652 * @param idx Index of the chosen PTE pointer inside the PVE.
1653 */
1654 static inline void
ppattr_pve_set_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1655 ppattr_pve_set_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1656 {
1657 if (pvep == PV_ENTRY_NULL) {
1658 ppattr_set_internal(pai);
1659 } else {
1660 pve_set_internal(pvep, idx);
1661 }
1662 }
1663
1664 /**
1665 * The "alternate accounting" (ALTACCT) status for a page is tracked differently
1666 * depending on whether there are one or multiple mappings to a page. This
1667 * function abstracts out the difference between single and multiple mappings to
1668 * a page and provides a single function for clearing the alternate accounting status
1669 * for a mapping.
1670 *
1671 * @note See the descriptions above the PVE_PTEP_ALTACCT and PP_ATTR_ALTACCT
1672 * definitions for more information.
1673 *
1674 * @param pai The physical address index for the entry to update.
1675 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1676 * @param idx Index of the chosen PTE pointer inside the PVE.
1677 */
1678 static inline void
ppattr_pve_clr_altacct(unsigned int pai,pv_entry_t * pvep,unsigned idx)1679 ppattr_pve_clr_altacct(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1680 {
1681 if (pvep == PV_ENTRY_NULL) {
1682 ppattr_clear_altacct(pai);
1683 } else {
1684 pve_clr_altacct(pvep, idx);
1685 }
1686 }
1687 /**
1688 * The "internal" (INTERNAL) status for a page is tracked differently
1689 * depending on whether there are one or multiple mappings to a page. This
1690 * function abstracts out the difference between single and multiple mappings to
1691 * a page and provides a single function for clearing the "internal" status
1692 * for a mapping.
1693 *
1694 * @note See the descriptions above the PVE_PTEP_INTERNAL and PP_ATTR_INTERNAL
1695 * definitions for more information.
1696 *
1697 * @param pai The physical address index for the entry to update.
1698 * @param pvep Pointer to the pv_entry_t object containing that mapping.
1699 * @param idx Index of the chosen PTE pointer inside the PVE.
1700 */
1701 static inline void
ppattr_pve_clr_internal(unsigned int pai,pv_entry_t * pvep,unsigned idx)1702 ppattr_pve_clr_internal(unsigned int pai, pv_entry_t *pvep, unsigned idx)
1703 {
1704 if (pvep == PV_ENTRY_NULL) {
1705 ppattr_clear_internal(pai);
1706 } else {
1707 pve_clr_internal(pvep, idx);
1708 }
1709 }
1710
1711 /**
1712 * Set the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
1713 *
1714 * @param pai The physical address index for the entry to update.
1715 */
1716 static inline void
ppattr_set_reffault(unsigned int pai)1717 ppattr_set_reffault(unsigned int pai)
1718 {
1719 ppattr_set_bits(pai, PP_ATTR_REFFAULT);
1720 }
1721
1722 /**
1723 * Clear the PP_ATTR_REFFAULT flag on a specific pp_attr_table entry.
1724 *
1725 * @param pai The physical address index for the entry to update.
1726 */
1727 static inline void
ppattr_clear_reffault(unsigned int pai)1728 ppattr_clear_reffault(unsigned int pai)
1729 {
1730 ppattr_clear_bits(pai, PP_ATTR_REFFAULT);
1731 }
1732
1733 /**
1734 * Return true if the pp_attr_table entry has the PP_ATTR_REFFAULT flag set.
1735 *
1736 * @param pai The physical address index for the entry to test.
1737 */
1738 static inline bool
ppattr_test_reffault(unsigned int pai)1739 ppattr_test_reffault(unsigned int pai)
1740 {
1741 return ppattr_test_bits(pai, PP_ATTR_REFFAULT);
1742 }
1743
1744 /**
1745 * Set the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
1746 *
1747 * @param pai The physical address index for the entry to update.
1748 */
1749 static inline void
ppattr_set_modfault(unsigned int pai)1750 ppattr_set_modfault(unsigned int pai)
1751 {
1752 ppattr_set_bits(pai, PP_ATTR_MODFAULT);
1753 }
1754
1755 /**
1756 * Clear the PP_ATTR_MODFAULT flag on a specific pp_attr_table entry.
1757 *
1758 * @param pai The physical address index for the entry to update.
1759 */
1760 static inline void
ppattr_clear_modfault(unsigned int pai)1761 ppattr_clear_modfault(unsigned int pai)
1762 {
1763 ppattr_clear_bits(pai, PP_ATTR_MODFAULT);
1764 }
1765
1766 /**
1767 * Return true if the pp_attr_table entry has the PP_ATTR_MODFAULT flag set.
1768 *
1769 * @param pai The physical address index for the entry to test.
1770 */
1771 static inline bool
ppattr_test_modfault(unsigned int pai)1772 ppattr_test_modfault(unsigned int pai)
1773 {
1774 return ppattr_test_bits(pai, PP_ATTR_MODFAULT);
1775 }
1776
1777 static inline boolean_t
pmap_is_preemptible(void)1778 pmap_is_preemptible(void)
1779 {
1780 return preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT);
1781 }
1782
1783 /**
1784 * This helper function ensures that potentially-long-running batched PPL operations are
1785 * called in preemptible context before entering the PPL, so that the PPL call may
1786 * periodically exit to allow pending urgent ASTs to be taken.
1787 */
1788 static inline void
pmap_verify_preemptible(void)1789 pmap_verify_preemptible(void)
1790 {
1791 assert(pmap_is_preemptible());
1792 }
1793
1794 /**
1795 * The minimum number of pages to keep in the PPL page free list.
1796 *
1797 * We define our target as 8 pages: enough for 2 page table pages, a PTD page,
1798 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
1799 * a single pmap_enter request.
1800 */
1801 #define PMAP_MIN_FREE_PPL_PAGES 8
1802
1803 /**
1804 * Flags passed to various page allocation functions, usually accessed through
1805 * the pmap_pages_alloc_zeroed() API. Each function that can take these flags as
1806 * a part of its option field, will describe these flags in its function header.
1807 */
1808
1809 /**
1810 * Instruct the allocation function to return immediately if no pages are
1811 * current available. Without this flag, the function will spin and wait for a
1812 * page to become available. This flag can be required in some circumstances
1813 * (for instance, when allocating pages from within the PPL).
1814 */
1815 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1816
1817 /**
1818 * Instructs an allocation function to fallback to reclaiming a userspace page
1819 * table if it failed to allocate a page from the free lists. This can be useful
1820 * when allocating from within the PPL because refilling the free lists requires
1821 * exiting and re-entering the PPL (which incurs extra latency).
1822 *
1823 * This is a quick way of allocating a page at the expense of having to
1824 * reallocate the table the next time one of its mappings is accessed.
1825 */
1826 #define PMAP_PAGE_RECLAIM_NOWAIT 0x2
1827
1828 /**
1829 * Global variables exported to the rest of the internal pmap implementation.
1830 */
1831 #if XNU_MONITOR
1832 extern uint64_t pmap_ppl_free_page_count;
1833 extern pmap_paddr_t pmap_stacks_start_pa;
1834 extern pmap_paddr_t pmap_stacks_end_pa;
1835 extern pmap_paddr_t ppl_cpu_save_area_start;
1836 extern pmap_paddr_t ppl_cpu_save_area_end;
1837 #endif /* XNU_MONITOR */
1838 extern unsigned int inuse_pmap_pages_count;
1839 extern vm_object_t pmap_object;
1840 extern uint32_t pv_alloc_initial_target;
1841 extern uint32_t pv_kern_alloc_initial_target;
1842
1843 /**
1844 * Functions exported to the rest of the internal pmap implementation.
1845 */
1846 extern void pmap_data_bootstrap(void);
1847 extern void pmap_enqueue_pages(vm_page_t);
1848 extern kern_return_t pmap_pages_alloc_zeroed(pmap_paddr_t *, unsigned, unsigned);
1849 extern void pmap_pages_free(pmap_paddr_t, unsigned);
1850
1851 #if XNU_MONITOR
1852
1853 extern void pmap_mark_page_as_ppl_page_internal(pmap_paddr_t, bool);
1854 extern void pmap_mark_page_as_ppl_page(pmap_paddr_t);
1855 extern void pmap_mark_page_as_kernel_page(pmap_paddr_t);
1856 extern pmap_paddr_t pmap_alloc_page_for_kern(unsigned int);
1857 extern void pmap_alloc_page_for_ppl(unsigned int);
1858 extern uint64_t pmap_release_ppl_pages_to_kernel(void);
1859
1860 extern uint64_t pmap_ledger_validate(const volatile void *);
1861 void pmap_ledger_retain(ledger_t ledger);
1862 void pmap_ledger_release(ledger_t ledger);
1863 extern void pmap_ledger_check_balance(pmap_t pmap);
1864
1865 kern_return_t pmap_alloc_pmap(pmap_t *pmap);
1866 void pmap_free_pmap(pmap_t pmap);
1867
1868 #endif /* XNU_MONITOR */
1869
1870 /**
1871 * The modes in which a pmap lock can be acquired. Note that shared access
1872 * doesn't necessarily mean "read-only". As long as data is atomically updated
1873 * correctly (to account for multi-cpu accesses) data can still get written with
1874 * a shared lock held. Care just needs to be taken so as to not introduce any
1875 * race conditions when there are multiple writers.
1876 *
1877 * This is here in pmap_data.h because it's a needed parameter for pv_alloc()
1878 * and pmap_enter_pv(). This header is always included in pmap_internal.h before
1879 * the rest of the pmap locking code is defined so there shouldn't be any issues
1880 * with missing types.
1881 */
1882 OS_ENUM(pmap_lock_mode, uint8_t,
1883 PMAP_LOCK_SHARED,
1884 PMAP_LOCK_EXCLUSIVE);
1885
1886 /**
1887 * Possible return values for pv_alloc(). See the pv_alloc() function header for
1888 * a description of each of these values.
1889 */
1890 typedef enum {
1891 PV_ALLOC_SUCCESS,
1892 PV_ALLOC_RETRY,
1893 PV_ALLOC_FAIL
1894 } pv_alloc_return_t;
1895
1896 extern pv_alloc_return_t pv_alloc(
1897 pmap_t, unsigned int, pmap_lock_mode_t, unsigned int, pv_entry_t **);
1898 extern void pv_free(pv_entry_t *);
1899 extern void pv_list_free(pv_entry_t *, pv_entry_t *, int);
1900 extern void pmap_compute_pv_targets(void);
1901 extern pv_alloc_return_t pmap_enter_pv(
1902 pmap_t, pt_entry_t *, int, unsigned int, pmap_lock_mode_t, pv_entry_t **, int *new_pve_ptep_idx);
1903 extern void pmap_remove_pv(pmap_t, pt_entry_t *, int, bool, bool *, bool *);
1904
1905 extern void ptd_bootstrap(pt_desc_t *, unsigned int);
1906 extern pt_desc_t *ptd_alloc_unlinked(void);
1907 extern pt_desc_t *ptd_alloc(pmap_t);
1908 extern void ptd_deallocate(pt_desc_t *);
1909 extern void ptd_info_init(
1910 pt_desc_t *, pmap_t, vm_map_address_t, unsigned int, pt_entry_t *);
1911
1912 extern kern_return_t pmap_ledger_credit(pmap_t, int, ledger_amount_t);
1913 extern kern_return_t pmap_ledger_debit(pmap_t, int, ledger_amount_t);
1914
1915 extern void validate_pmap_internal(const volatile struct pmap *, const char *);
1916 extern void validate_pmap_mutable_internal(const volatile struct pmap *, const char *);
1917
1918 /**
1919 * Macro function wrappers around pmap validation so that the calling function
1920 * can be printed in the panic strings for easier validation failure debugging.
1921 */
1922 #define validate_pmap(x) validate_pmap_internal(x, __func__)
1923 #define validate_pmap_mutable(x) validate_pmap_mutable_internal(x, __func__)
1924
1925 /**
1926 * This structure describes a PPL-owned I/O range.
1927 *
1928 * @note This doesn't necessarily have to represent "I/O" only, this can also
1929 * represent non-kernel-managed DRAM (e.g., iBoot carveouts). Any physical
1930 * address region that isn't considered "kernel-managed" is fair game.
1931 *
1932 * @note The layout of this structure needs to map 1-to-1 with the pmap-io-range
1933 * device tree nodes. Astris (through the LowGlobals) also depends on the
1934 * consistency of this structure.
1935 */
1936 typedef struct pmap_io_range {
1937 /* Physical address of the PPL-owned I/O range. */
1938 uint64_t addr;
1939
1940 /**
1941 * Length (in bytes) of the PPL-owned I/O range. Has to be the size
1942 * of a page if the range will be refered to by pmap_io_filter_entries.
1943 */
1944 uint64_t len;
1945
1946 /* Strong DSB required for pages in this range. */
1947 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31)
1948
1949 /* Corresponds to memory carved out by bootloader. */
1950 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30)
1951
1952 /* Pages in this range need to be included in the hibernation image */
1953 #define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29)
1954
1955 /* Mark the range as 'owned' by a given subsystem */
1956 #define PMAP_IO_RANGE_OWNED (1UL << 28)
1957
1958 /**
1959 * Lower 16 bits treated as pp_attr_t, upper 16 bits contain additional
1960 * mapping flags (defined above).
1961 */
1962 uint32_t wimg;
1963
1964 /**
1965 * 4 Character Code (4CC) describing what this range is.
1966 *
1967 * This has to be unique for each "type" of pages, meaning pages sharing
1968 * the same register layout, if it is used for the I/O filter descriptors
1969 * below. Otherwise it doesn't matter.
1970 */
1971 uint32_t signature;
1972 } pmap_io_range_t;
1973
1974 /* Reminder: be sure to change all relevant device trees if you change the layout of pmap_io_range_t */
1975 _Static_assert(sizeof(pmap_io_range_t) == 24, "unexpected size for pmap_io_range_t");
1976
1977 extern pmap_io_range_t* pmap_find_io_attr(pmap_paddr_t);
1978
1979 /**
1980 * This structure describes a sub-page-size I/O region owned by PPL but the kernel can write to.
1981 *
1982 * @note I/O filter software will use a collection of such data structures to determine access
1983 * permissions to a page owned by PPL.
1984 *
1985 * @note The {signature, offset} key is used to index a collection of such data structures to
1986 * optimize for space in the case where one page layout is repeated for many devices, such
1987 * as the memory controller channels.
1988 */
1989 typedef struct pmap_io_filter_entry {
1990 /* 4 Character Code (4CC) describing what this range (page) is. */
1991 uint32_t signature;
1992
1993 /* Offset within the page. It has to be within [0, PAGE_SIZE). */
1994 uint16_t offset;
1995
1996 /* Length of the range, and (offset + length) has to be within [0, PAGE_SIZE). */
1997 uint16_t length;
1998 } pmap_io_filter_entry_t;
1999
2000 _Static_assert(sizeof(pmap_io_filter_entry_t) == 8, "unexpected size for pmap_io_filter_entry_t");
2001
2002 extern pmap_io_filter_entry_t *pmap_find_io_filter_entry(pmap_paddr_t, uint64_t, const pmap_io_range_t **);
2003
/* Per-CPU pmap data initialization for the given CPU number. */
extern void pmap_cpu_data_init_internal(unsigned int);

/**
 * Flush a single 16K page from noncoherent coprocessor caches.
 *
 * @note Noncoherent cache flushes are only guaranteed to work if the participating coprocessor(s)
 *       do not have any active VA translations for the page being flushed. Since coprocessor
 *       mappings should always be controlled by some PPL IOMMU extension, they should always
 *       have PV list entries. This flush should therefore be performed at a point when the PV
 *       list is known to be either empty or at least to not contain any IOMMU entries. For
 *       the purposes of our security model, it is sufficient to wait for the PV list to become
 *       empty, as we really want to protect PPL-sensitive pages from malicious/accidental
 *       coprocessor cacheline evictions, and the PV list must be empty before a page can be
 *       handed to the PPL.
 *
 * @param paddr The base physical address of the page to flush.
 */
extern void pmap_flush_noncoherent_page(pmap_paddr_t paddr);
2022
2023 #endif /* _ARM_PMAP_PMAP_DATA_H_ */
2024