1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_assert.h>
30
31 #include <vm/pmap.h>
32 #include <vm/vm_map.h>
33 #include <vm/vm_kern.h>
34 #include <kern/ledger.h>
35 #include <kern/zalloc_internal.h>
36 #include <i386/pmap_internal.h>
37
38 void pmap_remove_range(
39 pmap_t pmap,
40 vm_map_offset_t va,
41 pt_entry_t *spte,
42 pt_entry_t *epte);
43
44 static void pmap_remove_range_options(
45 pmap_t pmap,
46 vm_map_offset_t va,
47 pt_entry_t *spte,
48 pt_entry_t *epte,
49 int options);
50
51 void pmap_reusable_range(
52 pmap_t pmap,
53 vm_map_offset_t va,
54 pt_entry_t *spte,
55 pt_entry_t *epte,
56 boolean_t reusable);
57
58 pt_entry_t *PTE_corrupted_ptr;
59
60 #if DEVELOPMENT || DEBUG
61 int pmap_inject_pte_corruption;
62 uint32_t pmap_update_clear_pte_count;
63 uint32_t pmap_update_invalid_pte_count;
64 #endif
65
66 /*
67  * The Intel platform can nest at the PDE level, i.e., NBPDE (2MB) at a time,
68  * on an NBPDE boundary.
69 */
70
71 uint64_t
72 pmap_shared_region_size_min(__unused pmap_t pmap)
73 {
74 return NBPDE;
75 }
76
77 uint64_t
78 pmap_commpage_size_min(__unused pmap_t pmap)
79 {
80 return NBPDE;
81 }
82
83 /*
84 * kern_return_t pmap_nest(grand, subord, va_start, size)
85 *
86 * grand = the pmap that we will nest subord into
87 * subord = the pmap that goes into the grand
88 * va_start = start of range in pmap to be inserted
89 * size = Size of nest area (up to 16TB)
90 *
91 * Inserts a pmap into another. This is used to implement shared segments.
92 *
93  * Note that we depend upon higher-level VM locks to ensure that things don't change while
94  * we are doing this. For example, the VM should not be doing any pmap enters while it is nesting,
95  * nor performing two nests at once.
96 */
97
98 /*
99 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
100 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
101 * container and the "grand" parent. A minor optimization to consider for the
102 * future: make the "subord" truly a container rather than a full-fledged
103 * pagetable hierarchy which can be unnecessarily sparse (DRK).
104 */
105
106 kern_return_t
107 pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
108 {
109 vm_map_offset_t vaddr;
110 pd_entry_t *pde, *npde;
111 unsigned int i;
112 uint64_t num_pde;
113
114 assert(!is_ept_pmap(grand));
115 assert(!is_ept_pmap(subord));
116
117 if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
118 (va_start & (pmap_shared_region_size_min(grand) - 1)) ||
119 ((size >> 28) > 65536)) { /* Max size we can nest is 16TB */
120 return KERN_INVALID_VALUE;
121 }
122
123 if (size == 0) {
124 panic("pmap_nest: size is invalid - %016llX", size);
125 }
126
127 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
128 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
129 VM_KERNEL_ADDRHIDE(va_start));
130
131 vaddr = (vm_map_offset_t)va_start;
132 num_pde = size >> PDESHIFT;
133
134 PMAP_LOCK_EXCLUSIVE(subord);
135
136 subord->pm_shared = TRUE;
137
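	/*
	 * First pass: make sure the nested ("subord") pmap has valid paging
	 * structures covering the entire range. 1GiB-aligned chunks are
	 * expanded at the PDPT level and tagged INTEL_PDPTE_NESTED; the
	 * remainder is expanded one PDE (2MiB) at a time.
	 */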
138 for (i = 0; i < num_pde;) {
139 if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
140 npde = pmap64_pdpt(subord, vaddr);
141
142 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
143 PMAP_UNLOCK_EXCLUSIVE(subord);
144 pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
145 PMAP_LOCK_EXCLUSIVE(subord);
146 npde = pmap64_pdpt(subord, vaddr);
147 }
148 *npde |= INTEL_PDPTE_NESTED;
149 vaddr += NBPDPT;
150 i += (uint32_t)NPDEPG;
151 } else {
152 npde = pmap_pde(subord, vaddr);
153
154 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
155 PMAP_UNLOCK_EXCLUSIVE(subord);
156 pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
157 PMAP_LOCK_EXCLUSIVE(subord);
158 npde = pmap_pde(subord, vaddr);
159 }
160 vaddr += NBPDE;
161 i++;
162 }
163 }
164
165 PMAP_UNLOCK_EXCLUSIVE(subord);
166
167 vaddr = (vm_map_offset_t)va_start;
168
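	/*
	 * Second pass: copy the subord's PDPT (1GiB) or PDE (2MiB) entries
	 * into "grand", expanding grand's upper-level tables as needed, so
	 * that both pmaps share the same lower-level page tables over the
	 * nested range.
	 */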
169 PMAP_LOCK_EXCLUSIVE(grand);
170
171 for (i = 0; i < num_pde;) {
172 pd_entry_t tpde;
173
174 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
175 npde = pmap64_pdpt(subord, vaddr);
176 if (npde == 0) {
177 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
178 }
179 tpde = *npde;
180 pde = pmap64_pdpt(grand, vaddr);
181 if (0 == pde) {
182 PMAP_UNLOCK_EXCLUSIVE(grand);
183 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
184 PMAP_LOCK_EXCLUSIVE(grand);
185 pde = pmap64_pdpt(grand, vaddr);
186 }
187 if (pde == 0) {
188 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
189 }
190 pmap_store_pte(FALSE, pde, tpde);
191 vaddr += NBPDPT;
192 i += (uint32_t) NPDEPG;
193 } else {
194 npde = pmap_pde(subord, vaddr);
195 if (npde == 0) {
196 panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
197 }
198 tpde = *npde;
199 pde = pmap_pde(grand, vaddr);
200 if (0 == pde) {
201 PMAP_UNLOCK_EXCLUSIVE(grand);
202 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
203 PMAP_LOCK_EXCLUSIVE(grand);
204 pde = pmap_pde(grand, vaddr);
205 }
206
207 if (pde == 0) {
208 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
209 }
210 vaddr += NBPDE;
211 pmap_store_pte(FALSE, pde, tpde);
212 i++;
213 }
214 }
215
216 PMAP_UNLOCK_EXCLUSIVE(grand);
217
218 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);
219
220 return KERN_SUCCESS;
221 }
222
223 /*
224 * kern_return_t pmap_unnest(grand, vaddr)
225 *
226 * grand = the pmap that we will un-nest subord from
227 * vaddr = start of range in pmap to be unnested
228 *
229 * Removes a pmap from another. This is used to implement shared segments.
230 */
231
232 kern_return_t
233 pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
234 {
235 pd_entry_t *pde;
236 unsigned int i;
237 uint64_t num_pde;
238 addr64_t va_start, va_end;
239 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
240
241 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
242 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
243
244 if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
245 (vaddr & (pmap_shared_region_size_min(grand) - 1))) {
246 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...",
247 grand, vaddr, size);
248 }
249
250 assert(!is_ept_pmap(grand));
251
252 /* align everything to PDE boundaries */
253 va_start = vaddr & ~(NBPDE - 1);
254 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
255 size = va_end - va_start;
256
257 PMAP_LOCK_EXCLUSIVE(grand);
258
259 num_pde = size >> PDESHIFT;
260 vaddr = va_start;
261
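	/*
	 * Clear an entire nested PDPT entry (1GiB) at a time where the region
	 * was nested at that level; otherwise clear individual PDEs (2MiB).
	 */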
262 for (i = 0; i < num_pde;) {
263 if (pdptnum(grand, vaddr) != npdpt) {
264 npdpt = pdptnum(grand, vaddr);
265 pde = pmap64_pdpt(grand, vaddr);
266 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
267 pmap_store_pte(FALSE, pde, (pd_entry_t)0);
268 i += (uint32_t) NPDEPG;
269 vaddr += NBPDPT;
270 continue;
271 }
272 }
273 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
274 if (pde == 0) {
275 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
276 }
277 pmap_store_pte(FALSE, pde, (pd_entry_t)0);
278 i++;
279 vaddr += NBPDE;
280 }
281
282 PMAP_UPDATE_TLBS(grand, va_start, va_end);
283
284 PMAP_UNLOCK_EXCLUSIVE(grand);
285
286 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);
287
288 return KERN_SUCCESS;
289 }
290
291 kern_return_t
292 pmap_unnest_options(
293 pmap_t grand,
294 addr64_t vaddr,
295 __unused uint64_t size,
296 __unused unsigned int options)
297 {
298 return pmap_unnest(grand, vaddr, size);
299 }
300
301 /* Invoked by the Mach VM to determine the platform-specific unnest region */
302
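/*
 * If either end of the range falls within a PDPT entry that was nested at the
 * 1GiB level, widen that end to the enclosing NBPDPT boundary. Returns TRUE if
 * any adjustment was made.
 */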
303 boolean_t
304 pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
305 {
306 pd_entry_t *pdpte;
307 boolean_t rval = FALSE;
308
309 PMAP_LOCK_EXCLUSIVE(p);
310
311 pdpte = pmap64_pdpt(p, *s);
312 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
313 *s &= ~(NBPDPT - 1);
314 rval = TRUE;
315 }
316
317 pdpte = pmap64_pdpt(p, *e);
318 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
319 *e = ((*e + NBPDPT) & ~(NBPDPT - 1));
320 rval = TRUE;
321 }
322
323 PMAP_UNLOCK_EXCLUSIVE(p);
324
325 return rval;
326 }
327
328 pmap_paddr_t
329 pmap_find_pa(pmap_t pmap, addr64_t va)
330 {
331 pt_entry_t *ptp;
332 pd_entry_t *pdep;
333 pd_entry_t pde;
334 pt_entry_t pte;
335 boolean_t is_ept, locked = FALSE;
336 pmap_paddr_t pa = 0;
337
338 is_ept = is_ept_pmap(pmap);
339
340 if ((pmap != kernel_pmap) && not_in_kdp) {
341 PMAP_LOCK_EXCLUSIVE(pmap);
342 locked = TRUE;
343 } else {
344 mp_disable_preemption();
345 }
346
347 if (os_ref_get_count(&pmap->ref_count) == 0) {
348 goto pfp_exit;
349 }
350
351 pdep = pmap_pde(pmap, va);
352
353 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
354 if (pde & PTE_PS) {
355 pa = pte_to_pa(pde) + (va & I386_LPGMASK);
356 } else {
357 ptp = pmap_pte(pmap, va);
358 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
359 pa = pte_to_pa(pte) + (va & PAGE_MASK);
360 }
361 }
362 }
363 pfp_exit:
364 if (locked) {
365 PMAP_UNLOCK_EXCLUSIVE(pmap);
366 } else {
367 mp_enable_preemption();
368 }
369
370 return pa;
371 }
372
373 /*
374 * pmap_find_phys returns the (4K) physical page number containing a
375 * given virtual address in a given pmap.
376 * Note that pmap_pte may return a pde if this virtual address is
377 * mapped by a large page and this is taken into account in order
378 * to return the correct page number in this case.
379 */
380 ppnum_t
381 pmap_find_phys(pmap_t pmap, addr64_t va)
382 {
383 ppnum_t ppn = 0;
384 pmap_paddr_t pa = 0;
385
386 pa = pmap_find_pa(pmap, va);
387 ppn = (ppnum_t) i386_btop(pa);
388
389 return ppn;
390 }
391
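/*
 * Variant of pmap_find_phys() restricted to pmaps that can safely be walked
 * from the current context: the kernel pmap or the current thread's own map.
 * Any other pmap yields 0.
 */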
392 ppnum_t
393 pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
394 {
395 if ((pmap == kernel_pmap) ||
396 ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
397 return pmap_find_phys(pmap, va);
398 }
399 return 0;
400 }
401
402 /*
403  * pmap_get_prot returns the equivalent VM page protections
404 * set on a given address, 'va'. This function is used in the
405 * ml_static_verify_page_protections() routine which is used
406 * by the kext loading code to validate that the TEXT segment
407 * of a kext is mapped executable.
408 */
409 kern_return_t
410 pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
411 {
412 pt_entry_t *ptp;
413 pd_entry_t *pdep;
414 pd_entry_t pde;
415 pt_entry_t pte;
416 boolean_t is_ept, locked = FALSE;
417 kern_return_t retval = KERN_FAILURE;
418 vm_prot_t prot = 0;
419
420 is_ept = is_ept_pmap(pmap);
421
422 if ((pmap != kernel_pmap) && not_in_kdp) {
423 PMAP_LOCK_EXCLUSIVE(pmap);
424 locked = TRUE;
425 } else {
426 mp_disable_preemption();
427 }
428
429 if (os_ref_get_count(&pmap->ref_count) == 0) {
430 goto pfp_exit;
431 }
432
433 pdep = pmap_pde(pmap, va);
434
435 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
436 if (pde & PTE_PS) {
437 prot = VM_PROT_READ;
438
439 if (pde & PTE_WRITE(is_ept)) {
440 prot |= VM_PROT_WRITE;
441 }
442 if (PTE_IS_EXECUTABLE(is_ept, pde)) {
443 prot |= VM_PROT_EXECUTE;
444 }
445 retval = KERN_SUCCESS;
446 } else {
447 ptp = pmap_pte(pmap, va);
448 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
449 prot = VM_PROT_READ;
450
451 if (pte & PTE_WRITE(is_ept)) {
452 prot |= VM_PROT_WRITE;
453 }
454 if (PTE_IS_EXECUTABLE(is_ept, pte)) {
455 prot |= VM_PROT_EXECUTE;
456 }
457 retval = KERN_SUCCESS;
458 }
459 }
460 }
461
462 pfp_exit:
463 if (locked) {
464 PMAP_UNLOCK_EXCLUSIVE(pmap);
465 } else {
466 mp_enable_preemption();
467 }
468
469 if (protp) {
470 *protp = prot;
471 }
472
473 return retval;
474 }
475
476 /*
477 * Update cache attributes for all extant managed mappings.
478 * Assumes PV for this page is locked, and that the page
479 * is managed. We assume that this physical page may be mapped in
480 * both EPT and normal Intel PTEs, so we convert the attributes
481 * to the corresponding format for each pmap.
482 *
483 * We assert that the passed set of attributes is a subset of the
484 * PHYS_CACHEABILITY_MASK.
485 */
486 void
487 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
488 {
489 pv_rooted_entry_t pv_h, pv_e;
490 pv_hashed_entry_t pvh_e, nexth;
491 vm_map_offset_t vaddr;
492 pmap_t pmap;
493 pt_entry_t *ptep;
494 boolean_t is_ept;
495 unsigned ept_attributes;
496
497 assert(IS_MANAGED_PAGE(pn));
498 assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
499
500 /* We don't support the PAT bit for EPT PTEs */
501 if (attributes & INTEL_PTE_NCACHE) {
502 ept_attributes = INTEL_EPT_NCACHE;
503 } else {
504 ept_attributes = INTEL_EPT_WB;
505 }
506
507 pv_h = pai_to_pvh(pn);
508 	/* TODO: translate the PHYS_* bits to PTE bits; while they're
509 	 * currently identical, they may not remain so.
510 	 * Potential optimizations (here and in page_protect):
511 	 * parallel shootdowns, and checking for redundant
512 	 * attribute modifications.
513 */
514
515 /*
516 * Alter attributes on all mappings
517 */
518 if (pv_h->pmap != PMAP_NULL) {
519 pv_e = pv_h;
520 pvh_e = (pv_hashed_entry_t)pv_e;
521
522 do {
523 pmap = pv_e->pmap;
524 vaddr = PVE_VA(pv_e);
525 ptep = pmap_pte(pmap, vaddr);
526
527 if (0 == ptep) {
528 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
529 }
530
531 is_ept = is_ept_pmap(pmap);
532
533 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
534 if (!is_ept) {
535 pmap_update_pte(is_ept, ptep, PHYS_CACHEABILITY_MASK, attributes, true);
536 } else {
537 pmap_update_pte(is_ept, ptep, INTEL_EPT_CACHE_MASK, ept_attributes, true);
538 }
539 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
540 pvh_e = nexth;
541 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
542 }
543 }
544
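/*
 * When 'dofilter' is TRUE, mark this CPU's CR3 as inactive so the CPU can be
 * skipped by TLB shootdowns; when FALSE, mark it active again and process any
 * invalidations that may have been posted while filtering was in effect.
 */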
545 void
546 x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
547 {
548 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
549
550 if (dofilter) {
551 CPU_CR3_MARK_INACTIVE();
552 } else {
553 CPU_CR3_MARK_ACTIVE();
554 mfence();
555 pmap_update_interrupt();
556 }
557 }
558
559
560 /*
561 * Insert the given physical page (p) at
562 * the specified virtual address (v) in the
563 * target physical map with the protection requested.
564 *
565 * If specified, the page will be wired down, meaning
566 * that the related pte cannot be reclaimed.
567 *
568 * NB: This is the only routine which MAY NOT lazy-evaluate
569 * or lose information. That is, this routine must actually
570 * insert this page into the given map NOW.
571 */
572
573 kern_return_t
574 pmap_enter(
575 pmap_t pmap,
576 vm_map_offset_t vaddr,
577 ppnum_t pn,
578 vm_prot_t prot,
579 vm_prot_t fault_type,
580 unsigned int flags,
581 boolean_t wired)
582 {
583 return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
584 }
585
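/*
 * Per-PTE spinlock built on the INTEL_PTE_SWLOCK software bit: PTE_LOCK_LOCK
 * spins until it can atomically set the bit (acquire semantics), and
 * PTE_LOCK_UNLOCK clears it with release semantics.
 */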
586 #define PTE_LOCK(EPT) INTEL_PTE_SWLOCK
587
588 static inline void PTE_LOCK_LOCK(pt_entry_t *);
589 static inline void PTE_LOCK_UNLOCK(pt_entry_t *);
590
591 void
592 PTE_LOCK_LOCK(pt_entry_t *lpte)
593 {
594 pt_entry_t pte;
595 plretry:
596 while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
597 __builtin_ia32_pause();
598 }
599 if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
600 return;
601 }
602
603 goto plretry;
604 }
605
606 void
607 PTE_LOCK_UNLOCK(pt_entry_t *lpte)
608 {
609 __c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
610 }
611
612 kern_return_t
613 pmap_enter_options_addr(
614 pmap_t pmap,
615 vm_map_address_t v,
616 pmap_paddr_t pa,
617 vm_prot_t prot,
618 vm_prot_t fault_type,
619 unsigned int flags,
620 boolean_t wired,
621 unsigned int options,
622 __unused void *arg)
623 {
624 return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg);
625 }
626
627 kern_return_t
628 pmap_enter_options(
629 pmap_t pmap,
630 vm_map_offset_t vaddr,
631 ppnum_t pn,
632 vm_prot_t prot,
633 __unused vm_prot_t fault_type,
634 unsigned int flags,
635 boolean_t wired,
636 unsigned int options,
637 void *arg)
638 {
639 pt_entry_t *pte = NULL;
640 pv_rooted_entry_t pv_h;
641 ppnum_t pai;
642 pv_hashed_entry_t pvh_e;
643 pv_hashed_entry_t pvh_new;
644 pt_entry_t template;
645 pmap_paddr_t old_pa;
646 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
647 boolean_t need_tlbflush = FALSE;
648 boolean_t set_NX;
649 char oattr;
650 boolean_t old_pa_locked;
651 /* 2MiB mappings are confined to x86_64 by VM */
652 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
653 vm_object_t delpage_pm_obj = NULL;
654 uint64_t delpage_pde_index = 0;
655 pt_entry_t old_pte;
656 kern_return_t kr = KERN_FAILURE;
657 boolean_t is_ept;
658 boolean_t is_altacct;
659 boolean_t ptelocked = FALSE;
660
661 pmap_intr_assert();
662
663 if (__improbable(pmap == PMAP_NULL)) {
664 return KERN_INVALID_ARGUMENT;
665 }
666 if (__improbable(pn == vm_page_guard_addr)) {
667 return KERN_INVALID_ARGUMENT;
668 }
669
670 is_ept = is_ept_pmap(pmap);
671
672 	/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
673 	 * unused value for that scenario.
674 */
675 assert(pn != vm_page_fictitious_addr);
676
677
678 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
679 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
680 prot);
681
682 if ((prot & VM_PROT_EXECUTE) || __improbable(is_ept && (prot & VM_PROT_UEXEC))) {
683 set_NX = FALSE;
684 } else {
685 set_NX = TRUE;
686 }
687
688 #if DEVELOPMENT || DEBUG
689 if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
690 set_NX = FALSE;
691 }
692
693 if (__improbable(set_NX && (pmap == kernel_pmap) &&
694 ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
695 (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
696 set_NX = FALSE;
697 }
698 #endif
699
700 pvh_new = PV_HASHED_ENTRY_NULL;
701 Retry:
702 pvh_e = PV_HASHED_ENTRY_NULL;
703
704 PMAP_LOCK_SHARED(pmap);
705
706 /*
707 * Expand pmap to include this pte. Assume that
708 * pmap is always expanded to include enough hardware
709 * pages to map one VM page.
710 */
711 if (__improbable(superpage)) {
712 while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
713 /* need room for another pde entry */
714 PMAP_UNLOCK_SHARED(pmap);
715 kr = pmap_expand_pdpt(pmap, vaddr, options);
716 if (kr != KERN_SUCCESS) {
717 goto done1;
718 }
719 PMAP_LOCK_SHARED(pmap);
720 }
721 } else {
722 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
723 /*
724 * Must unlock to expand the pmap
725 * going to grow pde level page(s)
726 */
727 PMAP_UNLOCK_SHARED(pmap);
728 kr = pmap_expand(pmap, vaddr, options);
729 if (kr != KERN_SUCCESS) {
730 goto done1;
731 }
732 PMAP_LOCK_SHARED(pmap);
733 }
734 }
735
736 if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
737 PMAP_UNLOCK_SHARED(pmap);
738 kr = KERN_SUCCESS;
739 goto done1;
740 }
741
742 if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
743 /*
744 * There is still an empty page table mapped that
745 * was used for a previous base page mapping.
746 * Remember the PDE and the PDE index, so that we
747 * can free the page at the end of this function.
748 */
749 delpage_pde_index = pdeidx(pmap, vaddr);
750 delpage_pm_obj = pmap->pm_obj;
751 pmap_store_pte(is_ept, pte, 0);
752 }
753
754 PTE_LOCK_LOCK(pte);
755 ptelocked = TRUE;
756
757 old_pa = pte_to_pa(*pte);
758 pai = pa_index(old_pa);
759 old_pa_locked = FALSE;
760
761 if (old_pa == 0 &&
762 PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
763 /*
764 * "pmap" should be locked at this point, so this should
765 * not race with another pmap_enter() or pmap_remove_range().
766 */
767 assert(pmap != kernel_pmap);
768
769 /* one less "compressed" */
770 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
771 PAGE_SIZE);
772 if (*pte & PTE_COMPRESSED_ALT) {
773 pmap_ledger_debit(
774 pmap,
775 task_ledgers.alternate_accounting_compressed,
776 PAGE_SIZE);
777 } else {
778 /* was part of the footprint */
779 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
780 PAGE_SIZE);
781 }
782 /* marker will be cleared below */
783 }
784
785 /*
786 	 * if we have a previous managed page, lock the pv entry now. After
787 	 * we lock it, check to see if someone beat us to the lock and, if so,
788 	 * drop the lock.
789 */
790 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
791 LOCK_PVH(pai);
792 old_pa_locked = TRUE;
793 old_pa = pte_to_pa(*pte);
794 if (0 == old_pa) {
795 UNLOCK_PVH(pai); /* another path beat us to it */
796 old_pa_locked = FALSE;
797 }
798 }
799
800 /*
801 * Special case if the incoming physical page is already mapped
802 * at this address.
803 */
804 if (old_pa == pa) {
805 pt_entry_t old_attributes =
806 *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));
807
808 /*
809 * May be changing its wired attribute or protection
810 */
811
812 template = pa_to_pte(pa);
813
814 if (__probable(!is_ept)) {
815 template |= INTEL_PTE_VALID;
816 } else {
817 template |= INTEL_EPT_IPAT;
818 }
819
820 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
821
822 /*
823 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
824 */
825 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
826 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
827 if (!(flags & VM_MEM_GUARDED)) {
828 template |= INTEL_PTE_PAT;
829 }
830 template |= INTEL_PTE_NCACHE;
831 }
832 if (pmap != kernel_pmap && !is_ept) {
833 template |= INTEL_PTE_USER;
834 }
835
836 if (prot & VM_PROT_READ) {
837 template |= PTE_READ(is_ept);
838 }
839
840 if (prot & VM_PROT_WRITE) {
841 template |= PTE_WRITE(is_ept);
842 if (is_ept && !pmap_ept_support_ad) {
843 template |= PTE_MOD(is_ept);
844 if (old_pa_locked) {
845 assert(IS_MANAGED_PAGE(pai));
846 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
847 }
848 }
849 }
850
851 if (prot & VM_PROT_EXECUTE) {
852 assert(set_NX == 0);
853 template = pte_set_ex(template, is_ept);
854 }
855
856 if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
857 assert(set_NX == 0);
858 template = pte_set_uex(template);
859 }
860
861 if (set_NX) {
862 template = pte_remove_ex(template, is_ept);
863 }
864
865 if (wired) {
866 template |= PTE_WIRED;
867 if (!iswired(old_attributes)) {
868 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
869 }
870 } else {
871 if (iswired(old_attributes)) {
872 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
873 }
874 }
875
876 		if (superpage) {        /* this path cannot be used */
877 template |= PTE_PS; /* to change the page size! */
878 }
879 if (old_attributes == template) {
880 goto dont_update_pte;
881 }
882
883 /* Determine delta, PV locked */
884 need_tlbflush =
885 ((old_attributes ^ template) != PTE_WIRED);
886
887 /* Optimisation: avoid TLB flush when adding writability */
888 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
889 if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
890 need_tlbflush = FALSE;
891 }
892 }
893
894 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
895 if (__improbable(is_ept && !pmap_ept_support_ad)) {
896 template |= PTE_REF(is_ept);
897 if (old_pa_locked) {
898 assert(IS_MANAGED_PAGE(pai));
899 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
900 }
901 }
902
903 /* store modified PTE and preserve RC bits */
904 pt_entry_t npte, opte;
905
906 assert((*pte & PTE_LOCK(is_ept)) != 0);
907
908 do {
909 opte = *pte;
910 npte = template | (opte & (PTE_REF(is_ept) |
911 PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
912 } while (!pmap_cmpx_pte(pte, opte, npte));
913
914 DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, opte, uint64_t, npte);
915
916 dont_update_pte:
917 if (old_pa_locked) {
918 UNLOCK_PVH(pai);
919 old_pa_locked = FALSE;
920 }
921 goto done2;
922 }
923
924 /*
925 * Outline of code from here:
926 * 1) If va was mapped, update TLBs, remove the mapping
927 * and remove old pvlist entry.
928 * 2) Add pvlist entry for new mapping
929 * 3) Enter new mapping.
930 *
931 * If the old physical page is not managed step 1) is skipped
932 * (except for updating the TLBs), and the mapping is
933 * overwritten at step 3). If the new physical page is not
934 * managed, step 2) is skipped.
935 */
936 /* TODO: add opportunistic refmod collect */
937 if (old_pa != (pmap_paddr_t) 0) {
938 boolean_t was_altacct = FALSE;
939
940 /*
941 * Don't do anything to pages outside valid memory here.
942 * Instead convince the code that enters a new mapping
943 * to overwrite the old one.
944 */
945
946 /* invalidate the PTE */
947 pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
948 /* propagate invalidate everywhere */
949 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
950 /* remember reference and change */
951 old_pte = *pte;
952 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
953 /* completely invalidate the PTE */
954 pmap_store_pte(is_ept, pte, PTE_LOCK(is_ept));
955
956 if (IS_MANAGED_PAGE(pai)) {
957 /*
958 * Remove the mapping from the pvlist for
959 * this physical page.
960 * We'll end up with either a rooted pv or a
961 * hashed pv
962 */
963 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
964 }
965
966 if (IS_MANAGED_PAGE(pai)) {
967 pmap_assert(old_pa_locked == TRUE);
968 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
969 if (pmap != kernel_pmap) {
970 /* update ledgers */
971 if (was_altacct) {
972 assert(IS_INTERNAL_PAGE(pai));
973 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
974 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
975 } else if (IS_REUSABLE_PAGE(pai)) {
976 assert(!was_altacct);
977 assert(IS_INTERNAL_PAGE(pai));
978 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
979 /* was already not in phys_footprint */
980 } else if (IS_INTERNAL_PAGE(pai)) {
981 assert(!was_altacct);
982 assert(!IS_REUSABLE_PAGE(pai));
983 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
984 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
985 } else {
986 /* not an internal page */
987 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
988 }
989 }
990 if (iswired(*pte)) {
991 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
992 PAGE_SIZE);
993 }
994
995 if (!is_ept) {
996 pmap_phys_attributes[pai] |= oattr;
997 } else {
998 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
999 }
1000 } else {
1001 /*
1002 * old_pa is not managed.
1003 * Do removal part of accounting.
1004 */
1005
1006 if (pmap != kernel_pmap) {
1007 #if 00
1008 assert(pmap->stats.device > 0);
1009 OSAddAtomic(-1, &pmap->stats.device);
1010 #endif
1011 }
1012 if (iswired(*pte)) {
1013 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1014 }
1015 }
1016 }
1017
1018 /*
1019 	 * if we had a previously managed page locked, unlock it now
1020 */
1021 if (old_pa_locked) {
1022 UNLOCK_PVH(pai);
1023 old_pa_locked = FALSE;
1024 }
1025
1026 pai = pa_index(pa); /* now working with new incoming phys page */
1027 if (IS_MANAGED_PAGE(pai)) {
1028 /*
1029 * Step 2) Enter the mapping in the PV list for this
1030 * physical page.
1031 */
1032 pv_h = pai_to_pvh(pai);
1033
1034 LOCK_PVH(pai);
1035
1036 if (pv_h->pmap == PMAP_NULL) {
1037 /*
1038 * No mappings yet, use rooted pv
1039 */
1040 pv_h->va_and_flags = vaddr;
1041 pv_h->pmap = pmap;
1042 queue_init(&pv_h->qlink);
1043
1044 if (options & PMAP_OPTIONS_INTERNAL) {
1045 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
1046 } else {
1047 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
1048 }
1049 if (options & PMAP_OPTIONS_REUSABLE) {
1050 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1051 } else {
1052 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1053 }
1054 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1055 IS_INTERNAL_PAGE(pai)) {
1056 pv_h->va_and_flags |= PVE_IS_ALTACCT;
1057 is_altacct = TRUE;
1058 } else {
1059 pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
1060 is_altacct = FALSE;
1061 }
1062 } else {
1063 /*
1064 * Add new pv_hashed_entry after header.
1065 */
1066 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
1067 pvh_e = pvh_new;
1068 pvh_new = PV_HASHED_ENTRY_NULL;
1069 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
1070 PV_HASHED_ALLOC(&pvh_e);
1071 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1072 /*
1073 				 * the pv list is empty. If we are on
1074 				 * the kernel pmap, we'll use one of
1075 				 * the special private kernel pv_e's;
1076 				 * otherwise, we need to unlock
1077 				 * everything, zalloc a pv_e, and
1078 				 * restart, bringing the new pv_e in
1079 				 * with us.
1080 */
1081 if (kernel_pmap == pmap) {
1082 PV_HASHED_KERN_ALLOC(&pvh_e);
1083 } else {
1084 UNLOCK_PVH(pai);
1085 PTE_LOCK_UNLOCK(pte);
1086 PMAP_UNLOCK_SHARED(pmap);
1087 pmap_pv_throttle(pmap);
1088 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1089 goto Retry;
1090 }
1091 }
1092 }
1093
1094 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1095 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
1096 }
1097
1098 pvh_e->va_and_flags = vaddr;
1099 pvh_e->pmap = pmap;
1100 pvh_e->ppn = pn;
1101 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1102 IS_INTERNAL_PAGE(pai)) {
1103 pvh_e->va_and_flags |= PVE_IS_ALTACCT;
1104 is_altacct = TRUE;
1105 } else {
1106 pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
1107 is_altacct = FALSE;
1108 }
1109 pv_hash_add(pvh_e, pv_h);
1110
1111 /*
1112 * Remember that we used the pvlist entry.
1113 */
1114 pvh_e = PV_HASHED_ENTRY_NULL;
1115 }
1116
1117 /*
1118 * only count the mapping
1119 * for 'managed memory'
1120 */
1121 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1122 if (pmap != kernel_pmap) {
1123 /* update ledgers */
1124 if (is_altacct) {
1125 /* internal but also alternate accounting */
1126 assert(IS_INTERNAL_PAGE(pai));
1127 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1128 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1129 /* alternate accounting, so not in footprint */
1130 } else if (IS_REUSABLE_PAGE(pai)) {
1131 assert(!is_altacct);
1132 assert(IS_INTERNAL_PAGE(pai));
1133 pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
1134 /* internal but reusable: not in footprint */
1135 } else if (IS_INTERNAL_PAGE(pai)) {
1136 assert(!is_altacct);
1137 assert(!IS_REUSABLE_PAGE(pai));
1138 /* internal: add to footprint */
1139 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1140 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1141 } else {
1142 /* not internal: not in footprint */
1143 pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
1144 }
1145 }
1146 } else if (last_managed_page == 0) {
1147 /* Account for early mappings created before "managed pages"
1148 * are determined. Consider consulting the available DRAM map.
1149 */
1150 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1151 if (pmap != kernel_pmap) {
1152 #if 00
1153 OSAddAtomic(+1, &pmap->stats.device);
1154 PMAP_STATS_PEAK(pmap->stats.device);
1155 #endif
1156 }
1157 }
1158 /*
1159 * Step 3) Enter the mapping.
1160 *
1161 * Build a template to speed up entering -
1162 * only the pfn changes.
1163 */
1164 template = pa_to_pte(pa);
1165
1166 if (!is_ept) {
1167 template |= INTEL_PTE_VALID;
1168 } else {
1169 template |= INTEL_EPT_IPAT;
1170 }
1171
1172 /*
1173 * DRK: It may be worth asserting on cache attribute flags that diverge
1174 * from the existing physical page attributes.
1175 */
1176
1177 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
1178
1179 /*
1180 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1181 */
1182 if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
1183 if (!(flags & VM_MEM_GUARDED)) {
1184 template |= INTEL_PTE_PAT;
1185 }
1186 template |= INTEL_PTE_NCACHE;
1187 }
1188 if (pmap != kernel_pmap && !is_ept) {
1189 template |= INTEL_PTE_USER;
1190 }
1191 if (prot & VM_PROT_READ) {
1192 template |= PTE_READ(is_ept);
1193 }
1194 if (prot & VM_PROT_WRITE) {
1195 template |= PTE_WRITE(is_ept);
1196 if (is_ept && !pmap_ept_support_ad) {
1197 template |= PTE_MOD(is_ept);
1198 if (IS_MANAGED_PAGE(pai)) {
1199 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
1200 }
1201 }
1202 }
1203 if (prot & VM_PROT_EXECUTE) {
1204 assert(set_NX == 0);
1205 template = pte_set_ex(template, is_ept);
1206 }
1207 if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
1208 assert(set_NX == 0);
1209 template = pte_set_uex(template);
1210 }
1211
1212 if (set_NX) {
1213 template = pte_remove_ex(template, is_ept);
1214 }
1215 if (wired) {
1216 template |= INTEL_PTE_WIRED;
1217 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1218 }
1219 if (__improbable(superpage)) {
1220 template |= INTEL_PTE_PS;
1221 }
1222
1223 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1224 if (__improbable(is_ept && !pmap_ept_support_ad)) {
1225 template |= PTE_REF(is_ept);
1226 if (IS_MANAGED_PAGE(pai)) {
1227 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
1228 }
1229 }
1230 template |= PTE_LOCK(is_ept);
1231 pmap_store_pte(is_ept, pte, template);
1232 DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, 0, uint64_t, template);
1233
1234 /*
1235 * if this was a managed page we delayed unlocking the pv until here
1236 * to prevent pmap_page_protect et al from finding it until the pte
1237 * has been stored
1238 */
1239 if (IS_MANAGED_PAGE(pai)) {
1240 UNLOCK_PVH(pai);
1241 }
1242 done2:
1243 if (need_tlbflush == TRUE) {
1244 if (options & PMAP_OPTIONS_NOFLUSH) {
1245 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1246 } else {
1247 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1248 }
1249 }
1250 if (ptelocked) {
1251 PTE_LOCK_UNLOCK(pte);
1252 }
1253 PMAP_UNLOCK_SHARED(pmap);
1254
1255 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1256 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1257 }
1258 if (pvh_new != PV_HASHED_ENTRY_NULL) {
1259 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1260 }
1261
1262 if (delpage_pm_obj) {
1263 vm_page_t m;
1264
1265 vm_object_lock(delpage_pm_obj);
1266 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1267 if (m == VM_PAGE_NULL) {
1268 panic("pmap_enter: pte page not in object");
1269 }
1270 VM_PAGE_FREE(m);
1271 vm_object_unlock(delpage_pm_obj);
1272 OSAddAtomic(-1, &inuse_ptepages_count);
1273 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1274 }
1275
1276 kr = KERN_SUCCESS;
1277 done1:
1278 if (__improbable((kr == KERN_SUCCESS) && (pmap == kernel_pmap) &&
1279 zone_spans_ro_va(vaddr, vaddr + PAGE_SIZE))) {
1280 pmap_page_protect((ppnum_t)atop_kernel(kvtophys(vaddr)), VM_PROT_READ);
1281 }
1282 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
1283 return kr;
1284 }
1285
1286 /*
1287 * Remove a range of hardware page-table entries.
1288 * The entries given are the first (inclusive)
1289 * and last (exclusive) entries for the VM pages.
1290 * The virtual address is the va for the first pte.
1291 *
1292 * The pmap must be locked.
1293 * If the pmap is not the kernel pmap, the range must lie
1294 * entirely within one pte-page. This is NOT checked.
1295 * Assumes that the pte-page exists.
1296 */
1297
1298 void
1299 pmap_remove_range(
1300 pmap_t pmap,
1301 vm_map_offset_t start_vaddr,
1302 pt_entry_t *spte,
1303 pt_entry_t *epte)
1304 {
1305 pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1306 PMAP_OPTIONS_REMOVE);
1307 }
1308
1309 static void
1310 pmap_remove_range_options(
1311 pmap_t pmap,
1312 vm_map_offset_t start_vaddr,
1313 pt_entry_t *spte,
1314 pt_entry_t *epte,
1315 int options)
1316 {
1317 pt_entry_t *cpte;
1318 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1319 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1320 pv_hashed_entry_t pvh_e;
1321 int pvh_cnt = 0;
1322 int num_removed, num_unwired, num_found, num_invalid;
1323 int ledgers_external, ledgers_reusable, ledgers_internal, ledgers_alt_internal;
1324 uint64_t ledgers_compressed, ledgers_alt_compressed;
1325 ppnum_t pai;
1326 pmap_paddr_t pa;
1327 vm_map_offset_t vaddr;
1328 boolean_t is_ept = is_ept_pmap(pmap);
1329 boolean_t was_altacct;
1330
1331 num_removed = 0;
1332 num_unwired = 0;
1333 num_found = 0;
1334 num_invalid = 0;
1335 ledgers_external = 0;
1336 ledgers_reusable = 0;
1337 ledgers_internal = 0;
1338 ledgers_compressed = 0;
1339 ledgers_alt_internal = 0;
1340 ledgers_alt_compressed = 0;
1341
1342 /* invalidate the PTEs first to "freeze" them */
1343 for (cpte = spte, vaddr = start_vaddr;
1344 cpte < epte;
1345 cpte++, vaddr += PAGE_SIZE_64) {
1346 pt_entry_t p = *cpte;
1347
1348 pa = pte_to_pa(p);
1349 if (pa == 0) {
1350 if ((options & PMAP_OPTIONS_REMOVE) &&
1351 (PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
1352 assert(pmap != kernel_pmap);
1353 /* one less "compressed"... */
1354 ledgers_compressed++;
1355 if (p & PTE_COMPRESSED_ALT) {
1356 /* ... but it used to be "ALTACCT" */
1357 ledgers_alt_compressed++;
1358 }
1359 /* clear marker(s) */
1360 /* XXX probably does not need to be atomic! */
1361 pmap_update_pte(is_ept, cpte, INTEL_PTE_COMPRESSED_MASK, 0, true);
1362 }
1363 continue;
1364 }
1365 num_found++;
1366
1367 if (iswired(p)) {
1368 num_unwired++;
1369 }
1370
1371 pai = pa_index(pa);
1372
1373 if (!IS_MANAGED_PAGE(pai)) {
1374 /*
1375 * Outside range of managed physical memory.
1376 * Just remove the mappings.
1377 */
1378 pmap_store_pte(is_ept, cpte, 0);
1379 continue;
1380 }
1381
1382 if ((p & PTE_VALID_MASK(is_ept)) == 0) {
1383 num_invalid++;
1384 }
1385
1386 /* invalidate the PTE */
1387 pmap_update_pte(is_ept, cpte, PTE_VALID_MASK(is_ept), 0, true);
1388 }
1389
1390 if (num_found == 0) {
1391 /* nothing was changed: we're done */
1392 goto update_counts;
1393 }
1394
1395 /* propagate the invalidates to other CPUs */
1396
1397 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1398
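	/*
	 * Second pass: with the invalidations now visible to all CPUs, remove
	 * each mapping from its physical page's PV list, fold the
	 * referenced/modified bits into pmap_phys_attributes[], and fully
	 * clear the PTEs, accumulating ledger adjustments along the way.
	 */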
1399 for (cpte = spte, vaddr = start_vaddr;
1400 cpte < epte;
1401 cpte++, vaddr += PAGE_SIZE_64) {
1402 pa = pte_to_pa(*cpte);
1403 if (pa == 0) {
1404 check_pte_for_compressed_marker:
1405 /*
1406 * This PTE could have been replaced with a
1407 * "compressed" marker after our first "freeze"
1408 * loop above, so check again.
1409 */
1410 if ((options & PMAP_OPTIONS_REMOVE) &&
1411 (PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
1412 assert(pmap != kernel_pmap);
1413 /* one less "compressed"... */
1414 ledgers_compressed++;
1415 if (*cpte & PTE_COMPRESSED_ALT) {
1416 /* ... but it used to be "ALTACCT" */
1417 ledgers_alt_compressed++;
1418 }
1419 pmap_store_pte(is_ept, cpte, 0);
1420 }
1421 continue;
1422 }
1423
1424 pai = pa_index(pa);
1425
1426 LOCK_PVH(pai);
1427
1428 pa = pte_to_pa(*cpte);
1429 if (pa == 0) {
1430 UNLOCK_PVH(pai);
1431 goto check_pte_for_compressed_marker;
1432 }
1433
1434 /*
1435 * Remove the mapping from the pvlist for this physical page.
1436 */
1437 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);
1438
1439 num_removed++;
1440 /* update ledgers */
1441 if (was_altacct) {
1442 /* internal and alternate accounting */
1443 assert(IS_INTERNAL_PAGE(pai));
1444 ledgers_internal++;
1445 ledgers_alt_internal++;
1446 } else if (IS_REUSABLE_PAGE(pai)) {
1447 /* internal but reusable */
1448 assert(!was_altacct);
1449 assert(IS_INTERNAL_PAGE(pai));
1450 ledgers_reusable++;
1451 } else if (IS_INTERNAL_PAGE(pai)) {
1452 /* internal */
1453 assert(!was_altacct);
1454 assert(!IS_REUSABLE_PAGE(pai));
1455 ledgers_internal++;
1456 } else {
1457 /* not internal */
1458 ledgers_external++;
1459 }
1460
1461 /*
1462 * Get the modify and reference bits, then
1463 * nuke the entry in the page table
1464 */
1465 /* remember reference and change */
1466 if (!is_ept) {
1467 pmap_phys_attributes[pai] |=
1468 *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
1469 } else {
1470 pmap_phys_attributes[pai] |=
1471 ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1472 }
1473
1474 /* completely invalidate the PTE */
1475 pmap_store_pte(is_ept, cpte, 0);
1476
1477 UNLOCK_PVH(pai);
1478
1479 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1480 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1481 pvh_eh = pvh_e;
1482
1483 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1484 pvh_et = pvh_e;
1485 }
1486 pvh_cnt++;
1487 }
1488 /* We can encounter at most 'num_found' PTEs for this level
1489 * Fewer may be encountered if some were replaced by
1490 * compressed markers. No new valid PTEs can be created
1491 * since the pmap lock is held exclusively.
1492 */
1493 if (num_removed == num_found) {
1494 break;
1495 }
1496 } /* for loop */
1497
1498 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1499 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1500 }
1501 update_counts:
1502 /*
1503 * Update the counts
1504 */
1505 #if TESTING
1506 if (pmap->stats.resident_count < num_removed) {
1507 panic("pmap_remove_range: resident_count");
1508 }
1509 #endif
1510 if (num_removed) {
1511 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1512 }
1513
1514 if (pmap != kernel_pmap) {
1515 if (ledgers_external) {
1516 pmap_ledger_debit(pmap,
1517 task_ledgers.external,
1518 machine_ptob(ledgers_external));
1519 }
1520 if (ledgers_reusable) {
1521 pmap_ledger_debit(pmap,
1522 task_ledgers.reusable,
1523 machine_ptob(ledgers_reusable));
1524 }
1525 if (ledgers_internal) {
1526 pmap_ledger_debit(pmap,
1527 task_ledgers.internal,
1528 machine_ptob(ledgers_internal));
1529 }
1530 if (ledgers_compressed) {
1531 pmap_ledger_debit(pmap,
1532 task_ledgers.internal_compressed,
1533 machine_ptob(ledgers_compressed));
1534 }
1535 if (ledgers_alt_internal) {
1536 pmap_ledger_debit(pmap,
1537 task_ledgers.alternate_accounting,
1538 machine_ptob(ledgers_alt_internal));
1539 }
1540 if (ledgers_alt_compressed) {
1541 pmap_ledger_debit(pmap,
1542 task_ledgers.alternate_accounting_compressed,
1543 machine_ptob(ledgers_alt_compressed));
1544 }
1545
1546 uint64_t net_debit = (ledgers_internal - ledgers_alt_internal) + (ledgers_compressed - ledgers_alt_compressed);
1547 if (net_debit) {
1548 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(net_debit));
1549 }
1550 }
1551
1552 if (num_unwired != 0) {
1553 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1554 }
1555 return;
1556 }
1557
1558
1559 /*
1560 * Remove the given range of addresses
1561 * from the specified map.
1562 *
1563 * It is assumed that the start and end are properly
1564 * rounded to the hardware page size.
1565 */
1566 void
1567 pmap_remove(
1568 pmap_t map,
1569 addr64_t s64,
1570 addr64_t e64)
1571 {
1572 pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1573 }
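/*
 * Number of PDE-sized chunks pmap_remove_options() processes before it starts
 * checking a TSC deadline and periodically dropping the pmap lock, bounding
 * how long preemption can be held off.
 */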
1574 #define PLCHECK_THRESHOLD (2)
1575
1576 void
1577 pmap_remove_options(
1578 pmap_t map,
1579 addr64_t s64,
1580 addr64_t e64,
1581 int options)
1582 {
1583 pt_entry_t *pde;
1584 pt_entry_t *spte, *epte;
1585 addr64_t l64;
1586 uint64_t deadline = 0;
1587 boolean_t is_ept;
1588
1589 pmap_intr_assert();
1590
1591 if (map == PMAP_NULL || s64 == e64) {
1592 return;
1593 }
1594
1595 is_ept = is_ept_pmap(map);
1596
1597 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1598 VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
1599 VM_KERNEL_ADDRHIDE(e64));
1600
1601 PMAP_LOCK_EXCLUSIVE(map);
1602 uint32_t traverse_count = 0;
1603
1604 while (s64 < e64) {
1605 pml4_entry_t *pml4e = pmap64_pml4(map, s64);
1606 if ((pml4e == NULL) ||
1607 ((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
1608 s64 = (s64 + NBPML4) & ~(PML4MASK);
1609 continue;
1610 }
1611 pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
1612 if ((pdpte == NULL) ||
1613 ((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
1614 s64 = (s64 + NBPDPT) & ~(PDPTMASK);
1615 continue;
1616 }
1617
1618 l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
1619
1620 if (l64 > e64) {
1621 l64 = e64;
1622 }
1623
1624 pde = pmap_pde(map, s64);
1625
1626 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1627 if (*pde & PTE_PS) {
1628 /*
1629 * If we're removing a superpage, pmap_remove_range()
1630 * must work on level 2 instead of level 1; and we're
1631 * only passing a single level 2 entry instead of a
1632 * level 1 range.
1633 */
1634 spte = pde;
1635 epte = spte + 1; /* excluded */
1636 } else {
1637 spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
1638 spte = &spte[ptenum(s64)];
1639 epte = &spte[intel_btop(l64 - s64)];
1640 }
1641 pmap_remove_range_options(map, s64, spte, epte,
1642 options);
1643 }
1644 s64 = l64;
1645
1646 if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
1647 if (deadline == 0) {
1648 deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
1649 } else {
1650 if (rdtsc64_nofence() > deadline) {
1651 PMAP_UNLOCK_EXCLUSIVE(map);
1652 __builtin_ia32_pause();
1653 PMAP_LOCK_EXCLUSIVE(map);
1654 deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
1655 }
1656 }
1657 }
1658 }
1659
1660 PMAP_UNLOCK_EXCLUSIVE(map);
1661
1662 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
1663 }
1664
1665 void
1666 pmap_page_protect(
1667 ppnum_t pn,
1668 vm_prot_t prot)
1669 {
1670 pmap_page_protect_options(pn, prot, 0, NULL);
1671 }
1672
1673 /*
1674 * Routine: pmap_page_protect_options
1675 *
1676 * Function:
1677 * Lower the permission for all mappings to a given
1678 * page.
1679 */
1680 void
1681 pmap_page_protect_options(
1682 ppnum_t pn,
1683 vm_prot_t prot,
1684 unsigned int options,
1685 void *arg)
1686 {
1687 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1688 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1689 pv_hashed_entry_t nexth;
1690 int pvh_cnt = 0;
1691 pv_rooted_entry_t pv_h;
1692 pv_rooted_entry_t pv_e;
1693 pv_hashed_entry_t pvh_e;
1694 pt_entry_t *pte;
1695 int pai;
1696 pmap_t pmap;
1697 boolean_t remove;
1698 pt_entry_t new_pte_value;
1699 boolean_t is_ept;
1700
1701 pmap_intr_assert();
1702 assert(pn != vm_page_fictitious_addr);
1703 if (pn == vm_page_guard_addr) {
1704 return;
1705 }
1706
1707 pai = ppn_to_pai(pn);
1708
1709 if (!IS_MANAGED_PAGE(pai)) {
1710 /*
1711 * Not a managed page.
1712 */
1713 return;
1714 }
1715
1716 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
1717
1718 /*
1719 * Determine the new protection.
1720 */
1721 switch (prot) {
1722 case VM_PROT_READ:
1723 case VM_PROT_READ | VM_PROT_EXECUTE:
1724 remove = FALSE;
1725 break;
1726 case VM_PROT_ALL:
1727 return; /* nothing to do */
1728 default:
1729 remove = TRUE;
1730 break;
1731 }
1732
1733 pv_h = pai_to_pvh(pai);
1734
1735 LOCK_PVH(pai);
1736
1737
1738 /*
1739 * Walk down PV list, if any, changing or removing all mappings.
1740 */
1741 if (pv_h->pmap == PMAP_NULL) {
1742 goto done;
1743 }
1744
1745 pv_e = pv_h;
1746 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1747
1748 do {
1749 vm_map_offset_t vaddr;
1750
1751 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1752 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1753 /* page was modified, so it will be compressed */
1754 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1755 options |= PMAP_OPTIONS_COMPRESSOR;
1756 }
1757
1758 pmap = pv_e->pmap;
1759 is_ept = is_ept_pmap(pmap);
1760 vaddr = PVE_VA(pv_e);
1761 pte = pmap_pte(pmap, vaddr);
1762
1763 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1764 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1765
1766 if (0 == pte) {
1767 panic("pmap_page_protect() "
1768 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1769 pmap, pn, vaddr);
1770 }
1771 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1772
1773 /*
1774 * Remove the mapping if new protection is NONE
1775 */
1776 if (remove) {
1777 /* Remove per-pmap wired count */
1778 if (iswired(*pte)) {
1779 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1780 }
1781
1782 if (pmap != kernel_pmap &&
1783 (options & PMAP_OPTIONS_COMPRESSOR) &&
1784 IS_INTERNAL_PAGE(pai)) {
1785 assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
1786 /* mark this PTE as having been "compressed" */
1787 new_pte_value = PTE_COMPRESSED;
1788 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1789 new_pte_value |= PTE_COMPRESSED_ALT;
1790 }
1791 } else {
1792 new_pte_value = 0;
1793 }
1794
1795 if (options & PMAP_OPTIONS_NOREFMOD) {
1796 pmap_store_pte(is_ept, pte, new_pte_value);
1797
1798 if (options & PMAP_OPTIONS_NOFLUSH) {
1799 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1800 } else {
1801 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1802 }
1803 } else {
1804 /*
1805 * Remove the mapping, collecting dirty bits.
1806 */
1807 pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
1808
1809 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1810 if (!is_ept) {
1811 pmap_phys_attributes[pai] |=
1812 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1813 } else {
1814 pmap_phys_attributes[pai] |=
1815 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1816 }
1817 if ((options &
1818 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1819 IS_INTERNAL_PAGE(pai) &&
1820 (pmap_phys_attributes[pai] &
1821 PHYS_MODIFIED)) {
1822 /*
1823 * Page is actually "modified" and
1824 * will be compressed. Start
1825 * accounting for it as "compressed".
1826 */
1827 assert(!(options & PMAP_OPTIONS_COMPRESSOR));
1828 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1829 options |= PMAP_OPTIONS_COMPRESSOR;
1830 assert(new_pte_value == 0);
1831 if (pmap != kernel_pmap) {
1832 new_pte_value = PTE_COMPRESSED;
1833 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1834 new_pte_value |= PTE_COMPRESSED_ALT;
1835 }
1836 }
1837 }
1838 pmap_store_pte(is_ept, pte, new_pte_value);
1839 }
1840
1841 #if TESTING
1842 if (pmap->stats.resident_count < 1) {
1843 panic("pmap_page_protect: resident_count");
1844 }
1845 #endif
1846 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1847
1848 /*
1849 * We only ever compress internal pages.
1850 */
1851 if (options & PMAP_OPTIONS_COMPRESSOR) {
1852 assert(IS_INTERNAL_PAGE(pai));
1853 }
1854 if (pmap != kernel_pmap) {
1855 /* update ledgers */
1856 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1857 assert(IS_INTERNAL_PAGE(pai));
1858 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1859 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1860 if (options & PMAP_OPTIONS_COMPRESSOR) {
1861 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1862 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
1863 }
1864 } else if (IS_REUSABLE_PAGE(pai)) {
1865 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1866 assert(IS_INTERNAL_PAGE(pai));
1867 if (options & PMAP_OPTIONS_COMPRESSOR) {
1868 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1869 /* was not in footprint, but is now */
1870 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1871 }
1872 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
1873 } else if (IS_INTERNAL_PAGE(pai)) {
1874 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1875 assert(!IS_REUSABLE_PAGE(pai));
1876 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1877 /*
1878 * Update all stats related to physical
1879 * footprint, which only deals with
1880 * internal pages.
1881 */
1882 if (options & PMAP_OPTIONS_COMPRESSOR) {
1883 /*
1884 * This removal is only being
1885 * done so we can send this page
1886 * to the compressor; therefore
1887 * it mustn't affect total task
1888 * footprint.
1889 */
1890 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1891 } else {
1892 /*
1893 * This internal page isn't
1894 * going to the compressor,
1895 * so adjust stats to keep
1896 * phys_footprint up to date.
1897 */
1898 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1899 }
1900 } else {
1901 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
1902 }
1903 }
1904
1905 /*
1906 * Deal with the pv_rooted_entry.
1907 */
1908
1909 if (pv_e == pv_h) {
1910 /*
1911 * Fix up head later.
1912 */
1913 pv_h->pmap = PMAP_NULL;
1914 } else {
1915 /*
1916 * Delete this entry.
1917 */
1918 pv_hash_remove(pvh_e);
1919 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1920 pvh_eh = pvh_e;
1921
1922 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1923 pvh_et = pvh_e;
1924 }
1925 pvh_cnt++;
1926 }
1927 } else {
1928 /*
1929 * Write-protect, after opportunistic refmod collect
1930 */
1931 if (!is_ept) {
1932 pmap_phys_attributes[pai] |=
1933 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1934 } else {
1935 pmap_phys_attributes[pai] |=
1936 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1937 }
1938
1939 pmap_update_pte(is_ept, pte, PTE_WRITE(is_ept), 0, true);
1940 if (options & PMAP_OPTIONS_NOFLUSH) {
1941 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1942 } else {
1943 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1944 }
1945 }
1946 pvh_e = nexth;
1947 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1948
1949
1950 /*
1951 * If pv_head mapping was removed, fix it up.
1952 */
1953 if (pv_h->pmap == PMAP_NULL) {
1954 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1955
1956 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1957 pv_hash_remove(pvh_e);
1958 pv_h->pmap = pvh_e->pmap;
1959 pv_h->va_and_flags = pvh_e->va_and_flags;
1960 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1961 pvh_eh = pvh_e;
1962
1963 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1964 pvh_et = pvh_e;
1965 }
1966 pvh_cnt++;
1967 }
1968 }
1969 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1970 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1971 }
1972 done:
1973 UNLOCK_PVH(pai);
1974
1975 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
1976 }
1977
1978
1979 /*
1980 * Clear specified attribute bits.
1981 */
1982 void
1983 phys_attribute_clear(
1984 ppnum_t pn,
1985 int bits,
1986 unsigned int options,
1987 void *arg)
1988 {
1989 pv_rooted_entry_t pv_h;
1990 pv_hashed_entry_t pv_e;
1991 pt_entry_t *pte = NULL;
1992 int pai;
1993 pmap_t pmap;
1994 char attributes = 0;
1995 boolean_t is_internal, is_reusable, is_altacct, is_ept;
1996 int ept_bits_to_clear;
1997 boolean_t ept_keep_global_mod = FALSE;
1998
1999 if ((bits & PHYS_MODIFIED) &&
2000 (options & PMAP_OPTIONS_NOFLUSH) &&
2001 arg == NULL) {
2002 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
2003 "should not clear 'modified' without flushing TLBs\n",
2004 pn, bits, options, arg);
2005 }
2006
2007 /* We only support converting MOD and REF bits for EPT PTEs in this function */
2008 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
2009
2010 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
2011
2012 pmap_intr_assert();
2013 assert(pn != vm_page_fictitious_addr);
2014 if (pn == vm_page_guard_addr) {
2015 return;
2016 }
2017
2018 pai = ppn_to_pai(pn);
2019
2020 if (!IS_MANAGED_PAGE(pai)) {
2021 /*
2022 * Not a managed page.
2023 */
2024 return;
2025 }
2026
2027 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
2028
2029 pv_h = pai_to_pvh(pai);
2030
2031 LOCK_PVH(pai);
2032
2033
2034 /*
2035 * Walk down the PV list, clearing all modify or reference bits.
2036 * We do not have to lock the pv_list because we hold
2037 * the per-pmap lock.
2038 */
2039 if (pv_h->pmap != PMAP_NULL) {
2040 /*
2041 * There are some mappings.
2042 */
2043
2044 is_internal = IS_INTERNAL_PAGE(pai);
2045 is_reusable = IS_REUSABLE_PAGE(pai);
2046
2047 pv_e = (pv_hashed_entry_t)pv_h;
2048
2049 do {
2050 vm_map_offset_t va;
2051 char pte_bits;
2052
2053 pmap = pv_e->pmap;
2054 is_ept = is_ept_pmap(pmap);
2055 is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
2056 va = PVE_VA(pv_e);
2057 pte_bits = 0;
2058
2059 if (bits) {
2060 pte = pmap_pte(pmap, va);
2061 /* grab ref/mod bits from this PTE */
2062 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
2063 /* propagate to page's global attributes */
2064 if (!is_ept) {
2065 attributes |= pte_bits;
2066 } else {
2067 attributes |= ept_refmod_to_physmap(pte_bits);
2068 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
2069 ept_keep_global_mod = TRUE;
2070 }
2071 }
2072 /* which bits to clear for this PTE? */
2073 if (!is_ept) {
2074 pte_bits &= bits;
2075 } else {
2076 pte_bits &= ept_bits_to_clear;
2077 }
2078 }
2079 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
2080 pte_bits |= PTE_WRITE(is_ept);
2081 }
2082
2083 /*
2084 * Clear modify and/or reference bits.
2085 */
2086 if (pte_bits) {
2087 pmap_update_pte(is_ept, pte, pte_bits, 0, true);
2088
2089 /* Ensure all processors using this translation
2090 * invalidate this TLB entry. The invalidation
2091 * *must* follow the PTE update, to ensure that
2092 * the TLB shadow of the 'D' bit (in particular)
2093 * is synchronized with the updated PTE.
2094 */
2095 if (!(options & PMAP_OPTIONS_NOFLUSH)) {
2096 /* flush TLBS now */
2097 PMAP_UPDATE_TLBS(pmap,
2098 va,
2099 va + PAGE_SIZE);
2100 } else if (arg) {
2101 /* delayed TLB flush: add "pmap" info */
2102 PMAP_UPDATE_TLBS_DELAYED(
2103 pmap,
2104 va,
2105 va + PAGE_SIZE,
2106 (pmap_flush_context *)arg);
2107 } else {
2108 /* no TLB flushing at all */
2109 }
2110 }
2111
2112 /* update pmap "reusable" stats */
2113 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
2114 is_reusable &&
2115 pmap != kernel_pmap) {
2116 /* one less "reusable" */
2117 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
2118 if (is_internal) {
2119 /* one more "internal" */
2120 if (is_altacct) {
2121 /* no impact on ledgers */
2122 } else {
2123 pmap_ledger_credit(pmap,
2124 task_ledgers.internal,
2125 PAGE_SIZE);
2126 pmap_ledger_credit(
2127 pmap,
2128 task_ledgers.phys_footprint,
2129 PAGE_SIZE);
2130 }
2131 } else {
2132 /* one more "external" */
2133 pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
2134 }
2135 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
2136 !is_reusable &&
2137 pmap != kernel_pmap) {
2138 /* one more "reusable" */
2139 pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
2140 if (is_internal) {
2141 /* one less "internal" */
2142 if (is_altacct) {
2143 /* no impact on footprint */
2144 } else {
2145 pmap_ledger_debit(pmap,
2146 task_ledgers.internal,
2147 PAGE_SIZE);
2148 pmap_ledger_debit(
2149 pmap,
2150 task_ledgers.phys_footprint,
2151 PAGE_SIZE);
2152 }
2153 } else {
2154 /* one less "external" */
2155 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
2156 }
2157 }
2158
2159 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2160 } while (pv_e != (pv_hashed_entry_t)pv_h);
2161 }
2162 /* Opportunistic refmod collection, annulled
2163 * if both REF and MOD are being cleared.
2164 */
2165
2166 pmap_phys_attributes[pai] |= attributes;
2167
2168 if (ept_keep_global_mod) {
2169 /*
2170 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2171 * requesting that we clear the modified bit for a phys page, we need
2172 * to ensure that there are no EPT mappings for the page with the
2173 * modified bit set. If there are, we cannot clear the global modified bit.
2174 */
2175 bits &= ~PHYS_MODIFIED;
2176 }
2177 pmap_phys_attributes[pai] &= ~(bits);
2178
2179 /* update this page's "reusable" status */
2180 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
2181 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
2182 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
2183 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
2184 }
2185
2186 UNLOCK_PVH(pai);
2187
2188 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
2189 }
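
/*
 * Illustrative sketch (not compiled): how a caller might clear the
 * referenced/modified attributes for a physical page. The pairing of
 * PMAP_OPTIONS_NOFLUSH with a pmap_flush_context mirrors the requirement
 * asserted at the top of phys_attribute_clear(): clearing PHYS_MODIFIED
 * with a deferred flush must supply a flush context. The helper name and
 * its caller's responsibility for draining the context are hypothetical.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static void
example_clear_refmod(ppnum_t pn, pmap_flush_context *pfc)
{
	/* Immediate TLB flush on every pmap that maps the page: */
	phys_attribute_clear(pn, PHYS_REFERENCED | PHYS_MODIFIED, 0, NULL);

	/*
	 * Deferred flush: legal for PHYS_MODIFIED only because a
	 * pmap_flush_context is supplied; the caller must eventually
	 * drain "pfc" (details elided here).
	 */
	phys_attribute_clear(pn, PHYS_MODIFIED, PMAP_OPTIONS_NOFLUSH, pfc);
}
#endif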
2190
2191 /*
2192 * Check specified attribute bits.
2193 */
2194 int
2195 phys_attribute_test(
2196 ppnum_t pn,
2197 int bits)
2198 {
2199 pv_rooted_entry_t pv_h;
2200 pv_hashed_entry_t pv_e;
2201 pt_entry_t *pte;
2202 int pai;
2203 pmap_t pmap;
2204 int attributes = 0;
2205 boolean_t is_ept;
2206
2207 pmap_intr_assert();
2208 assert(pn != vm_page_fictitious_addr);
2209 assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
2210 if (pn == vm_page_guard_addr) {
2211 return 0;
2212 }
2213
2214 pai = ppn_to_pai(pn);
2215
2216 if (!IS_MANAGED_PAGE(pai)) {
2217 /*
2218 * Not a managed page.
2219 */
2220 return 0;
2221 }
2222
2223 /*
2224 * Fast check: if the bits have already been collected,
2225 * there is no need to take any locks.
2226 * If they are not all set, recheck after taking the lock,
2227 * in case they were pulled in while we were waiting
2228 * for the lock.
2229 */
2230 if ((pmap_phys_attributes[pai] & bits) == bits) {
2231 return bits;
2232 }
2233
2234 pv_h = pai_to_pvh(pai);
2235
2236 LOCK_PVH(pai);
2237
2238 attributes = pmap_phys_attributes[pai] & bits;
2239
2240
2241 /*
2242 * Walk down PV list, checking the mappings until we
2243 * reach the end or we've found the desired attributes.
2244 */
2245 if (attributes != bits &&
2246 pv_h->pmap != PMAP_NULL) {
2247 /*
2248 * There are some mappings.
2249 */
2250 pv_e = (pv_hashed_entry_t)pv_h;
2251 do {
2252 vm_map_offset_t va;
2253
2254 pmap = pv_e->pmap;
2255 is_ept = is_ept_pmap(pmap);
2256 va = PVE_VA(pv_e);
2257 /*
2258 * pick up modify and/or reference bits from mapping
2259 */
2260
2261 pte = pmap_pte(pmap, va);
2262 if (!is_ept) {
2263 attributes |= (int)(*pte & bits);
2264 } else {
2265 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
2266 }
2267
2268 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2269 } while ((attributes != bits) &&
2270 (pv_e != (pv_hashed_entry_t)pv_h));
2271 }
2272 pmap_phys_attributes[pai] |= attributes;
2273
2274 UNLOCK_PVH(pai);
2275 return attributes;
2276 }
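
/*
 * Illustrative sketch (not compiled): the fast-path/slow-path behavior of
 * phys_attribute_test() as seen by a hypothetical caller. A set attribute
 * is reported either from the cached pmap_phys_attributes[] entry (no lock)
 * or from walking the page's PV list under the PV lock.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static boolean_t
example_page_is_dirty(ppnum_t pn)
{
	/*
	 * Non-zero means PHYS_MODIFIED was found either in the per-page
	 * attribute array or in one of the page's PTEs.
	 */
	return (phys_attribute_test(pn, PHYS_MODIFIED) & PHYS_MODIFIED) != 0;
}
#endif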
2277
2278 /*
2279 * Routine: pmap_change_wiring
2280 * Function: Change the wiring attribute for a map/virtual-address
2281 * pair.
2282 * In/out conditions:
2283 * The mapping must already exist in the pmap.
2284 */
2285 void
2286 pmap_change_wiring(
2287 pmap_t map,
2288 vm_map_offset_t vaddr,
2289 boolean_t wired)
2290 {
2291 pt_entry_t *pte;
2292
2293 PMAP_LOCK_SHARED(map);
2294
2295 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
2296 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
2297 map, vaddr, wired);
2298 }
2299
2300 if (wired && !iswired(*pte)) {
2301 /*
2302 * wiring down mapping
2303 */
2304 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
2305 pmap_update_pte(is_ept_pmap(map), pte, 0, PTE_WIRED, false);
2306 } else if (!wired && iswired(*pte)) {
2307 /*
2308 * unwiring mapping
2309 */
2310 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
2311 pmap_update_pte(is_ept_pmap(map), pte, PTE_WIRED, 0, false);
2312 }
2313
2314 PMAP_UNLOCK_SHARED(map);
2315 }
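
/*
 * Illustrative sketch (not compiled): wiring and then unwiring an existing
 * mapping. The mapping must already exist (the routine panics otherwise);
 * the wired_mem ledger is credited/debited and PTE_WIRED is flipped without
 * a TLB flush, since wiring is not visible to the hardware page walker.
 * The helper name and address are hypothetical.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static void
example_wire_then_unwire(pmap_t map, vm_map_offset_t va)
{
	pmap_change_wiring(map, va, TRUE);    /* credits task_ledgers.wired_mem */
	pmap_change_wiring(map, va, FALSE);   /* debits task_ledgers.wired_mem */
}
#endif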
2316
2317 /*
2318 * "Backdoor" direct map routine for early mappings.
2319 * Useful for mapping memory outside the range
2320 * Sets the A (accessed), D (dirty) and NC (no-cache) bits if requested.
2321 */
2322
2323 vm_offset_t
2324 pmap_map_bd(
2325 vm_offset_t virt,
2326 vm_map_offset_t start_addr,
2327 vm_map_offset_t end_addr,
2328 vm_prot_t prot,
2329 unsigned int flags)
2330 {
2331 pt_entry_t template;
2332 pt_entry_t *ptep;
2333
2334 vm_offset_t base = virt;
2335 boolean_t doflush = FALSE;
2336
2337 template = pa_to_pte(start_addr)
2338 | INTEL_PTE_REF
2339 | INTEL_PTE_MOD
2340 | INTEL_PTE_WIRED
2341 | INTEL_PTE_VALID;
2342
2343 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
2344 template |= INTEL_PTE_NCACHE;
2345 if (!(flags & (VM_MEM_GUARDED))) {
2346 template |= INTEL_PTE_PAT;
2347 }
2348 }
2349
2350 if ((prot & VM_PROT_EXECUTE) == 0) {
2351 template |= INTEL_PTE_NX;
2352 }
2353
2354 if (prot & VM_PROT_WRITE) {
2355 template |= INTEL_PTE_WRITE;
2356 }
2357 vm_map_offset_t caddr = start_addr;
2358 while (caddr < end_addr) {
2359 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2360 if (ptep == PT_ENTRY_NULL) {
2361 panic("pmap_map_bd: Invalid kernel address");
2362 }
2363 if (pte_to_pa(*ptep)) {
2364 doflush = TRUE;
2365 }
2366 pmap_store_pte(FALSE, ptep, template);
2367 pte_increment_pa(template);
2368 virt += PAGE_SIZE;
2369 caddr += PAGE_SIZE;
2370 }
2371 if (doflush) {
2372 pmap_tlbi_range(0, ~0ULL, true, 0);
2373 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2374 }
2375 return virt;
2376 }
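
/*
 * Illustrative sketch (not compiled): an early-boot style use of
 * pmap_map_bd() to map a physical range uncached into preallocated kernel
 * virtual space. The helper name and addresses are hypothetical; the routine
 * assumes the covering page tables already exist and panics otherwise.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static vm_offset_t
example_map_device(vm_offset_t kva, vm_map_offset_t dev_pa, vm_map_offset_t dev_size)
{
	/*
	 * Returns the virtual address just past the new mapping; the mapping
	 * itself starts at "kva". VM_MEM_NOT_CACHEABLE requests the NC
	 * attribute set up in the PTE template above.
	 */
	return pmap_map_bd(kva, dev_pa, dev_pa + dev_size,
	    VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}
#endif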
2377
2378 /* Create a virtual alias beginning at 'ava' of the specified kernel virtual
2379 * range. The aliased pagetable range is expanded if
2380 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization;
2381 * the caller is assumed to have stabilized the source and destination ranges. Currently
2382 * used to populate sections of the trampoline "doublemap" at CPU startup.
2383 */
2384
2385 void
2386 pmap_alias(
2387 vm_offset_t ava,
2388 vm_map_offset_t start_addr,
2389 vm_map_offset_t end_addr,
2390 vm_prot_t prot,
2391 unsigned int eoptions)
2392 {
2393 pt_entry_t prot_template, template;
2394 pt_entry_t *aptep, *sptep;
2395
2396 prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
2397 if ((prot & VM_PROT_EXECUTE) == 0) {
2398 prot_template |= INTEL_PTE_NX;
2399 }
2400
2401 if (prot & VM_PROT_WRITE) {
2402 prot_template |= INTEL_PTE_WRITE;
2403 }
2404 assert(((start_addr | end_addr) & PAGE_MASK) == 0);
2405 while (start_addr < end_addr) {
2406 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2407 if (aptep == PT_ENTRY_NULL) {
2408 if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
2409 pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
2410 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2411 } else {
2412 panic("pmap_alias: Invalid alias address");
2413 }
2414 }
2415 /* The aliased range should not have any active mappings */
2416 assert(pte_to_pa(*aptep) == 0);
2417
2418 sptep = pmap_pte(kernel_pmap, start_addr);
2419 assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
2420 template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
2421 pmap_store_pte(FALSE, aptep, template);
2422
2423 ava += PAGE_SIZE;
2424 start_addr += PAGE_SIZE;
2425 }
2426 }
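
/*
 * Illustrative sketch (not compiled): aliasing an already-mapped,
 * page-aligned kernel range at a second virtual address, expanding the
 * alias page tables on demand. This mirrors the trampoline "doublemap"
 * usage described above; the helper name and addresses are hypothetical
 * and the ranges must be stabilized by the caller.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static void
example_alias_range(vm_offset_t alias_va, vm_map_offset_t src_va, vm_map_offset_t src_end)
{
	/*
	 * The source range must be page aligned and mapped; the alias range
	 * must not already contain live mappings (both are asserted above).
	 */
	pmap_alias(alias_va, src_va, src_end, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_EXPAND_OPTIONS_ALIASMAP);
}
#endif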
2427
2428 mach_vm_size_t
2429 pmap_query_resident(
2430 pmap_t pmap,
2431 addr64_t s64,
2432 addr64_t e64,
2433 mach_vm_size_t *compressed_bytes_p)
2434 {
2435 pt_entry_t *pde;
2436 pt_entry_t *spte, *epte;
2437 addr64_t l64;
2438 uint64_t deadline = 0;
2439 mach_vm_size_t resident_bytes;
2440 mach_vm_size_t compressed_bytes;
2441 boolean_t is_ept;
2442
2443 pmap_intr_assert();
2444
2445 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
2446 if (compressed_bytes_p) {
2447 *compressed_bytes_p = 0;
2448 }
2449 return 0;
2450 }
2451
2452 is_ept = is_ept_pmap(pmap);
2453
2454 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
2455 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
2456 VM_KERNEL_ADDRHIDE(e64));
2457
2458 resident_bytes = 0;
2459 compressed_bytes = 0;
2460
2461 PMAP_LOCK_EXCLUSIVE(pmap);
2462 uint32_t traverse_count = 0;
2463
2464 while (s64 < e64) {
2465 l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
2466 if (l64 > e64) {
2467 l64 = e64;
2468 }
2469 pde = pmap_pde(pmap, s64);
2470
2471 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
2472 if (*pde & PTE_PS) {
2473 /* superpage: not supported */
2474 } else {
2475 spte = pmap_pte(pmap,
2476 (s64 & ~(PDE_MAPPED_SIZE - 1)));
2477 spte = &spte[ptenum(s64)];
2478 epte = &spte[intel_btop(l64 - s64)];
2479
2480 for (; spte < epte; spte++) {
2481 if (pte_to_pa(*spte) != 0) {
2482 resident_bytes += PAGE_SIZE;
2483 } else if (*spte & PTE_COMPRESSED) {
2484 compressed_bytes += PAGE_SIZE;
2485 }
2486 }
2487 }
2488 }
2489 s64 = l64;
2490
2491 if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
2492 if (deadline == 0) {
2493 deadline = rdtsc64() + max_preemption_latency_tsc;
2494 } else {
2495 if (rdtsc64() > deadline) {
2496 PMAP_UNLOCK_EXCLUSIVE(pmap);
2497 __builtin_ia32_pause();
2498 PMAP_LOCK_EXCLUSIVE(pmap);
2499 deadline = rdtsc64() + max_preemption_latency_tsc;
2500 }
2501 }
2502 }
2503 }
2504
2505 PMAP_UNLOCK_EXCLUSIVE(pmap);
2506
2507 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
2508 resident_bytes);
2509
2510 if (compressed_bytes_p) {
2511 *compressed_bytes_p = compressed_bytes;
2512 }
2513 return resident_bytes;
2514 }
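
/*
 * Illustrative sketch (not compiled): summing resident and compressed bytes
 * for a user pmap range. Because the routine drops and re-takes the pmap
 * lock when it exceeds its preemption deadline, the totals are a
 * point-in-time approximation for a live pmap. The helper name is
 * hypothetical.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static void
example_report_footprint(pmap_t pmap, addr64_t start, addr64_t end)
{
	mach_vm_size_t resident_bytes;
	mach_vm_size_t compressed_bytes = 0;

	resident_bytes = pmap_query_resident(pmap, start, end, &compressed_bytes);
	kprintf("range resident=%llu compressed=%llu\n",
	    (unsigned long long)resident_bytes,
	    (unsigned long long)compressed_bytes);
}
#endif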
2515
2516 kern_return_t
2517 pmap_query_page_info(
2518 pmap_t pmap,
2519 vm_map_offset_t va,
2520 int *disp_p)
2521 {
2522 int disp;
2523 boolean_t is_ept;
2524 pmap_paddr_t pa;
2525 ppnum_t pai;
2526 pd_entry_t *pde;
2527 pt_entry_t *pte;
2528
2529 pmap_intr_assert();
2530 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
2531 *disp_p = 0;
2532 return KERN_INVALID_ARGUMENT;
2533 }
2534
2535 disp = 0;
2536 is_ept = is_ept_pmap(pmap);
2537
2538 PMAP_LOCK_EXCLUSIVE(pmap);
2539
2540 pde = pmap_pde(pmap, va);
2541 if (!pde ||
2542 !(*pde & PTE_VALID_MASK(is_ept)) ||
2543 (*pde & PTE_PS)) {
2544 goto done;
2545 }
2546
2547 pte = pmap_pte(pmap, va);
2548 if (pte == PT_ENTRY_NULL) {
2549 goto done;
2550 }
2551
2552 pa = pte_to_pa(*pte);
2553 if (pa == 0) {
2554 if (PTE_IS_COMPRESSED(*pte, pte, pmap, va)) {
2555 disp |= PMAP_QUERY_PAGE_COMPRESSED;
2556 if (*pte & PTE_COMPRESSED_ALT) {
2557 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
2558 }
2559 }
2560 } else {
2561 disp |= PMAP_QUERY_PAGE_PRESENT;
2562 pai = pa_index(pa);
2563 if (!IS_MANAGED_PAGE(pai)) {
2564 } else if (pmap_pv_is_altacct(pmap, va, pai)) {
2565 assert(IS_INTERNAL_PAGE(pai));
2566 disp |= PMAP_QUERY_PAGE_INTERNAL;
2567 disp |= PMAP_QUERY_PAGE_ALTACCT;
2568 } else if (IS_REUSABLE_PAGE(pai)) {
2569 disp |= PMAP_QUERY_PAGE_REUSABLE;
2570 } else if (IS_INTERNAL_PAGE(pai)) {
2571 disp |= PMAP_QUERY_PAGE_INTERNAL;
2572 }
2573 }
2574
2575 done:
2576 PMAP_UNLOCK_EXCLUSIVE(pmap);
2577 *disp_p = disp;
2578 return KERN_SUCCESS;
2579 }
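
/*
 * Illustrative sketch (not compiled): interpreting the disposition bits
 * returned by pmap_query_page_info(). A page is reported as compressed
 * (optionally with alternate accounting) or present (optionally internal,
 * reusable, and/or alternate-accounted). The helper name is hypothetical.
 */
#if 0 /* Illustrative sketch only; not compiled. */
static void
example_classify_page(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return; /* PMAP_NULL or kernel pmap: not supported */
	}
	if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
		/* swapped to the compressor; may also carry ..._COMPRESSED_ALTACCT */
	} else if (disp & PMAP_QUERY_PAGE_PRESENT) {
		/* resident; may also carry INTERNAL, REUSABLE or ALTACCT */
	} else {
		/* no mapping and not compressed */
	}
}
#endif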
2580
2581 void
2582 pmap_set_vm_map_cs_enforced(
2583 pmap_t pmap,
2584 bool new_value)
2585 {
2586 PMAP_LOCK_EXCLUSIVE(pmap);
2587 pmap->pm_vm_map_cs_enforced = new_value;
2588 PMAP_UNLOCK_EXCLUSIVE(pmap);
2589 }
2590 extern int cs_process_enforcement_enable;
2591 bool
2592 pmap_get_vm_map_cs_enforced(
2593 pmap_t pmap)
2594 {
2595 if (cs_process_enforcement_enable) {
2596 return true;
2597 }
2598 return pmap->pm_vm_map_cs_enforced;
2599 }
2600
2601 void
2602 pmap_set_jit_entitled(__unused pmap_t pmap)
2603 {
2604 /* The x86 pmap layer does not care if a map has a JIT entry. */
2605 return;
2606 }
2607
2608 bool
2609 pmap_get_jit_entitled(__unused pmap_t pmap)
2610 {
2611 /* The x86 pmap layer does not care if a map is using JIT. */
2612 return false;
2613 }
2614
2615 bool
2616 pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
2617 {
2618 /*
2619 * The x86 pmap layer does not apply any policy to any protection
2620 * types.
2621 */
2622 return false;
2623 }
2624
2625 uint64_t
2626 pmap_release_pages_fast(void)
2627 {
2628 return 0;
2629 }
2630
2631 void
2632 pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
2633 {
2634 return;
2635 }
2636
2637 __dead2
2638 void
2639 pmap_ledger_verify_size(size_t size)
2640 {
2641 panic("%s: unsupported, "
2642 "size=%lu",
2643 __func__, size);
2644 }
2645
2646 __dead2
2647 ledger_t
2648 pmap_ledger_alloc(void)
2649 {
2650 panic("%s: unsupported",
2651 __func__);
2652 }
2653
2654 __dead2
2655 void
2656 pmap_ledger_free(ledger_t ledger)
2657 {
2658 panic("%s: unsupported, "
2659 "ledger=%p",
2660 __func__, ledger);
2661 }
2662
2663 kern_return_t
2664 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
2665 unsigned int level_mask __unused, size_t *bytes_copied __unused)
2666 {
2667 return KERN_NOT_SUPPORTED;
2668 }
2669
2670 void *
2671 pmap_map_compressor_page(ppnum_t pn)
2672 {
2673 assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
2674 return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
2675 }
2676
2677 void
2678 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
2679 {
2680 }
2681
2682 bool
2683 pmap_clear_refmod_range_options(
2684 pmap_t pmap __unused,
2685 vm_map_address_t start __unused,
2686 vm_map_address_t end __unused,
2687 unsigned int mask __unused,
2688 unsigned int options __unused)
2689 {
2690 /*
2691 * x86 has no ranged TLB invalidation ("tlbi") instructions, and we already
2692 * have the pmap_flush_context mechanism, so this operation is not implemented.
2693 */
2694 return false;
2695 }
2696
2697 bool
2698 pmap_supported_feature(pmap_t pmap, pmap_feature_flags_t feat)
2699 {
2700 switch (feat) {
2701 case PMAP_FEAT_UEXEC:
2702 return pmap != NULL && is_ept_pmap(pmap);
2703 default:
2704 return false;
2705 }
2706 }
2707