/*
 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_assert.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/ledger.h>
#include <kern/zalloc_internal.h>
#include <i386/pmap_internal.h>

void pmap_remove_range(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte);

static void pmap_remove_range_options(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte,
	int options);

void pmap_reusable_range(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte,
	boolean_t reusable);

pt_entry_t *PTE_corrupted_ptr;

#if DEVELOPMENT || DEBUG
int pmap_inject_pte_corruption;
uint32_t pmap_update_clear_pte_count;
uint32_t pmap_update_invalid_pte_count;
#endif

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
	return NBPDE;
}

uint64_t
pmap_commpage_size_min(__unused pmap_t pmap)
{
	return NBPDE;
}

/*
 * kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 * grand = the pmap that we will nest subord into
 * subord = the pmap that goes into the grand
 * va_start = start of range in pmap to be inserted
 * size = Size of nest area (up to 16TB)
 *
 * Inserts a pmap into another. This is used to implement shared segments.
 *
 * Note that we depend upon higher level VM locks to ensure that things don't change while
 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
 * or performing two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */

kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
{
	vm_map_offset_t vaddr;
	pd_entry_t *pde, *npde;
	unsigned int i;
	uint64_t num_pde;

	assert(!is_ept_pmap(grand));
	assert(!is_ept_pmap(subord));

	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (va_start & (pmap_shared_region_size_min(grand) - 1)) ||
	    ((size >> 28) > 65536)) {   /* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
	}

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX", size);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(va_start));

	vaddr = (vm_map_offset_t)va_start;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK_EXCLUSIVE(subord);

	subord->pm_shared = TRUE;

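	/*
	 * Pass 1: with the subordinate pmap locked, make sure its page-table
	 * hierarchy is fully populated across [va_start, va_start + size).
	 * Whole 1GiB-aligned stretches are handled at the PDPT level (and the
	 * PDPT entry is tagged INTEL_PDPTE_NESTED); the remainder is expanded
	 * 2MiB at a time at the PDE level. pmap_expand*() must be called with
	 * the lock dropped, so each entry is re-checked after relocking.
	 */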
	for (i = 0; i < num_pde;) {
		if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
			npde = pmap64_pdpt(subord, vaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap64_pdpt(subord, vaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			vaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap_pde(subord, vaddr);
			}
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(subord);

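	/*
	 * Pass 2: with the grand pmap locked, copy the subordinate's PDPT or
	 * PDE entries into the corresponding slots of the grand pmap, so both
	 * hierarchies share the same lower-level page tables. The grand pmap
	 * is expanded (one level above the entry being copied) as needed.
	 */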
	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0) {
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			}
			pmap_store_pte(FALSE, pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap_pde(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0) {
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			}
			vaddr += NBPDE;
			pmap_store_pte(FALSE, pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}

/*
 * kern_return_t pmap_unnest(grand, vaddr)
 *
 * grand = the pmap that we will un-nest subord from
 * vaddr = start of range in pmap to be unnested
 *
 * Removes a pmap from another. This is used to implement shared segments.
 */

kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (vaddr & (pmap_shared_region_size_min(grand) - 1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...",
		    grand, vaddr, size);
	}

	assert(!is_ept_pmap(grand));

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE - 1);

	if (os_add_overflow(vaddr, size + NBPDE - 1, &va_end)) {
		panic("pmap_unnest: Overflow when calculating range end: s=0x%llx sz=0x%llx\n", vaddr, size);
	}

	va_end &= ~(NBPDE - 1);
	size = va_end - va_start;

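	/*
	 * Illustrative example (not from the original source): with
	 * NBPDE = 2MiB, a request of vaddr = 0x3F0000, size = 0x30000 rounds
	 * va_start down to 0x200000 and va_end up to 0x600000, so every 2MiB
	 * region touched by [vaddr, vaddr + size) is unnested whole.
	 */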
	PMAP_LOCK_EXCLUSIVE(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde;) {
		if (pdptnum(grand, vaddr) != npdpt) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(FALSE, pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) {
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
		}
		pmap_store_pte(FALSE, pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}

kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	__unused uint64_t size,
	__unused unsigned int options)
{
	return pmap_unnest(grand, vaddr, size);
}

/* Invoked by the Mach VM to determine the platform specific unnest region */

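/*
 * If either end of the proposed unnest region falls within a PDPT entry
 * marked INTEL_PDPTE_NESTED, widen that end outward to the enclosing 1GiB
 * boundary and report (TRUE) that the parameters were adjusted, since a
 * nested PDPT entry can only be removed whole.
 */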
boolean_t
pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
{
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	PMAP_LOCK_EXCLUSIVE(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT - 1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT - 1));
		rval = TRUE;
	}

	PMAP_UNLOCK_EXCLUSIVE(p);

	return rval;
}

pmap_paddr_t
pmap_find_pa(pmap_t pmap, addr64_t va)
{
	pt_entry_t *ptp;
	pd_entry_t *pdep;
	pd_entry_t pde;
	pt_entry_t pte;
	boolean_t is_ept, locked = FALSE;
	pmap_paddr_t pa = 0;

	is_ept = is_ept_pmap(pmap);

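	/*
	 * Take the pmap lock only when it is safe to do so: for user pmaps
	 * outside the kernel debugger ("not_in_kdp"). Otherwise (the kernel
	 * pmap, or any pmap while in the debugger), just disable preemption
	 * so the page tables stay stable for the walk below.
	 */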
	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			pa = pte_to_pa(pde) + (va & I386_LPGMASK);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				pa = pte_to_pa(pte) + (va & PAGE_MASK);
			}
		}
	}
pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	return pa;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	ppnum_t ppn = 0;
	pmap_paddr_t pa = 0;

	pa = pmap_find_pa(pmap, va);
	ppn = (ppnum_t) i386_btop(pa);

	return ppn;
}

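/*
 * Fault-safe variant: the lookup is performed only when "pmap" is the
 * kernel pmap or backs the current thread's own map; any other pmap
 * yields 0 rather than risking a walk of page tables we don't own.
 */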
ppnum_t
pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
{
	if ((pmap == kernel_pmap) ||
	    ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
		return pmap_find_phys(pmap, va);
	}
	return 0;
}

/*
 * pmap_get_prot returns the equivalent VM page protections
 * set on a given address, 'va'. This function is used in the
 * ml_static_verify_page_protections() routine which is used
 * by the kext loading code to validate that the TEXT segment
 * of a kext is mapped executable.
 */
kern_return_t
pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
{
	pt_entry_t *ptp;
	pd_entry_t *pdep;
	pd_entry_t pde;
	pt_entry_t pte;
	boolean_t is_ept, locked = FALSE;
	kern_return_t retval = KERN_FAILURE;
	vm_prot_t prot = 0;

	is_ept = is_ept_pmap(pmap);

	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			prot = VM_PROT_READ;

			if (pde & PTE_WRITE(is_ept)) {
				prot |= VM_PROT_WRITE;
			}
			if (PTE_IS_EXECUTABLE(is_ept, pde)) {
				prot |= VM_PROT_EXECUTE;
			}
			retval = KERN_SUCCESS;
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				prot = VM_PROT_READ;

				if (pte & PTE_WRITE(is_ept)) {
					prot |= VM_PROT_WRITE;
				}
				if (PTE_IS_EXECUTABLE(is_ept, pte)) {
					prot |= VM_PROT_EXECUTE;
				}
				retval = KERN_SUCCESS;
			}
		}
	}

pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	if (protp) {
		*protp = prot;
	}

	return retval;
}

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
	pv_rooted_entry_t pv_h, pv_e;
	pv_hashed_entry_t pvh_e, nexth;
	vm_map_offset_t vaddr;
	pmap_t pmap;
	pt_entry_t *ptep;
	boolean_t is_ept;
	unsigned ept_attributes;

	assert(IS_MANAGED_PAGE(pn));
	assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

	/* We don't support the PAT bit for EPT PTEs */
	if (attributes & INTEL_PTE_NCACHE) {
		ept_attributes = INTEL_EPT_NCACHE;
	} else {
		ept_attributes = INTEL_EPT_WB;
	}

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits; while they're
	 * currently identical, they may not remain so.
	 * Potential optimization (here and in page_protect):
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = PVE_VA(pv_e);
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep) {
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
			}

			is_ept = is_ept_pmap(pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			if (!is_ept) {
				pmap_update_pte(is_ept, ptep, PHYS_CACHEABILITY_MASK, attributes, true);
			} else {
				pmap_update_pte(is_ept, ptep, INTEL_EPT_CACHE_MASK, ept_attributes, true);
			}
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

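/*
 * Enable/disable filtering of TLB-coherency ("pmap update") interrupts for
 * the calling CPU. When "dofilter" is TRUE, the CPU's CR3 is marked inactive
 * so TLB shootdowns directed at it may be skipped; when FALSE, the CR3 is
 * marked active again and any pending pmap updates are processed.
 */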
void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
{
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		mfence();
		pmap_update_interrupt();
	}
}


/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB: This is the only routine which MAY NOT lazy-evaluate
 *	or lose information. That is, this routine must actually
 *	insert this page into the given map NOW.
 */

kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_offset_t vaddr,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	pmap_mapping_type_t mapping_type)
{
	return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL, mapping_type);
}

#define PTE_LOCK(EPT) INTEL_PTE_SWLOCK

static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);

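/*
 * Per-PTE spinlock, implemented with the software-lock bit
 * (INTEL_PTE_SWLOCK) in the PTE itself. The acquire path spins (with a
 * PAUSE hint) until the bit is observed clear, then attempts an
 * acquire-ordered compare-and-swap to set it; the release path clears the
 * bit with release ordering. This serializes concurrent updates to a single
 * mapping while the pmap lock is only held shared.
 */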
void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
	pt_entry_t pte;
plretry:
	while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
		__builtin_ia32_pause();
	}
	if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
		return;
	}

	goto plretry;
}

void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
	__c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}

kern_return_t
pmap_enter_options_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void *arg,
	pmap_mapping_type_t mapping_type)
{
	return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg, mapping_type);
}

kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_offset_t vaddr,
	ppnum_t pn,
	vm_prot_t prot,
	__unused vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	void *arg,
	__unused pmap_mapping_type_t mapping_type)
{
	pt_entry_t *pte = NULL;
	pv_rooted_entry_t pv_h;
	ppnum_t pai;
	pv_hashed_entry_t pvh_e;
	pv_hashed_entry_t pvh_new;
	pt_entry_t template;
	pmap_paddr_t old_pa;
	pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t need_tlbflush = FALSE;
	boolean_t set_NX;
	char oattr;
	boolean_t old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t delpage_pm_obj = NULL;
	uint64_t delpage_pde_index = 0;
	pt_entry_t old_pte;
	kern_return_t kr = KERN_FAILURE;
	boolean_t is_ept;
	boolean_t is_altacct;
	boolean_t ptelocked = FALSE;

	pmap_intr_assert();

	if (__improbable(pmap == PMAP_NULL)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (__improbable(pn == vm_page_guard_addr)) {
		return KERN_INVALID_ARGUMENT;
	}

	is_ept = is_ept_pmap(pmap);

	/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
	 * unused value for that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);


	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
	    prot);

	if ((prot & VM_PROT_EXECUTE) || __improbable(is_ept && (prot & VM_PROT_UEXEC))) {
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

#if DEVELOPMENT || DEBUG
	if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
		set_NX = FALSE;
	}

	if (__improbable(set_NX && (pmap == kernel_pmap) &&
	    ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
	    (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}
#endif

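	/*
	 * Entry/retry protocol: the pmap lock is taken shared (individual PTE
	 * updates are serialized by the PTE software lock instead). If a PV
	 * list entry must be allocated from the zone, all locks are dropped
	 * first and control returns to "Retry" with the preallocated entry
	 * carried in pvh_new.
	 */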
	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK_SHARED(pmap);

	/*
	 * Expand pmap to include this pte. Assume that
	 * pmap is always expanded to include enough hardware
	 * pages to map one VM page.
	 */
	if (__improbable(superpage)) {
		while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK_SHARED(pmap);
			kr = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr != KERN_SUCCESS) {
				goto done1;
			}
			PMAP_LOCK_SHARED(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			PMAP_UNLOCK_SHARED(pmap);
			kr = pmap_expand(pmap, vaddr, options);
			if (kr != KERN_SUCCESS) {
				goto done1;
			}
			PMAP_LOCK_SHARED(pmap);
		}
	}

	if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
		PMAP_UNLOCK_SHARED(pmap);
		kr = KERN_SUCCESS;
		goto done1;
	}

	if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		pmap_store_pte(is_ept, pte, 0);
	}

	PTE_LOCK_LOCK(pte);
	ptelocked = TRUE;

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	if (old_pa == 0 &&
	    PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
		/*
		 * "pmap" should be locked at this point, so this should
		 * not race with another pmap_enter() or pmap_remove_range().
		 */
		assert(pmap != kernel_pmap);

		/* one less "compressed" */
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
		    PAGE_SIZE);
		if (*pte & PTE_COMPRESSED_ALT) {
			pmap_ledger_debit(
				pmap,
				task_ledgers.alternate_accounting_compressed,
				PAGE_SIZE);
		} else {
			/* was part of the footprint */
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
			    PAGE_SIZE);
		}
		/* marker will be cleared below */
	}

	/*
	 * If we have a previous managed page, lock the pv entry now. After
	 * we lock it, check to see if someone beat us to the lock and, if
	 * so, drop the lock.
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);        /* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 * Special case if the incoming physical page is already mapped
	 * at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));

		/*
		 * May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa);

		if (__probable(!is_ept)) {
			template |= INTEL_PTE_VALID;
		} else {
			template |= INTEL_EPT_IPAT;
		}

		template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

		/*
		 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
		 */
		if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
			if (!(flags & VM_MEM_GUARDED)) {
				template |= INTEL_PTE_PAT;
			}
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap && !is_ept) {
			template |= INTEL_PTE_USER;
		}

		if (prot & VM_PROT_READ) {
			template |= PTE_READ(is_ept);
		}

		if (prot & VM_PROT_WRITE) {
			template |= PTE_WRITE(is_ept);
			if (is_ept && !pmap_ept_support_ad) {
				template |= PTE_MOD(is_ept);
				if (old_pa_locked) {
					assert(IS_MANAGED_PAGE(pai));
					pmap_phys_attributes[pai] |= PHYS_MODIFIED;
				}
			}
		}

		if (prot & VM_PROT_EXECUTE) {
			assert(set_NX == 0);
			template = pte_set_ex(template, is_ept);
		}

		if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
			assert(set_NX == 0);
			template = pte_set_uex(template);
		}

		if (set_NX) {
			template = pte_remove_ex(template, is_ept);
		}

		if (wired) {
			template |= PTE_WIRED;
			if (!iswired(old_attributes)) {
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}

		if (superpage) {        /* this path cannot be used */
			template |= PTE_PS;     /* to change the page size! */
		}
		if (old_attributes == template) {
			goto dont_update_pte;
		}

		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != PTE_WIRED);

		/* Optimisation: avoid TLB flush when adding writability */
		if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
			if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
				need_tlbflush = FALSE;
			}
		}

		/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
		if (__improbable(is_ept && !pmap_ept_support_ad)) {
			template |= PTE_REF(is_ept);
			if (old_pa_locked) {
				assert(IS_MANAGED_PAGE(pai));
				pmap_phys_attributes[pai] |= PHYS_REFERENCED;
			}
		}

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;

		assert((*pte & PTE_LOCK(is_ept)) != 0);

		do {
			opte = *pte;
			npte = template | (opte & (PTE_REF(is_ept) |
			    PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
		} while (!pmap_cmpx_pte(pte, opte, npte));

		DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, opte, uint64_t, npte);

dont_update_pte:
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto done2;
	}

	/*
	 * Outline of code from here:
	 *   1) If va was mapped, update TLBs, remove the mapping
	 *      and remove old pvlist entry.
	 *   2) Add pvlist entry for new mapping
	 *   3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3). If the new physical page is not
	 * managed, step 2) is skipped.
	 */
	/* TODO: add opportunistic refmod collect */
	if (old_pa != (pmap_paddr_t) 0) {
		boolean_t was_altacct = FALSE;

		/*
		 * Don't do anything to pages outside valid memory here.
		 * Instead convince the code that enters a new mapping
		 * to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
		/* completely invalidate the PTE */
		pmap_store_pte(is_ept, pte, PTE_LOCK(is_ept));

		if (IS_MANAGED_PAGE(pai)) {
			/*
			 * Remove the mapping from the pvlist for
			 * this physical page.
			 * We'll end up with either a rooted pv or a
			 * hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
		}

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			if (pmap != kernel_pmap) {
				/* update ledgers */
				if (was_altacct) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(!was_altacct);
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
					/* was already not in phys_footprint */
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(!was_altacct);
					assert(!IS_REUSABLE_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
				} else {
					/* not an internal page */
					pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
				}
			}
			if (iswired(*pte)) {
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}

			if (!is_ept) {
				pmap_phys_attributes[pai] |= oattr;
			} else {
				pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
			}
		} else {
			/*
			 * old_pa is not managed.
			 * Do removal part of accounting.
			 */

			if (pmap != kernel_pmap) {
#if 00
				assert(pmap->stats.device > 0);
				OSAddAtomic(-1, &pmap->stats.device);
#endif
			}
			if (iswired(*pte)) {
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * If we had a previously managed page locked, unlock it now.
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);     /* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {
		/*
		 * Step 2) Enter the mapping in the PV list for this
		 * physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 * No mappings yet, use rooted pv
			 */
			pv_h->va_and_flags = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);

			if (options & PMAP_OPTIONS_INTERNAL) {
				pmap_phys_attributes[pai] |= PHYS_INTERNAL;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
			}
			if (options & PMAP_OPTIONS_REUSABLE) {
				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
			}
			if ((options & PMAP_OPTIONS_ALT_ACCT) &&
			    IS_INTERNAL_PAGE(pai)) {
				pv_h->va_and_flags |= PVE_IS_ALTACCT;
				is_altacct = TRUE;
			} else {
				pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
				is_altacct = FALSE;
			}
		} else {
			/*
			 * Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PTE_LOCK_UNLOCK(pte);
						PMAP_UNLOCK_SHARED(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e) {
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
			}

			pvh_e->va_and_flags = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			if ((options & PMAP_OPTIONS_ALT_ACCT) &&
			    IS_INTERNAL_PAGE(pai)) {
				pvh_e->va_and_flags |= PVE_IS_ALTACCT;
				is_altacct = TRUE;
			} else {
				pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
				is_altacct = FALSE;
			}
			pv_hash_add(pvh_e, pv_h);

			/*
			 * Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		if (pmap != kernel_pmap) {
			/* update ledgers */
			if (is_altacct) {
				/* internal but also alternate accounting */
				assert(IS_INTERNAL_PAGE(pai));
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				/* alternate accounting, so not in footprint */
			} else if (IS_REUSABLE_PAGE(pai)) {
				assert(!is_altacct);
				assert(IS_INTERNAL_PAGE(pai));
				pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
				/* internal but reusable: not in footprint */
			} else if (IS_INTERNAL_PAGE(pai)) {
				assert(!is_altacct);
				assert(!IS_REUSABLE_PAGE(pai));
				/* internal: add to footprint */
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			} else {
				/* not internal: not in footprint */
				pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
			}
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		if (pmap != kernel_pmap) {
#if 00
			OSAddAtomic(+1, &pmap->stats.device);
			PMAP_STATS_PEAK(pmap->stats.device);
#endif
		}
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa);

	if (!is_ept) {
		template |= INTEL_PTE_VALID;
	} else {
		template |= INTEL_EPT_IPAT;
	}

	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */

	template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

	/*
	 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
	 */
	if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
		if (!(flags & VM_MEM_GUARDED)) {
			template |= INTEL_PTE_PAT;
		}
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap && !is_ept) {
		template |= INTEL_PTE_USER;
	}
	if (prot & VM_PROT_READ) {
		template |= PTE_READ(is_ept);
	}
	if (prot & VM_PROT_WRITE) {
		template |= PTE_WRITE(is_ept);
		if (is_ept && !pmap_ept_support_ad) {
			template |= PTE_MOD(is_ept);
			if (IS_MANAGED_PAGE(pai)) {
				pmap_phys_attributes[pai] |= PHYS_MODIFIED;
			}
		}
	}
	if (prot & VM_PROT_EXECUTE) {
		assert(set_NX == 0);
		template = pte_set_ex(template, is_ept);
	}
	if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
		assert(set_NX == 0);
		template = pte_set_uex(template);
	}

	if (set_NX) {
		template = pte_remove_ex(template, is_ept);
	}
	if (wired) {
		template |= INTEL_PTE_WIRED;
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (__improbable(superpage)) {
		template |= INTEL_PTE_PS;
	}

	/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
	if (__improbable(is_ept && !pmap_ept_support_ad)) {
		template |= PTE_REF(is_ept);
		if (IS_MANAGED_PAGE(pai)) {
			pmap_phys_attributes[pai] |= PHYS_REFERENCED;
		}
	}
	template |= PTE_LOCK(is_ept);
	pmap_store_pte(is_ept, pte, template);
	DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, 0, uint64_t, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
done2:
	if (need_tlbflush == TRUE) {
		if (options & PMAP_OPTIONS_NOFLUSH) {
			PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
		} else {
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		}
	}
	if (ptelocked) {
		PTE_LOCK_UNLOCK(pte);
	}
	PMAP_UNLOCK_SHARED(pmap);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
		if (m == VM_PAGE_NULL) {
			panic("pmap_enter: pte page not in object");
		}
		VM_PAGE_FREE(m);
		vm_object_unlock(delpage_pm_obj);
		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	kr = KERN_SUCCESS;
done1:
	if (__improbable((kr == KERN_SUCCESS) && (pmap == kernel_pmap) &&
	    zone_spans_ro_va(vaddr, vaddr + PAGE_SIZE))) {
		pmap_page_protect((ppnum_t)atop_kernel(kvtophys(vaddr)), VM_PROT_READ);
	}
	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
	return kr;
}

/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page. This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t *spte,
	pt_entry_t *epte)
{
	pmap_remove_range_options(pmap, start_vaddr, spte, epte,
	    PMAP_OPTIONS_REMOVE);
}

static void
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t *spte,
	pt_entry_t *epte,
	int options)
{
	pt_entry_t *cpte;
	pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_e;
	int pvh_cnt = 0;
	int num_removed, num_unwired, num_found, num_invalid;
	int ledgers_external, ledgers_reusable, ledgers_internal, ledgers_alt_internal;
	uint64_t ledgers_compressed, ledgers_alt_compressed;
	ppnum_t pai;
	pmap_paddr_t pa;
	vm_map_offset_t vaddr;
	boolean_t is_ept = is_ept_pmap(pmap);
	boolean_t was_altacct;

	num_removed = 0;
	num_unwired = 0;
	num_found = 0;
	num_invalid = 0;
	ledgers_external = 0;
	ledgers_reusable = 0;
	ledgers_internal = 0;
	ledgers_compressed = 0;
	ledgers_alt_internal = 0;
	ledgers_alt_compressed = 0;

	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0) {
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				ledgers_compressed++;
				if (p & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				/* clear marker(s) */
				/* XXX probably does not need to be atomic! */
				pmap_update_pte(is_ept, cpte, INTEL_PTE_COMPRESSED_MASK, 0, true);
			}
			continue;
		}
		num_found++;

		if (iswired(p)) {
			num_unwired++;
		}

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 * Outside range of managed physical memory.
			 * Just remove the mappings.
			 */
			pmap_store_pte(is_ept, cpte, 0);
			continue;
		}

		if ((p & PTE_VALID_MASK(is_ept)) == 0) {
			num_invalid++;
		}

		/* invalidate the PTE */
		pmap_update_pte(is_ept, cpte, PTE_VALID_MASK(is_ept), 0, true);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

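	/*
	 * Second pass: with all PTEs in the range invalidated and the TLB
	 * shootdown complete, walk the range again to unlink PV entries,
	 * collect ref/mod bits into pmap_phys_attributes[], zero the PTEs,
	 * and tally the per-ledger adjustments applied below.
	 */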
	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pa = pte_to_pa(*cpte);
		if (pa == 0) {
check_pte_for_compressed_marker:
			/*
			 * This PTE could have been replaced with a
			 * "compressed" marker after our first "freeze"
			 * loop above, so check again.
			 */
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				ledgers_compressed++;
				if (*cpte & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				pmap_store_pte(is_ept, cpte, 0);
			}
			continue;
		}

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			goto check_pte_for_compressed_marker;
		}

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);

		num_removed++;
		/* update ledgers */
		if (was_altacct) {
			/* internal and alternate accounting */
			assert(IS_INTERNAL_PAGE(pai));
			ledgers_internal++;
			ledgers_alt_internal++;
		} else if (IS_REUSABLE_PAGE(pai)) {
			/* internal but reusable */
			assert(!was_altacct);
			assert(IS_INTERNAL_PAGE(pai));
			ledgers_reusable++;
		} else if (IS_INTERNAL_PAGE(pai)) {
			/* internal */
			assert(!was_altacct);
			assert(!IS_REUSABLE_PAGE(pai));
			ledgers_internal++;
		} else {
			/* not internal */
			ledgers_external++;
		}

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		if (!is_ept) {
			pmap_phys_attributes[pai] |=
			    *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
		} else {
			pmap_phys_attributes[pai] |=
			    ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
		}

		/* completely invalidate the PTE */
		pmap_store_pte(is_ept, cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
		/* We can encounter at most 'num_found' PTEs for this level
		 * Fewer may be encountered if some were replaced by
		 * compressed markers. No new valid PTEs can be created
		 * since the pmap lock is held exclusively.
		 */
		if (num_removed == num_found) {
			break;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 * Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed) {
		panic("pmap_remove_range: resident_count");
	}
#endif
	if (num_removed) {
		pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	}

	if (pmap != kernel_pmap) {
		if (ledgers_external) {
			pmap_ledger_debit(pmap,
			    task_ledgers.external,
			    machine_ptob(ledgers_external));
		}
		if (ledgers_reusable) {
			pmap_ledger_debit(pmap,
			    task_ledgers.reusable,
			    machine_ptob(ledgers_reusable));
		}
		if (ledgers_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal,
			    machine_ptob(ledgers_internal));
		}
		if (ledgers_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal_compressed,
			    machine_ptob(ledgers_compressed));
		}
		if (ledgers_alt_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting,
			    machine_ptob(ledgers_alt_internal));
		}
		if (ledgers_alt_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting_compressed,
			    machine_ptob(ledgers_alt_compressed));
		}

		uint64_t net_debit = (ledgers_internal - ledgers_alt_internal) + (ledgers_compressed - ledgers_alt_compressed);
		if (net_debit) {
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(net_debit));
		}
	}

	if (num_unwired != 0) {
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
	}
	return;
}


/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t map,
	addr64_t s64,
	addr64_t e64)
{
	pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}
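
/*
 * After traversing this many PDE-sized chunks with work still remaining,
 * pmap_remove_options() starts checking the TSC against a deadline derived
 * from max_preemption_latency_tsc, periodically dropping and retaking the
 * pmap lock so that higher-priority work is not locked out.
 */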
#define PLCHECK_THRESHOLD (2)

void
pmap_remove_options(
	pmap_t map,
	addr64_t s64,
	addr64_t e64,
	int options)
{
	pt_entry_t *pde;
	pt_entry_t *spte, *epte;
	addr64_t l64;
	uint64_t deadline = 0;
	boolean_t is_ept;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64) {
		return;
	}

	is_ept = is_ept_pmap(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	PMAP_LOCK_EXCLUSIVE(map);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		pml4_entry_t *pml4e = pmap64_pml4(map, s64);
		if ((pml4e == NULL) ||
		    ((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
			if (os_add_overflow(s64, NBPML4, &s64)) {
				/* wrap; clip s64 to e64 */
				s64 = e64;
				break;
			}
			s64 &= ~(PML4MASK);
			continue;
		}
		pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
		if ((pdpte == NULL) ||
		    ((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
			if (os_add_overflow(s64, NBPDPT, &s64)) {
				/* wrap; clip s64 to e64 */
				s64 = e64;
				break;
			}
			s64 &= ~(PDPTMASK);
			continue;
		}

		if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
			l64 = e64;
		} else {
			l64 &= ~(PDE_MAPPED_SIZE - 1);

			if (l64 > e64) {
				l64 = e64;
			}
		}

		pde = pmap_pde(map, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte + 1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range_options(map, s64, spte, epte,
			    options);
		}
		s64 = l64;

		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64_nofence() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(map);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(map);
					deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}

void
pmap_page_protect(
	ppnum_t pn,
	vm_prot_t prot)
{
	pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect_options(
	ppnum_t pn,
	vm_prot_t prot,
	unsigned int options,
	void *arg)
{
	pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t nexth;
	int pvh_cnt = 0;
	pv_rooted_entry_t pv_h;
	pv_rooted_entry_t pv_e;
	pv_hashed_entry_t pvh_e;
	pt_entry_t *pte;
	int pai;
	pmap_t pmap;
	boolean_t remove;
	pt_entry_t new_pte_value;
	boolean_t is_ept;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr) {
		return;
	}

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;         /* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);


	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		goto done;
	}

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */

	do {
		vm_map_offset_t vaddr;

		if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
		    (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
			/* page was modified, so it will be compressed */
			options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
			options |= PMAP_OPTIONS_COMPRESSOR;
		}

		pmap = pv_e->pmap;
		is_ept = is_ept_pmap(pmap);
		vaddr = PVE_VA(pv_e);
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
			    "pmap=%p pn=0x%x vaddr=0x%llx\n",
			    pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {
			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}

			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
				/* mark this PTE as having been "compressed" */
				new_pte_value = PTE_COMPRESSED;
				if (IS_ALTACCT_PAGE(pai, pv_e)) {
					new_pte_value |= PTE_COMPRESSED_ALT;
				}
			} else {
				new_pte_value = 0;
			}

			if (options & PMAP_OPTIONS_NOREFMOD) {
				pmap_store_pte(is_ept, pte, new_pte_value);

				if (options & PMAP_OPTIONS_NOFLUSH) {
					PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
				} else {
					PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
				}
			} else {
				/*
				 * Remove the mapping, collecting dirty bits.
				 */
				pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);

				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
				if (!is_ept) {
					pmap_phys_attributes[pai] |=
					    *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
				} else {
					pmap_phys_attributes[pai] |=
					    ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
				}
				if ((options &
				    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
				    IS_INTERNAL_PAGE(pai) &&
				    (pmap_phys_attributes[pai] &
				    PHYS_MODIFIED)) {
					/*
					 * Page is actually "modified" and
					 * will be compressed. Start
					 * accounting for it as "compressed".
					 */
					assert(!(options & PMAP_OPTIONS_COMPRESSOR));
					options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
					options |= PMAP_OPTIONS_COMPRESSOR;
					assert(new_pte_value == 0);
					if (pmap != kernel_pmap) {
						new_pte_value = PTE_COMPRESSED;
						if (IS_ALTACCT_PAGE(pai, pv_e)) {
							new_pte_value |= PTE_COMPRESSED_ALT;
						}
					}
				}
				pmap_store_pte(is_ept, pte, new_pte_value);
			}

#if TESTING
			if (pmap->stats.resident_count < 1) {
				panic("pmap_page_protect: resident_count");
			}
#endif
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);

			/*
			 * We only ever compress internal pages.
			 */
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				assert(IS_INTERNAL_PAGE(pai));
			}
			if (pmap != kernel_pmap) {
				/* update ledgers */
				if (IS_ALTACCT_PAGE(pai, pv_e)) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
					}
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(!IS_ALTACCT_PAGE(pai, pv_e));
					assert(IS_INTERNAL_PAGE(pai));
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
					pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(!IS_ALTACCT_PAGE(pai, pv_e));
					assert(!IS_REUSABLE_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					/*
					 * Update all stats related to physical
					 * footprint, which only deals with
					 * internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being
						 * done so we can send this page
						 * to the compressor; therefore
						 * it mustn't affect total task
						 * footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
					} else {
						/*
						 * This internal page isn't
						 * going to the compressor,
						 * so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
				}
			}

			/*
			 * Deal with the pv_rooted_entry.
			 */

			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL) {
					pvh_et = pvh_e;
				}
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			if (!is_ept) {
				pmap_phys_attributes[pai] |=
				    *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
			} else {
				pmap_phys_attributes[pai] |=
				    ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
			}

			pmap_update_pte(is_ept, pte, PTE_WRITE(is_ept), 0, true);
			if (options & PMAP_OPTIONS_NOFLUSH) {
				PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
			} else {
				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			}
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va_and_flags = pvh_e->va_and_flags;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	}
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
1999
2000
/*
 * Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t pn,
	int bits,
	unsigned int options,
	void *arg)
{
	pv_rooted_entry_t pv_h;
	pv_hashed_entry_t pv_e;
	pt_entry_t *pte = NULL;
	int pai;
	pmap_t pmap;
	char attributes = 0;
	boolean_t is_internal, is_reusable, is_altacct, is_ept;
	int ept_bits_to_clear;
	boolean_t ept_keep_global_mod = FALSE;

	if ((bits & PHYS_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    arg == NULL) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
		    "should not clear 'modified' without flushing TLBs\n",
		    pn, bits, options, arg);
	}

	/* We only support clearing the MOD and REF bits for EPT PTEs in this function */
	assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);

	ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr) {
		return;
	}

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down the PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list itself because we hold the
	 * PV head lock for this page.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */

		is_internal = IS_INTERNAL_PAGE(pai);
		is_reusable = IS_REUSABLE_PAGE(pai);

		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;
			char pte_bits;

			pmap = pv_e->pmap;
			is_ept = is_ept_pmap(pmap);
			is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
			va = PVE_VA(pv_e);
			pte_bits = 0;

			if (bits) {
				pte = pmap_pte(pmap, va);
				/* grab ref/mod bits from this PTE */
				pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
				/* propagate to page's global attributes */
				if (!is_ept) {
					attributes |= pte_bits;
				} else {
					attributes |= ept_refmod_to_physmap(pte_bits);
					if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
						ept_keep_global_mod = TRUE;
					}
				}
				/* which bits to clear for this PTE? */
				if (!is_ept) {
					pte_bits &= bits;
				} else {
					pte_bits &= ept_bits_to_clear;
				}
			}
			if (options & PMAP_OPTIONS_CLEAR_WRITE) {
				pte_bits |= PTE_WRITE(is_ept);
			}

			/*
			 * Clear modify and/or reference bits.
			 */
			if (pte_bits) {
				pmap_update_pte(is_ept, pte, pte_bits, 0, true);

				/*
				 * Ensure all processors using this translation
				 * invalidate this TLB entry. The invalidation
				 * *must* follow the PTE update, to ensure that
				 * the TLB shadow of the 'D' bit (in particular)
				 * is synchronized with the updated PTE.
				 */
				if (!(options & PMAP_OPTIONS_NOFLUSH)) {
					/* flush TLBs now */
					PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
				} else if (arg) {
					/* delayed TLB flush: add "pmap" info */
					PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE,
					    (pmap_flush_context *)arg);
				} else {
					/* no TLB flushing at all */
				}
			}

			/* update pmap "reusable" stats */
			if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
			    is_reusable &&
			    pmap != kernel_pmap) {
				/* one less "reusable" */
				pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
				if (is_internal) {
					/* one more "internal" */
					if (is_altacct) {
						/* no impact on ledgers */
					} else {
						pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					/* one more "external" */
					pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
				}
			} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
			    !is_reusable &&
			    pmap != kernel_pmap) {
				/* one more "reusable" */
				pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
				if (is_internal) {
					/* one less "internal" */
					if (is_altacct) {
						/* no impact on footprint */
					} else {
						pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					/* one less "external" */
					pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
				}
			}

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/*
	 * Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */

	pmap_phys_attributes[pai] |= attributes;

	if (ept_keep_global_mod) {
		/*
		 * If the hardware doesn't support AD bits for EPT PTEs and someone is
		 * requesting that we clear the modified bit for a phys page, we need
		 * to ensure that there are no EPT mappings for the page with the
		 * modified bit set. If there are, we cannot clear the global modified bit.
		 */
		bits &= ~PHYS_MODIFIED;
	}
	pmap_phys_attributes[pai] &= ~(bits);

	/* update this page's "reusable" status */
	if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
		pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
	} else if (options & PMAP_OPTIONS_SET_REUSABLE) {
		pmap_phys_attributes[pai] |= PHYS_REUSABLE;
	}

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
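
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): a hypothetical caller clearing the modified bit on a batch of
 * pages while deferring the TLB shootdowns through a pmap_flush_context,
 * as the PMAP_OPTIONS_NOFLUSH/arg contract above requires. The
 * pmap_flush_context helpers are assumed from elsewhere in xnu, and the
 * page loop is invented for illustration:
 *
 *	pmap_flush_context pfc;
 *
 *	pmap_flush_context_init(&pfc);
 *	for (each page "pn" in the batch) {
 *		phys_attribute_clear(pn, PHYS_MODIFIED,
 *		    PMAP_OPTIONS_NOFLUSH, (void *)&pfc);
 *	}
 *	pmap_flush(&pfc);	(issue the delayed invalidations)
 */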

/*
 * Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t pn,
	int bits)
{
	pv_rooted_entry_t pv_h;
	pv_hashed_entry_t pv_e;
	pt_entry_t *pte;
	int pai;
	pmap_t pmap;
	int attributes = 0;
	boolean_t is_ept;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
	if (pn == vm_page_guard_addr) {
		return 0;
	}

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check: if the bits have already been collected, there is
	 * no need to take any locks. If they are not all set, we must
	 * recheck after taking the lock, in case they were pulled in
	 * while we were waiting for it.
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits) {
		return bits;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;

	/*
	 * Walk down the PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			is_ept = is_ept_pmap(pmap);
			va = PVE_VA(pv_e);
			/*
			 * Pick up modify and/or reference bits from this mapping.
			 */
			pte = pmap_pte(pmap, va);
			if (!is_ept) {
				attributes |= (int)(*pte & bits);
			} else {
				attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
			}

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
		} while ((attributes != bits) &&
		    (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return attributes;
}
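
/*
 * Illustrative usage sketch (editor's addition): testing whether a
 * physical page has been written since its bits were last cleared. The
 * caller context is hypothetical; only phys_attribute_test() and the
 * PHYS_* bits come from this file:
 *
 *	if (phys_attribute_test(pn, PHYS_MODIFIED) & PHYS_MODIFIED) {
 *		(treat the page as dirty)
 *	}
 *
 * Because collected bits are sticky in pmap_phys_attributes[], the fast
 * path above can often return without taking the PV head lock.
 */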

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t map,
	vm_map_offset_t vaddr,
	boolean_t wired)
{
	pt_entry_t *pte;

	PMAP_LOCK_SHARED(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
		panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
		    map, vaddr, wired);
	}

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(is_ept_pmap(map), pte, 0, PTE_WIRED, false);
	} else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(is_ept_pmap(map), pte, PTE_WIRED, 0, false);
	}

	PMAP_UNLOCK_SHARED(map);
}
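
/*
 * Illustrative usage sketch (editor's addition): wiring and later
 * unwiring a mapping that is known to exist. Both calls adjust
 * task_ledgers.wired_mem; calling this for a missing mapping panics.
 * "user_pmap" and "vaddr" are hypothetical, with the mapping assumed to
 * have been entered previously:
 *
 *	pmap_change_wiring(user_pmap, vaddr, TRUE);	(wire down)
 *	...
 *	pmap_change_wiring(user_pmap, vaddr, FALSE);	(unwire)
 */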

/*
 * "Backdoor" direct map routine for early mappings.
 * Useful for mapping memory outside the managed physical range
 * (e.g. device registers).
 * Sets the referenced (A), modified (D) and non-cacheable (NC) bits
 * as requested.
 */
vm_offset_t
pmap_map_bd(
	vm_offset_t virt,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t prot,
	unsigned int flags)
{
	pt_entry_t template;
	pt_entry_t *ptep;

	vm_offset_t base = virt;
	boolean_t doflush = FALSE;

	template = pa_to_pte(start_addr)
	    | INTEL_PTE_REF
	    | INTEL_PTE_MOD
	    | INTEL_PTE_WIRED
	    | INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED))) {
			template |= INTEL_PTE_PAT;
		}
	}

	if ((prot & VM_PROT_EXECUTE) == 0) {
		template |= INTEL_PTE_NX;
	}

	if (prot & VM_PROT_WRITE) {
		template |= INTEL_PTE_WRITE;
	}
	vm_map_offset_t caddr = start_addr;
	while (caddr < end_addr) {
		ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address");
		}
		if (pte_to_pa(*ptep)) {
			doflush = TRUE;
		}
		pmap_store_pte(FALSE, ptep, template);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		caddr += PAGE_SIZE;
	}
	if (doflush) {
		pmap_tlbi_range(0, ~0ULL, true, 0);
		PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
	}
	return virt;
}
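
/*
 * Illustrative usage sketch (editor's addition): using the backdoor
 * mapper at early boot to map a device register window uncached. The
 * physical range and virtual cursor names are hypothetical:
 *
 *	vm_offset_t next_va;
 *
 *	next_va = pmap_map_bd(io_va, io_phys_start, io_phys_end,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
 *
 * The return value is the first virtual address past the new mapping,
 * which a caller can feed into its next pmap_map_bd() call.
 */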

/*
 * Create a virtual alias beginning at 'ava' of the specified kernel
 * virtual range. The aliased pagetable range is expanded if
 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no
 * synchronization: it assumes the caller has stabilized the source and
 * destination ranges. Currently used to populate sections of the
 * trampoline "doublemap" at CPU startup.
 */
void
pmap_alias(
	vm_offset_t ava,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t prot,
	unsigned int eoptions)
{
	pt_entry_t prot_template, template;
	pt_entry_t *aptep, *sptep;

	prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
	if ((prot & VM_PROT_EXECUTE) == 0) {
		prot_template |= INTEL_PTE_NX;
	}

	if (prot & VM_PROT_WRITE) {
		prot_template |= INTEL_PTE_WRITE;
	}
	assert(((start_addr | end_addr) & PAGE_MASK) == 0);
	while (start_addr < end_addr) {
		aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
		if (aptep == PT_ENTRY_NULL) {
			if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
				pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
				aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
			} else {
				panic("pmap_alias: Invalid alias address");
			}
		}
		/* The aliased range should not have any active mappings */
		assert(pte_to_pa(*aptep) == 0);

		sptep = pmap_pte(kernel_pmap, start_addr);
		assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
		template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
		pmap_store_pte(FALSE, aptep, template);

		ava += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
}
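
/*
 * Illustrative usage sketch (editor's addition): aliasing a stabilized
 * kernel range into the trampoline doublemap at CPU bringup, expanding
 * the alias pagetables on demand. The addresses are hypothetical and,
 * per the assert above, must be page aligned:
 *
 *	pmap_alias(alias_va, kva_start, kva_end,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    PMAP_EXPAND_OPTIONS_ALIASMAP);
 */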

mach_vm_size_t
pmap_query_resident(
	pmap_t pmap,
	addr64_t s64,
	addr64_t e64,
	mach_vm_size_t *compressed_bytes_p)
{
	pt_entry_t *pde;
	pt_entry_t *spte, *epte;
	addr64_t l64;
	uint64_t deadline = 0;
	mach_vm_size_t resident_bytes;
	mach_vm_size_t compressed_bytes;
	boolean_t is_ept;

	pmap_intr_assert();

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	is_ept = is_ept_pmap(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_LOCK_EXCLUSIVE(pmap);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
			l64 = e64;
		} else {
			l64 &= ~(PDE_MAPPED_SIZE - 1);

			if (l64 > e64) {
				l64 = e64;
			}
		}

		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/* superpage: not supported */
			} else {
				spte = pmap_pte(pmap,
				    (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					if (pte_to_pa(*spte) != 0) {
						resident_bytes += PAGE_SIZE;
					} else if (*spte & PTE_COMPRESSED) {
						compressed_bytes += PAGE_SIZE;
					}
				}
			}
		}
		s64 = l64;

		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(pmap);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(pmap);
					deadline = rdtsc64() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    resident_bytes);

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}
	return resident_bytes;
}
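
/*
 * Illustrative usage sketch (editor's addition): querying how much of a
 * hypothetical user pmap's range is resident versus compressed. The
 * kernel pmap and empty ranges short-circuit to zero, per the checks
 * above:
 *
 *	mach_vm_size_t resident, compressed;
 *
 *	resident = pmap_query_resident(user_pmap, start, end, &compressed);
 */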

uint64_t pmap_query_page_info_retries;

kern_return_t
pmap_query_page_info(
	pmap_t pmap,
	vm_map_offset_t va,
	int *disp_p)
{
	int disp;
	boolean_t is_ept;
	pmap_paddr_t pa;
	ppnum_t pai;
	pd_entry_t *pde_p;
	pt_entry_t *pte_p, pte;

	pmap_intr_assert();
	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		*disp_p = 0;
		return KERN_INVALID_ARGUMENT;
	}

	disp = 0;
	is_ept = is_ept_pmap(pmap);

	PMAP_LOCK_EXCLUSIVE(pmap);

	pde_p = pmap_pde(pmap, va);
	if (!pde_p ||
	    !(*pde_p & PTE_VALID_MASK(is_ept)) ||
	    (*pde_p & PTE_PS)) {
		goto done;
	}

try_again:
	disp = 0;

	pte_p = pmap_pte(pmap, va);
	if (pte_p == PT_ENTRY_NULL) {
		goto done;
	}

	pte = *pte_p;
	pa = pte_to_pa(pte);
	if (pa == 0) {
		if (PTE_IS_COMPRESSED(pte, pte_p, pmap, va)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (pte & PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = pa_index(pa);
		if (!IS_MANAGED_PAGE(pai)) {
			/* not a managed page: no further disposition bits */
		} else if (pmap_pv_is_altacct(pmap, va, pai)) {
			assert(IS_INTERNAL_PAGE(pai));
			disp |= PMAP_QUERY_PAGE_INTERNAL;
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (IS_REUSABLE_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (IS_INTERNAL_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
	}
	if (__improbable(pte_p != pmap_pte(pmap, va) || pte != *pte_p)) {
		/* something changed: try again */
		pmap_query_page_info_retries++;
		goto try_again;
	}
done:
	PMAP_UNLOCK_EXCLUSIVE(pmap);
	*disp_p = disp;
	return KERN_SUCCESS;
}
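
/*
 * Illustrative usage sketch (editor's addition): classifying a single
 * page of a hypothetical user pmap. Disposition bits can be combined;
 * for example a compressed page may also carry the
 * alternate-accounting flag:
 *
 *	int disp;
 *
 *	if (pmap_query_page_info(user_pmap, va, &disp) == KERN_SUCCESS &&
 *	    (disp & PMAP_QUERY_PAGE_COMPRESSED)) {
 *		(page contents live in the compressor)
 *	}
 */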

void
pmap_set_vm_map_cs_enforced(
	pmap_t pmap,
	bool new_value)
{
	PMAP_LOCK_EXCLUSIVE(pmap);
	pmap->pm_vm_map_cs_enforced = new_value;
	PMAP_UNLOCK_EXCLUSIVE(pmap);
}

extern int cs_process_enforcement_enable;

bool
pmap_get_vm_map_cs_enforced(
	pmap_t pmap)
{
	if (cs_process_enforcement_enable) {
		return true;
	}
	return pmap->pm_vm_map_cs_enforced;
}
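
/*
 * Illustrative note (editor's addition): the global
 * cs_process_enforcement_enable knob overrides the per-pmap flag, so
 * with the hypothetical "user_pmap" below:
 *
 *	pmap_set_vm_map_cs_enforced(user_pmap, false);
 *	(pmap_get_vm_map_cs_enforced(user_pmap) still returns true
 *	 while cs_process_enforcement_enable is nonzero)
 */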

void
pmap_set_jit_entitled(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map has a JIT entry. */
	return;
}

bool
pmap_get_jit_entitled(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map is using JIT. */
	return false;
}

void
pmap_set_tpro(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map is using TPRO. */
	return;
}

bool
pmap_get_tpro(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map is using TPRO. */
	return false;
}

bool
pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
	/*
	 * The x86 pmap layer does not apply any policy to any protection
	 * types.
	 */
	return false;
}

uint64_t
pmap_release_pages_fast(void)
{
	return 0;
}

void
pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
{
	return;
}

__dead2
void
pmap_ledger_verify_size(size_t size)
{
	panic("%s: unsupported, size=%lu", __func__, size);
}

__dead2
ledger_t
pmap_ledger_alloc(void)
{
	panic("%s: unsupported", __func__);
}

__dead2
void
pmap_ledger_free(ledger_t ledger)
{
	panic("%s: unsupported, ledger=%p", __func__, ledger);
}

kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
    unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
	return KERN_NOT_SUPPORTED;
}

void *
pmap_map_compressor_page(ppnum_t pn)
{
	assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
	return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
}

bool
pmap_clear_refmod_range_options(
	pmap_t pmap __unused,
	vm_map_address_t start __unused,
	vm_map_address_t end __unused,
	unsigned int mask __unused,
	unsigned int options __unused)
{
	/*
	 * x86 doesn't have ranged TLBI instructions, and we already have
	 * the pmap_flush_context. This operation isn't implemented.
	 */
	return false;
}

bool
pmap_supported_feature(pmap_t pmap, pmap_feature_flags_t feat)
{
	switch (feat) {
	case PMAP_FEAT_UEXEC:
		return pmap != NULL && is_ept_pmap(pmap);
	default:
		return false;
	}
}
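
/*
 * Illustrative usage sketch (editor's addition): unlike the stubs above,
 * the feature query carries real logic; on x86, PMAP_FEAT_UEXEC is
 * reported only for EPT (guest) pmaps. "guest_pmap" is hypothetical:
 *
 *	if (pmap_supported_feature(guest_pmap, PMAP_FEAT_UEXEC)) {
 *		(guest user-execute permissions are available)
 *	}
 */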