1 /*
2 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_assert.h>
30
31 #include <vm/pmap.h>
32 #include <vm/vm_map_xnu.h>
33 #include <vm/vm_kern_xnu.h>
34 #include <vm/vm_page_internal.h>
35 #include <kern/ledger.h>
36 #include <kern/zalloc_internal.h>
37 #include <i386/pmap_internal.h>
38
/*
 * Forward declarations for the PTE-range teardown/reuse helpers used in
 * this file.  Each operates on the PTEs in [spte, epte) mapping the
 * range starting at "va" in "pmap".
 */
void pmap_remove_range(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte);

/* Range removal with PMAP_OPTIONS_* behavior modifiers. */
static void pmap_remove_range_options(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte,
	int             options);

/* Mark/unmark a PTE range as "reusable" for accounting purposes. */
void pmap_reusable_range(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte,
	boolean_t       reusable);
58
/*
 * NOTE(review): presumably records the address of a PTE found to be
 * corrupt, for later diagnostics/panic reporting — confirm against the
 * consumers of this symbol (not visible in this excerpt).
 */
pt_entry_t *PTE_corrupted_ptr;

#if DEVELOPMENT || DEBUG
/* Debug knob: non-zero injects simulated PTE corruption — TODO confirm usage. */
int pmap_inject_pte_corruption;
/* Telemetry counters for the PTE update paths (DEVELOPMENT/DEBUG only). */
uint32_t pmap_update_clear_pte_count;
uint32_t pmap_update_invalid_pte_count;
#endif
66
67 /*
68 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
69 * on a NBPDE boundary.
70 */
71
72 uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)73 pmap_shared_region_size_min(__unused pmap_t pmap)
74 {
75 return NBPDE;
76 }
77
78 uint64_t
pmap_commpage_size_min(__unused pmap_t pmap)79 pmap_commpage_size_min(__unused pmap_t pmap)
80 {
81 return NBPDE;
82 }
83
84 /*
85 * kern_return_t pmap_nest(grand, subord, va_start, size)
86 *
87 * grand = the pmap that we will nest subord into
88 * subord = the pmap that goes into the grand
89 * va_start = start of range in pmap to be inserted
90 * size = Size of nest area (up to 16TB)
91 *
92 * Inserts a pmap into another. This is used to implement shared segments.
93 *
 * Note that we depend upon higher level VM locks to ensure that things don't change while
 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
 * or doing 2 nests at once.
97 */
98
99 /*
100 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
101 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
102 * container and the "grand" parent. A minor optimization to consider for the
103 * future: make the "subord" truly a container rather than a full-fledged
104 * pagetable hierarchy which can be unnecessarily sparse (DRK).
105 */
106
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
{
	vm_map_offset_t vaddr;
	pd_entry_t *pde, *npde;
	unsigned int i;
	uint64_t num_pde;

	/* Nesting is only supported for regular (non-EPT) pmaps. */
	assert(!is_ept_pmap(grand));
	assert(!is_ept_pmap(subord));

	/*
	 * Both start and size must be aligned to the minimum nesting
	 * granule, and size is capped at 16TB (65536 * 2^28 bytes).
	 */
	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (va_start & (pmap_shared_region_size_min(grand) - 1)) ||
	    ((size >> 28) > 65536)) {       /* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
	}

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX", size);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(va_start));

	vaddr = (vm_map_offset_t)va_start;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK_EXCLUSIVE(subord);

	subord->pm_shared = TRUE;

	/*
	 * Pass 1: ensure the subordinate pmap has valid paging structures
	 * covering the whole range, expanding as needed.  Subranges that
	 * are PDPT-aligned and at least a full PDPT entry long are handled
	 * a PDPT entry (NPDEPG PDEs) at a time; the rest one PDE at a time.
	 */
	for (i = 0; i < num_pde;) {
		if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
			npde = pmap64_pdpt(subord, vaddr);

			/*
			 * Expansion must be done with the pmap lock dropped;
			 * re-look-up and re-check validity after relocking.
			 */
			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap64_pdpt(subord, vaddr);
			}
			/* Tag the PDPT entry so unnest can recognize 1GiB-level nesting. */
			*npde |= INTEL_PDPTE_NESTED;
			vaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap_pde(subord, vaddr);
			}
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	/*
	 * Pass 2: copy the subordinate's PDPT/PDE entries into the grand
	 * pmap, so both pmaps share the underlying page tables.
	 */
	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				/* Grand pmap lock must be dropped to expand the PML4. */
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0) {
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			}
			pmap_store_pte(FALSE, pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap_pde(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0) {
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			}
			vaddr += NBPDE;
			pmap_store_pte(FALSE, pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}
223
224 /*
225 * kern_return_t pmap_unnest(grand, vaddr)
226 *
227 * grand = the pmap that we will un-nest subord from
228 * vaddr = start of range in pmap to be unnested
229 *
230 * Removes a pmap from another. This is used to implement shared segments.
231 */
232
kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	/* Tracks the current PDPT slot so a 1GiB-nested entry is cleared once. */
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	/* Callers must pass a range aligned to the nesting granule. */
	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (vaddr & (pmap_shared_region_size_min(grand) - 1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...",
		    grand, vaddr, size);
	}

	assert(!is_ept_pmap(grand));

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE - 1);

	/* Round the end up to a PDE boundary, guarding against 64-bit overflow. */
	if (os_add_overflow(vaddr, size + NBPDE - 1, &va_end)) {
		panic("pmap_unnest: Overflow when calculating range end: s=0x%llx sz=0x%llx\n", vaddr, size);
	}

	va_end &= ~(NBPDE - 1);
	size = va_end - va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde;) {
		/*
		 * On entering a new PDPT slot, check whether the slot was
		 * nested at the PDPT (1GiB) level; if so, clear that single
		 * entry and skip the NPDEPG PDEs it covers.
		 */
		if (pdptnum(grand, vaddr) != npdpt) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(FALSE, pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		/* Otherwise clear the individual 2MiB nested PDE. */
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) {
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
		}
		pmap_store_pte(FALSE, pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	/* Shoot down any stale translations for the unnested range. */
	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}
296
297 kern_return_t
pmap_unnest_options(pmap_t grand,addr64_t vaddr,__unused uint64_t size,__unused unsigned int options)298 pmap_unnest_options(
299 pmap_t grand,
300 addr64_t vaddr,
301 __unused uint64_t size,
302 __unused unsigned int options)
303 {
304 return pmap_unnest(grand, vaddr, size);
305 }
306
307 /* Invoked by the Mach VM to determine the platform specific unnest region */
308
309 boolean_t
pmap_adjust_unnest_parameters(pmap_t p,vm_map_offset_t * s,vm_map_offset_t * e)310 pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
311 {
312 pd_entry_t *pdpte;
313 boolean_t rval = FALSE;
314
315 PMAP_LOCK_EXCLUSIVE(p);
316
317 pdpte = pmap64_pdpt(p, *s);
318 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
319 *s &= ~(NBPDPT - 1);
320 rval = TRUE;
321 }
322
323 pdpte = pmap64_pdpt(p, *e);
324 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
325 *e = ((*e + NBPDPT) & ~(NBPDPT - 1));
326 rval = TRUE;
327 }
328
329 PMAP_UNLOCK_EXCLUSIVE(p);
330
331 return rval;
332 }
333
/*
 * Return the physical address mapped at virtual address "va" in "pmap",
 * or 0 when no valid translation exists.  Handles both 2MiB superpage
 * (PTE_PS) and 4KiB mappings, for regular and EPT pmaps.
 */
pmap_paddr_t
pmap_find_pa(pmap_t pmap, addr64_t va)
{
	pt_entry_t *ptp;
	pd_entry_t *pdep;
	pd_entry_t pde;
	pt_entry_t pte;
	boolean_t is_ept, locked = FALSE;
	pmap_paddr_t pa = 0;

	is_ept = is_ept_pmap(pmap);

	/*
	 * User pmaps are locked exclusively; the kernel pmap (or calls made
	 * from the kernel debugger context) only disable preemption.
	 */
	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	/* A pmap with no remaining references has nothing to report. */
	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			/* 2MiB superpage: add the offset within the large page. */
			pa = pte_to_pa(pde) + (va & I386_LPGMASK);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				pa = pte_to_pa(pte) + (va & PAGE_MASK);
			}
		}
	}
pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	return pa;
}
378
379 /*
380 * pmap_find_phys returns the (4K) physical page number containing a
381 * given virtual address in a given pmap.
382 * Note that pmap_pte may return a pde if this virtual address is
383 * mapped by a large page and this is taken into account in order
384 * to return the correct page number in this case.
385 */
386 ppnum_t
pmap_find_phys(pmap_t pmap,addr64_t va)387 pmap_find_phys(pmap_t pmap, addr64_t va)
388 {
389 ppnum_t ppn = 0;
390 pmap_paddr_t pa = 0;
391
392 pa = pmap_find_pa(pmap, va);
393 ppn = (ppnum_t) i386_btop(pa);
394
395 return ppn;
396 }
397
398 ppnum_t
pmap_find_phys_nofault(pmap_t pmap,addr64_t va)399 pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
400 {
401 if ((pmap == kernel_pmap) ||
402 ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
403 return pmap_find_phys(pmap, va);
404 }
405 return 0;
406 }
407
408 /*
409 * pmap_get_prot returns the equivalent Vm page protections
410 * set on a given address, 'va'. This function is used in the
411 * ml_static_verify_page_protections() routine which is used
412 * by the kext loading code to validate that the TEXT segment
413 * of a kext is mapped executable.
414 */
415 kern_return_t
pmap_get_prot(pmap_t pmap,addr64_t va,vm_prot_t * protp)416 pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
417 {
418 pt_entry_t *ptp;
419 pd_entry_t *pdep;
420 pd_entry_t pde;
421 pt_entry_t pte;
422 boolean_t is_ept, locked = FALSE;
423 kern_return_t retval = KERN_FAILURE;
424 vm_prot_t prot = 0;
425
426 is_ept = is_ept_pmap(pmap);
427
428 if ((pmap != kernel_pmap) && not_in_kdp) {
429 PMAP_LOCK_EXCLUSIVE(pmap);
430 locked = TRUE;
431 } else {
432 mp_disable_preemption();
433 }
434
435 if (os_ref_get_count(&pmap->ref_count) == 0) {
436 goto pfp_exit;
437 }
438
439 pdep = pmap_pde(pmap, va);
440
441 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
442 if (pde & PTE_PS) {
443 prot = VM_PROT_READ;
444
445 if (pde & PTE_WRITE(is_ept)) {
446 prot |= VM_PROT_WRITE;
447 }
448 if (PTE_IS_EXECUTABLE(is_ept, pde)) {
449 prot |= VM_PROT_EXECUTE;
450 }
451 retval = KERN_SUCCESS;
452 } else {
453 ptp = pmap_pte(pmap, va);
454 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
455 prot = VM_PROT_READ;
456
457 if (pte & PTE_WRITE(is_ept)) {
458 prot |= VM_PROT_WRITE;
459 }
460 if (PTE_IS_EXECUTABLE(is_ept, pte)) {
461 prot |= VM_PROT_EXECUTE;
462 }
463 retval = KERN_SUCCESS;
464 }
465 }
466 }
467
468 pfp_exit:
469 if (locked) {
470 PMAP_UNLOCK_EXCLUSIVE(pmap);
471 } else {
472 mp_enable_preemption();
473 }
474
475 if (protp) {
476 *protp = prot;
477 }
478
479 return retval;
480 }
481
482 /*
483 * Update cache attributes for all extant managed mappings.
484 * Assumes PV for this page is locked, and that the page
485 * is managed. We assume that this physical page may be mapped in
486 * both EPT and normal Intel PTEs, so we convert the attributes
487 * to the corresponding format for each pmap.
488 *
489 * We assert that the passed set of attributes is a subset of the
490 * PHYS_CACHEABILITY_MASK.
491 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
	pv_rooted_entry_t pv_h, pv_e;
	pv_hashed_entry_t pvh_e, nexth;
	vm_map_offset_t vaddr;
	pmap_t pmap;
	pt_entry_t *ptep;
	boolean_t is_ept;
	unsigned ept_attributes;

	/* Caller guarantees a managed page and cacheability-only attribute bits. */
	assert(IS_MANAGED_PAGE(pn));
	assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

	/* We don't support the PAT bit for EPT PTEs */
	if (attributes & INTEL_PTE_NCACHE) {
		ept_attributes = INTEL_EPT_NCACHE;
	} else {
		ept_attributes = INTEL_EPT_WB;
	}

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		/* Walk the circular PV list rooted at pv_h, updating each mapping. */
		do {
			pmap = pv_e->pmap;
			vaddr = PVE_VA(pv_e);
			ptep = pmap_pte(pmap, vaddr);

			/* A PV entry with no backing PTE indicates corruption. */
			if (0 == ptep) {
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
			}

			is_ept = is_ept_pmap(pmap);

			/* Capture the next link before mutating the current entry's PTE. */
			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			if (!is_ept) {
				pmap_update_pte(is_ept, ptep, PHYS_CACHEABILITY_MASK, attributes, true);
			} else {
				pmap_update_pte(is_ept, ptep, INTEL_EPT_CACHE_MASK, ept_attributes, true);
			}
			/* Invalidate the stale translation on all CPUs using this pmap. */
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}
550
551 void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)552 x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
553 {
554 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
555
556 if (dofilter) {
557 CPU_CR3_MARK_INACTIVE();
558 } else {
559 CPU_CR3_MARK_ACTIVE();
560 mfence();
561 pmap_update_interrupt();
562 }
563 }
564
565
566 /*
567 * Insert the given physical page (p) at
568 * the specified virtual address (v) in the
569 * target physical map with the protection requested.
570 *
571 * If specified, the page will be wired down, meaning
572 * that the related pte cannot be reclaimed.
573 *
574 * NB: This is the only routine which MAY NOT lazy-evaluate
575 * or lose information. That is, this routine must actually
576 * insert this page into the given map NOW.
577 */
578
579 kern_return_t
pmap_enter(pmap_t pmap,vm_map_offset_t vaddr,ppnum_t pn,vm_prot_t prot,vm_prot_t fault_type,unsigned int flags,boolean_t wired,pmap_mapping_type_t mapping_type)580 pmap_enter(
581 pmap_t pmap,
582 vm_map_offset_t vaddr,
583 ppnum_t pn,
584 vm_prot_t prot,
585 vm_prot_t fault_type,
586 unsigned int flags,
587 boolean_t wired,
588 pmap_mapping_type_t mapping_type)
589 {
590 return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL, mapping_type);
591 }
592
/* Software lock bit within a PTE; the EPT argument is currently unused. */
#define PTE_LOCK(EPT) INTEL_PTE_SWLOCK

static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);

/*
 * Acquire the per-PTE software lock bit (INTEL_PTE_SWLOCK) via a
 * spin-then-CAS loop.  The relaxed load spins cheaply until the bit
 * appears clear; the acquire-ordered compare-exchange then attempts to
 * set it atomically, retrying from the top on failure.
 */
void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
	pt_entry_t pte;
plretry:
	/* Spin (with PAUSE to be polite to the sibling hyperthread) while held. */
	while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
		__builtin_ia32_pause();
	}
	/* Try to set the lock bit; "pte" holds the observed unlocked value. */
	if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
		return;
	}

	goto plretry;
}

/*
 * Release the per-PTE software lock bit with release ordering, so all
 * PTE updates made under the lock are visible before the bit clears.
 */
void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
	__c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}
618
619 kern_return_t
pmap_enter_options_addr(pmap_t pmap,vm_map_address_t v,pmap_paddr_t pa,vm_prot_t prot,vm_prot_t fault_type,unsigned int flags,boolean_t wired,unsigned int options,__unused void * arg,pmap_mapping_type_t mapping_type)620 pmap_enter_options_addr(
621 pmap_t pmap,
622 vm_map_address_t v,
623 pmap_paddr_t pa,
624 vm_prot_t prot,
625 vm_prot_t fault_type,
626 unsigned int flags,
627 boolean_t wired,
628 unsigned int options,
629 __unused void *arg,
630 pmap_mapping_type_t mapping_type)
631 {
632 return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg, mapping_type);
633 }
634
635 kern_return_t
pmap_enter_options(pmap_t pmap,vm_map_offset_t vaddr,ppnum_t pn,vm_prot_t prot,__unused vm_prot_t fault_type,unsigned int flags,boolean_t wired,unsigned int options,void * arg,__unused pmap_mapping_type_t mapping_type)636 pmap_enter_options(
637 pmap_t pmap,
638 vm_map_offset_t vaddr,
639 ppnum_t pn,
640 vm_prot_t prot,
641 __unused vm_prot_t fault_type,
642 unsigned int flags,
643 boolean_t wired,
644 unsigned int options,
645 void *arg,
646 __unused pmap_mapping_type_t mapping_type)
647 {
648 pt_entry_t *pte = NULL;
649 pv_rooted_entry_t pv_h;
650 ppnum_t pai;
651 pv_hashed_entry_t pvh_e;
652 pv_hashed_entry_t pvh_new;
653 pt_entry_t template;
654 pmap_paddr_t old_pa;
655 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
656 boolean_t need_tlbflush = FALSE;
657 boolean_t set_NX;
658 char oattr;
659 boolean_t old_pa_locked;
660 /* 2MiB mappings are confined to x86_64 by VM */
661 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
662 vm_object_t delpage_pm_obj = NULL;
663 uint64_t delpage_pde_index = 0;
664 pt_entry_t old_pte;
665 kern_return_t kr = KERN_FAILURE;
666 boolean_t is_ept;
667 boolean_t is_altacct;
668 boolean_t ptelocked = FALSE;
669
670 pmap_intr_assert();
671
672 if (__improbable(pmap == PMAP_NULL)) {
673 return KERN_INVALID_ARGUMENT;
674 }
675 if (__improbable(pn == vm_page_guard_addr)) {
676 return KERN_INVALID_ARGUMENT;
677 }
678
679 is_ept = is_ept_pmap(pmap);
680
681 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
682 * unused value for that scenario.
683 */
684 assert(pn != vm_page_fictitious_addr);
685
686
687 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
688 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
689 prot);
690
691 if ((prot & VM_PROT_EXECUTE) || __improbable(is_ept && (prot & VM_PROT_UEXEC))) {
692 set_NX = FALSE;
693 } else {
694 set_NX = TRUE;
695 }
696
697 #if DEVELOPMENT || DEBUG
698 if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
699 set_NX = FALSE;
700 }
701
702 if (__improbable(set_NX && (pmap == kernel_pmap) &&
703 ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
704 (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
705 set_NX = FALSE;
706 }
707 #endif
708
709 pvh_new = PV_HASHED_ENTRY_NULL;
710 Retry:
711 pvh_e = PV_HASHED_ENTRY_NULL;
712
713 PMAP_LOCK_SHARED(pmap);
714
715 /*
716 * Expand pmap to include this pte. Assume that
717 * pmap is always expanded to include enough hardware
718 * pages to map one VM page.
719 */
720 if (__improbable(superpage)) {
721 while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
722 /* need room for another pde entry */
723 PMAP_UNLOCK_SHARED(pmap);
724 kr = pmap_expand_pdpt(pmap, vaddr, options);
725 if (kr != KERN_SUCCESS) {
726 goto done1;
727 }
728 PMAP_LOCK_SHARED(pmap);
729 }
730 } else {
731 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
732 /*
733 * Must unlock to expand the pmap
734 * going to grow pde level page(s)
735 */
736 PMAP_UNLOCK_SHARED(pmap);
737 kr = pmap_expand(pmap, vaddr, options);
738 if (kr != KERN_SUCCESS) {
739 goto done1;
740 }
741 PMAP_LOCK_SHARED(pmap);
742 }
743 }
744
745 if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
746 PMAP_UNLOCK_SHARED(pmap);
747 kr = KERN_SUCCESS;
748 goto done1;
749 }
750
751 if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
752 /*
753 * There is still an empty page table mapped that
754 * was used for a previous base page mapping.
755 * Remember the PDE and the PDE index, so that we
756 * can free the page at the end of this function.
757 */
758 delpage_pde_index = pdeidx(pmap, vaddr);
759 delpage_pm_obj = pmap->pm_obj;
760 pmap_store_pte(is_ept, pte, 0);
761 }
762
763 PTE_LOCK_LOCK(pte);
764 ptelocked = TRUE;
765
766 old_pa = pte_to_pa(*pte);
767 pai = pa_index(old_pa);
768 old_pa_locked = FALSE;
769
770 if (old_pa == 0 &&
771 PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
772 /*
773 * "pmap" should be locked at this point, so this should
774 * not race with another pmap_enter() or pmap_remove_range().
775 */
776 assert(pmap != kernel_pmap);
777
778 /* one less "compressed" */
779 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
780 PAGE_SIZE);
781 if (*pte & PTE_COMPRESSED_ALT) {
782 pmap_ledger_debit(
783 pmap,
784 task_ledgers.alternate_accounting_compressed,
785 PAGE_SIZE);
786 } else {
787 /* was part of the footprint */
788 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
789 PAGE_SIZE);
790 }
791 /* marker will be cleared below */
792 }
793
794 /*
795 * if we have a previous managed page, lock the pv entry now. after
796 * we lock it, check to see if someone beat us to the lock and if so
797 * drop the lock
798 */
799 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
800 LOCK_PVH(pai);
801 old_pa_locked = TRUE;
802 old_pa = pte_to_pa(*pte);
803 if (0 == old_pa) {
804 UNLOCK_PVH(pai); /* another path beat us to it */
805 old_pa_locked = FALSE;
806 }
807 }
808
809 /*
810 * Special case if the incoming physical page is already mapped
811 * at this address.
812 */
813 if (old_pa == pa) {
814 pt_entry_t old_attributes =
815 *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));
816
817 /*
818 * May be changing its wired attribute or protection
819 */
820
821 template = pa_to_pte(pa);
822
823 if (__probable(!is_ept)) {
824 template |= INTEL_PTE_VALID;
825 } else {
826 template |= INTEL_EPT_IPAT;
827 }
828
829 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
830
831 /*
832 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
833 */
834 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
835 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
836 if (!(flags & VM_MEM_GUARDED)) {
837 template |= INTEL_PTE_PAT;
838 }
839 template |= INTEL_PTE_NCACHE;
840 }
841 if (pmap != kernel_pmap && !is_ept) {
842 template |= INTEL_PTE_USER;
843 }
844
845 if (prot & VM_PROT_READ) {
846 template |= PTE_READ(is_ept);
847 }
848
849 if (prot & VM_PROT_WRITE) {
850 template |= PTE_WRITE(is_ept);
851 if (is_ept && !pmap_ept_support_ad) {
852 template |= PTE_MOD(is_ept);
853 if (old_pa_locked) {
854 assert(IS_MANAGED_PAGE(pai));
855 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
856 }
857 }
858 }
859
860 if (prot & VM_PROT_EXECUTE) {
861 assert(set_NX == 0);
862 template = pte_set_ex(template, is_ept);
863 }
864
865 if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
866 assert(set_NX == 0);
867 template = pte_set_uex(template);
868 }
869
870 if (set_NX) {
871 template = pte_remove_ex(template, is_ept);
872 }
873
874 if (wired) {
875 template |= PTE_WIRED;
876 if (!iswired(old_attributes)) {
877 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
878 }
879 } else {
880 if (iswired(old_attributes)) {
881 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
882 }
883 }
884
885 if (superpage) { /* this path can not be used */
886 template |= PTE_PS; /* to change the page size! */
887 }
888 if (old_attributes == template) {
889 goto dont_update_pte;
890 }
891
892 /* Determine delta, PV locked */
893 need_tlbflush =
894 ((old_attributes ^ template) != PTE_WIRED);
895
896 /* Optimisation: avoid TLB flush when adding writability */
897 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
898 if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
899 need_tlbflush = FALSE;
900 }
901 }
902
903 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
904 if (__improbable(is_ept && !pmap_ept_support_ad)) {
905 template |= PTE_REF(is_ept);
906 if (old_pa_locked) {
907 assert(IS_MANAGED_PAGE(pai));
908 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
909 }
910 }
911
912 /* store modified PTE and preserve RC bits */
913 pt_entry_t npte, opte;
914
915 assert((*pte & PTE_LOCK(is_ept)) != 0);
916
917 do {
918 opte = *pte;
919 npte = template | (opte & (PTE_REF(is_ept) |
920 PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
921 } while (!pmap_cmpx_pte(pte, opte, npte));
922
923 DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, opte, uint64_t, npte);
924
925 dont_update_pte:
926 if (old_pa_locked) {
927 UNLOCK_PVH(pai);
928 old_pa_locked = FALSE;
929 }
930 goto done2;
931 }
932
933 /*
934 * Outline of code from here:
935 * 1) If va was mapped, update TLBs, remove the mapping
936 * and remove old pvlist entry.
937 * 2) Add pvlist entry for new mapping
938 * 3) Enter new mapping.
939 *
940 * If the old physical page is not managed step 1) is skipped
941 * (except for updating the TLBs), and the mapping is
942 * overwritten at step 3). If the new physical page is not
943 * managed, step 2) is skipped.
944 */
945 /* TODO: add opportunistic refmod collect */
946 if (old_pa != (pmap_paddr_t) 0) {
947 boolean_t was_altacct = FALSE;
948
949 /*
950 * Don't do anything to pages outside valid memory here.
951 * Instead convince the code that enters a new mapping
952 * to overwrite the old one.
953 */
954
955 /* invalidate the PTE */
956 pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
957 /* propagate invalidate everywhere */
958 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
959 /* remember reference and change */
960 old_pte = *pte;
961 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
962 /* completely invalidate the PTE */
963 pmap_store_pte(is_ept, pte, PTE_LOCK(is_ept));
964
965 if (IS_MANAGED_PAGE(pai)) {
966 /*
967 * Remove the mapping from the pvlist for
968 * this physical page.
969 * We'll end up with either a rooted pv or a
970 * hashed pv
971 */
972 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
973 }
974
975 if (IS_MANAGED_PAGE(pai)) {
976 pmap_assert(old_pa_locked == TRUE);
977 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
978 if (pmap != kernel_pmap) {
979 /* update ledgers */
980 if (was_altacct) {
981 assert(IS_INTERNAL_PAGE(pai));
982 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
983 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
984 } else if (IS_REUSABLE_PAGE(pai)) {
985 assert(!was_altacct);
986 assert(IS_INTERNAL_PAGE(pai));
987 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
988 /* was already not in phys_footprint */
989 } else if (IS_INTERNAL_PAGE(pai)) {
990 assert(!was_altacct);
991 assert(!IS_REUSABLE_PAGE(pai));
992 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
993 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
994 } else {
995 /* not an internal page */
996 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
997 }
998 }
999 if (iswired(*pte)) {
1000 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
1001 PAGE_SIZE);
1002 }
1003
1004 if (!is_ept) {
1005 pmap_phys_attributes[pai] |= oattr;
1006 } else {
1007 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
1008 }
1009 } else {
1010 /*
1011 * old_pa is not managed.
1012 * Do removal part of accounting.
1013 */
1014
1015 if (pmap != kernel_pmap) {
1016 #if 00
1017 assert(pmap->stats.device > 0);
1018 OSAddAtomic(-1, &pmap->stats.device);
1019 #endif
1020 }
1021 if (iswired(*pte)) {
1022 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1023 }
1024 }
1025 }
1026
1027 /*
1028 * if we had a previously managed paged locked, unlock it now
1029 */
1030 if (old_pa_locked) {
1031 UNLOCK_PVH(pai);
1032 old_pa_locked = FALSE;
1033 }
1034
1035 pai = pa_index(pa); /* now working with new incoming phys page */
1036 if (IS_MANAGED_PAGE(pai)) {
1037 /*
1038 * Step 2) Enter the mapping in the PV list for this
1039 * physical page.
1040 */
1041 pv_h = pai_to_pvh(pai);
1042
1043 LOCK_PVH(pai);
1044
1045 if (pv_h->pmap == PMAP_NULL) {
1046 /*
1047 * No mappings yet, use rooted pv
1048 */
1049 pv_h->va_and_flags = vaddr;
1050 pv_h->pmap = pmap;
1051 queue_init(&pv_h->qlink);
1052
1053 if (options & PMAP_OPTIONS_INTERNAL) {
1054 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
1055 } else {
1056 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
1057 }
1058 if (options & PMAP_OPTIONS_REUSABLE) {
1059 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1060 } else {
1061 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1062 }
1063 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1064 IS_INTERNAL_PAGE(pai)) {
1065 pv_h->va_and_flags |= PVE_IS_ALTACCT;
1066 is_altacct = TRUE;
1067 } else {
1068 pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
1069 is_altacct = FALSE;
1070 }
1071 } else {
1072 /*
1073 * Add new pv_hashed_entry after header.
1074 */
1075 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
1076 pvh_e = pvh_new;
1077 pvh_new = PV_HASHED_ENTRY_NULL;
1078 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
1079 PV_HASHED_ALLOC(&pvh_e);
1080 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1081 /*
1082 * the pv list is empty. if we are on
1083 * the kernel pmap we'll use one of
1084 * the special private kernel pv_e's,
1085 * else, we need to unlock
1086 * everything, zalloc a pv_e, and
1087 * restart bringing in the pv_e with
1088 * us.
1089 */
1090 if (kernel_pmap == pmap) {
1091 PV_HASHED_KERN_ALLOC(&pvh_e);
1092 } else {
1093 UNLOCK_PVH(pai);
1094 PTE_LOCK_UNLOCK(pte);
1095 PMAP_UNLOCK_SHARED(pmap);
1096 pmap_pv_throttle(pmap);
1097 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1098 goto Retry;
1099 }
1100 }
1101 }
1102
1103 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1104 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
1105 }
1106
1107 pvh_e->va_and_flags = vaddr;
1108 pvh_e->pmap = pmap;
1109 pvh_e->ppn = pn;
1110 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1111 IS_INTERNAL_PAGE(pai)) {
1112 pvh_e->va_and_flags |= PVE_IS_ALTACCT;
1113 is_altacct = TRUE;
1114 } else {
1115 pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
1116 is_altacct = FALSE;
1117 }
1118 pv_hash_add(pvh_e, pv_h);
1119
1120 /*
1121 * Remember that we used the pvlist entry.
1122 */
1123 pvh_e = PV_HASHED_ENTRY_NULL;
1124 }
1125
1126 /*
1127 * only count the mapping
1128 * for 'managed memory'
1129 */
1130 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1131 if (pmap != kernel_pmap) {
1132 /* update ledgers */
1133 if (is_altacct) {
1134 /* internal but also alternate accounting */
1135 assert(IS_INTERNAL_PAGE(pai));
1136 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1137 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1138 /* alternate accounting, so not in footprint */
1139 } else if (IS_REUSABLE_PAGE(pai)) {
1140 assert(!is_altacct);
1141 assert(IS_INTERNAL_PAGE(pai));
1142 pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
1143 /* internal but reusable: not in footprint */
1144 } else if (IS_INTERNAL_PAGE(pai)) {
1145 assert(!is_altacct);
1146 assert(!IS_REUSABLE_PAGE(pai));
1147 /* internal: add to footprint */
1148 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1149 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1150 } else {
1151 /* not internal: not in footprint */
1152 pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
1153 }
1154 }
1155 } else if (last_managed_page == 0) {
1156 /* Account for early mappings created before "managed pages"
1157 * are determined. Consider consulting the available DRAM map.
1158 */
1159 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1160 if (pmap != kernel_pmap) {
1161 #if 00
1162 OSAddAtomic(+1, &pmap->stats.device);
1163 PMAP_STATS_PEAK(pmap->stats.device);
1164 #endif
1165 }
1166 }
1167 /*
1168 * Step 3) Enter the mapping.
1169 *
1170 * Build a template to speed up entering -
1171 * only the pfn changes.
1172 */
1173 template = pa_to_pte(pa);
1174
1175 if (!is_ept) {
1176 template |= INTEL_PTE_VALID;
1177 } else {
1178 template |= INTEL_EPT_IPAT;
1179 }
1180
1181 /*
1182 * DRK: It may be worth asserting on cache attribute flags that diverge
1183 * from the existing physical page attributes.
1184 */
1185
1186 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
1187
1188 /*
1189 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1190 */
1191 if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
1192 if (!(flags & VM_MEM_GUARDED)) {
1193 template |= INTEL_PTE_PAT;
1194 }
1195 template |= INTEL_PTE_NCACHE;
1196 }
1197 if (pmap != kernel_pmap && !is_ept) {
1198 template |= INTEL_PTE_USER;
1199 }
1200 if (prot & VM_PROT_READ) {
1201 template |= PTE_READ(is_ept);
1202 }
1203 if (prot & VM_PROT_WRITE) {
1204 template |= PTE_WRITE(is_ept);
1205 if (is_ept && !pmap_ept_support_ad) {
1206 template |= PTE_MOD(is_ept);
1207 if (IS_MANAGED_PAGE(pai)) {
1208 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
1209 }
1210 }
1211 }
1212 if (prot & VM_PROT_EXECUTE) {
1213 assert(set_NX == 0);
1214 template = pte_set_ex(template, is_ept);
1215 }
1216 if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
1217 assert(set_NX == 0);
1218 template = pte_set_uex(template);
1219 }
1220
1221 if (set_NX) {
1222 template = pte_remove_ex(template, is_ept);
1223 }
1224 if (wired) {
1225 template |= INTEL_PTE_WIRED;
1226 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1227 }
1228 if (__improbable(superpage)) {
1229 template |= INTEL_PTE_PS;
1230 }
1231
1232 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1233 if (__improbable(is_ept && !pmap_ept_support_ad)) {
1234 template |= PTE_REF(is_ept);
1235 if (IS_MANAGED_PAGE(pai)) {
1236 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
1237 }
1238 }
1239 template |= PTE_LOCK(is_ept);
1240 pmap_store_pte(is_ept, pte, template);
1241 DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, 0, uint64_t, template);
1242
1243 /*
1244 * if this was a managed page we delayed unlocking the pv until here
1245 * to prevent pmap_page_protect et al from finding it until the pte
1246 * has been stored
1247 */
1248 if (IS_MANAGED_PAGE(pai)) {
1249 UNLOCK_PVH(pai);
1250 }
1251 done2:
1252 if (need_tlbflush == TRUE) {
1253 if (options & PMAP_OPTIONS_NOFLUSH) {
1254 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1255 } else {
1256 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1257 }
1258 }
1259 if (ptelocked) {
1260 PTE_LOCK_UNLOCK(pte);
1261 }
1262 PMAP_UNLOCK_SHARED(pmap);
1263
1264 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1265 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1266 }
1267 if (pvh_new != PV_HASHED_ENTRY_NULL) {
1268 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1269 }
1270
1271 if (delpage_pm_obj) {
1272 vm_page_t m;
1273
1274 vm_object_lock(delpage_pm_obj);
1275 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1276 if (m == VM_PAGE_NULL) {
1277 panic("pmap_enter: pte page not in object");
1278 }
1279 VM_PAGE_FREE(m);
1280 vm_object_unlock(delpage_pm_obj);
1281 OSAddAtomic(-1, &inuse_ptepages_count);
1282 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1283 }
1284
1285 kr = KERN_SUCCESS;
1286 done1:
1287 if (__improbable((kr == KERN_SUCCESS) && (pmap == kernel_pmap) &&
1288 zone_spans_ro_va(vaddr, vaddr + PAGE_SIZE))) {
1289 pmap_page_protect((ppnum_t)atop_kernel(kvtophys(vaddr)), VM_PROT_READ);
1290 }
1291 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
1292 return kr;
1293 }
1294
1295 /*
1296 * Remove a range of hardware page-table entries.
1297 * The entries given are the first (inclusive)
1298 * and last (exclusive) entries for the VM pages.
1299 * The virtual address is the va for the first pte.
1300 *
1301 * The pmap must be locked.
1302 * If the pmap is not the kernel pmap, the range must lie
1303 * entirely within one pte-page. This is NOT checked.
1304 * Assumes that the pte-page exists.
1305 */
1306
1307 void
pmap_remove_range(pmap_t pmap,vm_map_offset_t start_vaddr,pt_entry_t * spte,pt_entry_t * epte)1308 pmap_remove_range(
1309 pmap_t pmap,
1310 vm_map_offset_t start_vaddr,
1311 pt_entry_t *spte,
1312 pt_entry_t *epte)
1313 {
1314 pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1315 PMAP_OPTIONS_REMOVE);
1316 }
1317
/*
 * Worker for pmap_remove_range() / pmap_remove_options(): tear down the
 * hardware PTEs in [spte, epte), whose first entry maps start_vaddr.
 *
 * The removal is done in two passes:
 *   1) "freeze" pass: clear the valid bit of every live PTE (and clear any
 *      compressed markers when PMAP_OPTIONS_REMOVE is set), then shoot down
 *      the TLBs on all CPUs so no processor can keep updating ref/mod state;
 *   2) reap pass: for each frozen entry, unlink the mapping from its PV
 *      list, fold the PTE's ref/mod bits into pmap_phys_attributes[], and
 *      zero the PTE completely.
 * Ledger adjustments are accumulated in local counters during the walk and
 * applied in bulk at the end.
 *
 * The pmap lock must be held exclusively by the caller (no new valid PTEs
 * can appear during the walk — see the comment near the early-exit check
 * in the reap loop).  PV-head locks are taken per page inside pass 2.
 */
static void
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t *spte,
	pt_entry_t *epte,
	int options)
{
	pt_entry_t *cpte;
	pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_e;
	int pvh_cnt = 0;
	int num_removed, num_unwired, num_found, num_invalid;
	int ledgers_external, ledgers_reusable, ledgers_internal, ledgers_alt_internal;
	uint64_t ledgers_compressed, ledgers_alt_compressed;
	ppnum_t pai;
	pmap_paddr_t pa;
	vm_map_offset_t vaddr;
	boolean_t is_ept = is_ept_pmap(pmap);
	boolean_t was_altacct;

	num_removed = 0;
	num_unwired = 0;
	num_found = 0;
	num_invalid = 0;
	ledgers_external = 0;
	ledgers_reusable = 0;
	ledgers_internal = 0;
	ledgers_compressed = 0;
	ledgers_alt_internal = 0;
	ledgers_alt_compressed = 0;

	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0) {
			/*
			 * No physical page here; the slot may still carry a
			 * "compressed" marker that must be accounted for and
			 * cleared when we're doing a full removal.
			 */
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				ledgers_compressed++;
				if (p & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				/* clear marker(s) */
				/* XXX probably does not need to be atomic! */
				pmap_update_pte(is_ept, cpte, INTEL_PTE_COMPRESSED_MASK, 0, true);
			}
			continue;
		}
		num_found++;

		if (iswired(p)) {
			num_unwired++;
		}

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 * Outside range of managed physical memory.
			 * Just remove the mappings.
			 */
			pmap_store_pte(is_ept, cpte, 0);
			continue;
		}

		if ((p & PTE_VALID_MASK(is_ept)) == 0) {
			num_invalid++;
		}

		/* invalidate the PTE */
		pmap_update_pte(is_ept, cpte, PTE_VALID_MASK(is_ept), 0, true);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	/*
	 * Second pass: with the TLB shoot-down complete, reap each frozen
	 * mapping — PV-list removal, ref/mod collection, final PTE clear.
	 */
	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pa = pte_to_pa(*cpte);
		if (pa == 0) {
check_pte_for_compressed_marker:
			/*
			 * This PTE could have been replaced with a
			 * "compressed" marker after our first "freeze"
			 * loop above, so check again.
			 */
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				ledgers_compressed++;
				if (*cpte & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				pmap_store_pte(is_ept, cpte, 0);
			}
			continue;
		}

		pai = pa_index(pa);

		LOCK_PVH(pai);

		/*
		 * Re-read under the PV-head lock: the page could have been
		 * taken away (or compressed) between our unlocked read above
		 * and acquiring the lock.
		 */
		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			goto check_pte_for_compressed_marker;
		}

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);

		num_removed++;
		/* update ledgers */
		if (was_altacct) {
			/* internal and alternate accounting */
			assert(IS_INTERNAL_PAGE(pai));
			ledgers_internal++;
			ledgers_alt_internal++;
		} else if (IS_REUSABLE_PAGE(pai)) {
			/* internal but reusable */
			assert(!was_altacct);
			assert(IS_INTERNAL_PAGE(pai));
			ledgers_reusable++;
		} else if (IS_INTERNAL_PAGE(pai)) {
			/* internal */
			assert(!was_altacct);
			assert(!IS_REUSABLE_PAGE(pai));
			ledgers_internal++;
		} else {
			/* not internal */
			ledgers_external++;
		}

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		if (!is_ept) {
			pmap_phys_attributes[pai] |=
			    *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
		} else {
			/* EPT encodes ref/mod differently; translate first */
			pmap_phys_attributes[pai] |=
			    ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
		}

		/* completely invalidate the PTE */
		pmap_store_pte(is_ept, cpte, 0);

		UNLOCK_PVH(pai);

		/* chain the reclaimed PV entry for a single bulk free below */
		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
		/* We can encounter at most 'num_found' PTEs for this level
		 * Fewer may be encountered if some were replaced by
		 * compressed markers. No new valid PTEs can be created
		 * since the pmap lock is held exclusively.
		 */
		if (num_removed == num_found) {
			break;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 * Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed) {
		panic("pmap_remove_range: resident_count");
	}
#endif
	if (num_removed) {
		pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	}

	if (pmap != kernel_pmap) {
		if (ledgers_external) {
			pmap_ledger_debit(pmap,
			    task_ledgers.external,
			    machine_ptob(ledgers_external));
		}
		if (ledgers_reusable) {
			pmap_ledger_debit(pmap,
			    task_ledgers.reusable,
			    machine_ptob(ledgers_reusable));
		}
		if (ledgers_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal,
			    machine_ptob(ledgers_internal));
		}
		if (ledgers_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal_compressed,
			    machine_ptob(ledgers_compressed));
		}
		if (ledgers_alt_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting,
			    machine_ptob(ledgers_alt_internal));
		}
		if (ledgers_alt_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting_compressed,
			    machine_ptob(ledgers_alt_compressed));
		}

		/*
		 * phys_footprint counts internal + compressed pages that are
		 * NOT alternate-accounted, hence the subtraction of the
		 * "alt" counters.
		 */
		uint64_t net_debit = (ledgers_internal - ledgers_alt_internal) + (ledgers_compressed - ledgers_alt_compressed);
		if (net_debit) {
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(net_debit));
		}
	}

	if (num_unwired != 0) {
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
	}
	return;
}
1566
1567
1568 /*
1569 * Remove the given range of addresses
1570 * from the specified map.
1571 *
1572 * It is assumed that the start and end are properly
1573 * rounded to the hardware page size.
1574 */
1575 void
pmap_remove(pmap_t map,addr64_t s64,addr64_t e64)1576 pmap_remove(
1577 pmap_t map,
1578 addr64_t s64,
1579 addr64_t e64)
1580 {
1581 pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1582 }
/* number of top-level traversal iterations before we start checking the
 * preemption-latency deadline in the loop below */
#define PLCHECK_THRESHOLD (2)

/*
 * Remove the mappings for [s64, e64) from "map", honoring "options"
 * (passed through to pmap_remove_range_options()).
 *
 * Walks the paging hierarchy top-down: PML4/PDPT holes are skipped a whole
 * entry at a time, and each present PDE is handed to
 * pmap_remove_range_options() — either as a single level-2 entry (superpage)
 * or as the covered run of level-1 PTEs.  The exclusive pmap lock is
 * periodically dropped and re-taken to bound preemption latency.
 */
void
pmap_remove_options(
	pmap_t map,
	addr64_t s64,
	addr64_t e64,
	int options)
{
	pt_entry_t *pde;
	pt_entry_t *spte, *epte;
	addr64_t l64;
	uint64_t deadline = 0;
	boolean_t is_ept;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64) {
		return;
	}

	is_ept = is_ept_pmap(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	PMAP_LOCK_EXCLUSIVE(map);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		/* skip an entire PML4 slot if absent or invalid */
		pml4_entry_t *pml4e = pmap64_pml4(map, s64);
		if ((pml4e == NULL) ||
		    ((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
			if (os_add_overflow(s64, NBPML4, &s64)) {
				/* wrap; clip s64 to e64 */
				s64 = e64;
				break;
			}
			s64 &= ~(PML4MASK);
			continue;
		}
		/* skip an entire PDPT slot if absent or invalid */
		pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
		if ((pdpte == NULL) ||
		    ((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
			if (os_add_overflow(s64, NBPDPT, &s64)) {
				/* wrap; clip s64 to e64 */
				s64 = e64;
				break;
			}
			s64 &= ~(PDPTMASK);
			continue;
		}

		/* l64 = end of this chunk: next PDE boundary, clipped to e64 */
		if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
			l64 = e64;
		} else {
			l64 &= ~(PDE_MAPPED_SIZE - 1);

			if (l64 > e64) {
				l64 = e64;
			}
		}

		pde = pmap_pde(map, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte + 1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range_options(map, s64, spte, epte,
			    options);
		}
		s64 = l64;

		/*
		 * Bound the time the exclusive lock is held: once past
		 * PLCHECK_THRESHOLD iterations, drop and re-take the lock
		 * whenever the TSC deadline expires so waiters can run.
		 */
		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64_nofence() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(map);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(map);
					deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
1687
1688 void
pmap_page_protect(ppnum_t pn,vm_prot_t prot)1689 pmap_page_protect(
1690 ppnum_t pn,
1691 vm_prot_t prot)
1692 {
1693 pmap_page_protect_options(pn, prot, 0, NULL);
1694 }
1695
1696 /*
1697 * Routine: pmap_page_protect_options
1698 *
1699 * Function:
1700 * Lower the permission for all mappings to a given
1701 * page.
1702 */
1703 void
pmap_page_protect_options(ppnum_t pn,vm_prot_t prot,unsigned int options,void * arg)1704 pmap_page_protect_options(
1705 ppnum_t pn,
1706 vm_prot_t prot,
1707 unsigned int options,
1708 void *arg)
1709 {
1710 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1711 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1712 pv_hashed_entry_t nexth;
1713 int pvh_cnt = 0;
1714 pv_rooted_entry_t pv_h;
1715 pv_rooted_entry_t pv_e;
1716 pv_hashed_entry_t pvh_e;
1717 pt_entry_t *pte;
1718 int pai;
1719 pmap_t pmap;
1720 boolean_t remove;
1721 pt_entry_t new_pte_value;
1722 boolean_t is_ept;
1723
1724 pmap_intr_assert();
1725 assert(pn != vm_page_fictitious_addr);
1726 if (pn == vm_page_guard_addr) {
1727 return;
1728 }
1729
1730 pai = ppn_to_pai(pn);
1731
1732 if (!IS_MANAGED_PAGE(pai)) {
1733 /*
1734 * Not a managed page.
1735 */
1736 return;
1737 }
1738
1739 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
1740
1741 /*
1742 * Determine the new protection.
1743 */
1744 switch (prot) {
1745 case VM_PROT_READ:
1746 case VM_PROT_READ | VM_PROT_EXECUTE:
1747 remove = FALSE;
1748 break;
1749 case VM_PROT_ALL:
1750 return; /* nothing to do */
1751 default:
1752 remove = TRUE;
1753 break;
1754 }
1755
1756 pv_h = pai_to_pvh(pai);
1757
1758 LOCK_PVH(pai);
1759
1760
1761 /*
1762 * Walk down PV list, if any, changing or removing all mappings.
1763 */
1764 if (pv_h->pmap == PMAP_NULL) {
1765 goto done;
1766 }
1767
1768 pv_e = pv_h;
1769 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1770
1771 do {
1772 vm_map_offset_t vaddr;
1773
1774 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1775 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1776 /* page was modified, so it will be compressed */
1777 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1778 options |= PMAP_OPTIONS_COMPRESSOR;
1779 }
1780
1781 pmap = pv_e->pmap;
1782 is_ept = is_ept_pmap(pmap);
1783 vaddr = PVE_VA(pv_e);
1784 pte = pmap_pte(pmap, vaddr);
1785
1786 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1787 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1788
1789 if (0 == pte) {
1790 panic("pmap_page_protect() "
1791 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1792 pmap, pn, vaddr);
1793 }
1794 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1795
1796 /*
1797 * Remove the mapping if new protection is NONE
1798 */
1799 if (remove) {
1800 /* Remove per-pmap wired count */
1801 if (iswired(*pte)) {
1802 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1803 }
1804
1805 if (pmap != kernel_pmap &&
1806 (options & PMAP_OPTIONS_COMPRESSOR) &&
1807 IS_INTERNAL_PAGE(pai)) {
1808 assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
1809 /* mark this PTE as having been "compressed" */
1810 new_pte_value = PTE_COMPRESSED;
1811 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1812 new_pte_value |= PTE_COMPRESSED_ALT;
1813 }
1814 } else {
1815 new_pte_value = 0;
1816 }
1817
1818 if (options & PMAP_OPTIONS_NOREFMOD) {
1819 pmap_store_pte(is_ept, pte, new_pte_value);
1820
1821 if (options & PMAP_OPTIONS_NOFLUSH) {
1822 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1823 } else {
1824 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1825 }
1826 } else {
1827 /*
1828 * Remove the mapping, collecting dirty bits.
1829 */
1830 pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
1831
1832 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1833 if (!is_ept) {
1834 pmap_phys_attributes[pai] |=
1835 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1836 } else {
1837 pmap_phys_attributes[pai] |=
1838 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1839 }
1840 if ((options &
1841 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1842 IS_INTERNAL_PAGE(pai) &&
1843 (pmap_phys_attributes[pai] &
1844 PHYS_MODIFIED)) {
1845 /*
1846 * Page is actually "modified" and
1847 * will be compressed. Start
1848 * accounting for it as "compressed".
1849 */
1850 assert(!(options & PMAP_OPTIONS_COMPRESSOR));
1851 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1852 options |= PMAP_OPTIONS_COMPRESSOR;
1853 assert(new_pte_value == 0);
1854 if (pmap != kernel_pmap) {
1855 new_pte_value = PTE_COMPRESSED;
1856 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1857 new_pte_value |= PTE_COMPRESSED_ALT;
1858 }
1859 }
1860 }
1861 pmap_store_pte(is_ept, pte, new_pte_value);
1862 }
1863
1864 #if TESTING
1865 if (pmap->stats.resident_count < 1) {
1866 panic("pmap_page_protect: resident_count");
1867 }
1868 #endif
1869 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1870
1871 /*
1872 * We only ever compress internal pages.
1873 */
1874 if (options & PMAP_OPTIONS_COMPRESSOR) {
1875 assert(IS_INTERNAL_PAGE(pai));
1876 }
1877 if (pmap != kernel_pmap) {
1878 /* update ledgers */
1879 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1880 assert(IS_INTERNAL_PAGE(pai));
1881 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1882 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1883 if (options & PMAP_OPTIONS_COMPRESSOR) {
1884 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1885 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
1886 }
1887 } else if (IS_REUSABLE_PAGE(pai)) {
1888 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1889 assert(IS_INTERNAL_PAGE(pai));
1890 if (options & PMAP_OPTIONS_COMPRESSOR) {
1891 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1892 /* was not in footprint, but is now */
1893 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1894 }
1895 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
1896 } else if (IS_INTERNAL_PAGE(pai)) {
1897 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1898 assert(!IS_REUSABLE_PAGE(pai));
1899 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1900 /*
1901 * Update all stats related to physical
1902 * footprint, which only deals with
1903 * internal pages.
1904 */
1905 if (options & PMAP_OPTIONS_COMPRESSOR) {
1906 /*
1907 * This removal is only being
1908 * done so we can send this page
1909 * to the compressor; therefore
1910 * it mustn't affect total task
1911 * footprint.
1912 */
1913 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1914 } else {
1915 /*
1916 * This internal page isn't
1917 * going to the compressor,
1918 * so adjust stats to keep
1919 * phys_footprint up to date.
1920 */
1921 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1922 }
1923 } else {
1924 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
1925 }
1926 }
1927
1928 /*
1929 * Deal with the pv_rooted_entry.
1930 */
1931
1932 if (pv_e == pv_h) {
1933 /*
1934 * Fix up head later.
1935 */
1936 pv_h->pmap = PMAP_NULL;
1937 } else {
1938 /*
1939 * Delete this entry.
1940 */
1941 pv_hash_remove(pvh_e);
1942 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1943 pvh_eh = pvh_e;
1944
1945 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1946 pvh_et = pvh_e;
1947 }
1948 pvh_cnt++;
1949 }
1950 } else {
1951 /*
1952 * Write-protect, after opportunistic refmod collect
1953 */
1954 if (!is_ept) {
1955 pmap_phys_attributes[pai] |=
1956 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1957 } else {
1958 pmap_phys_attributes[pai] |=
1959 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1960 }
1961
1962 pmap_update_pte(is_ept, pte, PTE_WRITE(is_ept), 0, true);
1963 if (options & PMAP_OPTIONS_NOFLUSH) {
1964 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1965 } else {
1966 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1967 }
1968 }
1969 pvh_e = nexth;
1970 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1971
1972
1973 /*
1974 * If pv_head mapping was removed, fix it up.
1975 */
1976 if (pv_h->pmap == PMAP_NULL) {
1977 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1978
1979 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1980 pv_hash_remove(pvh_e);
1981 pv_h->pmap = pvh_e->pmap;
1982 pv_h->va_and_flags = pvh_e->va_and_flags;
1983 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1984 pvh_eh = pvh_e;
1985
1986 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1987 pvh_et = pvh_e;
1988 }
1989 pvh_cnt++;
1990 }
1991 }
1992 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1993 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1994 }
1995 done:
1996 UNLOCK_PVH(pai);
1997
1998 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
1999 }
2000
2001
2002 /*
2003 * Clear specified attribute bits.
2004 */
2005 void
phys_attribute_clear(ppnum_t pn,int bits,unsigned int options,void * arg)2006 phys_attribute_clear(
2007 ppnum_t pn,
2008 int bits,
2009 unsigned int options,
2010 void *arg)
2011 {
2012 pv_rooted_entry_t pv_h;
2013 pv_hashed_entry_t pv_e;
2014 pt_entry_t *pte = NULL;
2015 int pai;
2016 pmap_t pmap;
2017 char attributes = 0;
2018 boolean_t is_internal, is_reusable, is_altacct, is_ept;
2019 int ept_bits_to_clear;
2020 boolean_t ept_keep_global_mod = FALSE;
2021
2022 if ((bits & PHYS_MODIFIED) &&
2023 (options & PMAP_OPTIONS_NOFLUSH) &&
2024 arg == NULL) {
2025 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
2026 "should not clear 'modified' without flushing TLBs\n",
2027 pn, bits, options, arg);
2028 }
2029
2030 /* We only support converting MOD and REF bits for EPT PTEs in this function */
2031 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
2032
2033 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
2034
2035 pmap_intr_assert();
2036 assert(pn != vm_page_fictitious_addr);
2037 if (pn == vm_page_guard_addr) {
2038 return;
2039 }
2040
2041 pai = ppn_to_pai(pn);
2042
2043 if (!IS_MANAGED_PAGE(pai)) {
2044 /*
2045 * Not a managed page.
2046 */
2047 return;
2048 }
2049
2050 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
2051
2052 pv_h = pai_to_pvh(pai);
2053
2054 LOCK_PVH(pai);
2055
2056
2057 /*
2058 * Walk down PV list, clearing all modify or reference bits.
2059 * We do not have to lock the pv_list because we have
2060 * the per-pmap lock
2061 */
2062 if (pv_h->pmap != PMAP_NULL) {
2063 /*
2064 * There are some mappings.
2065 */
2066
2067 is_internal = IS_INTERNAL_PAGE(pai);
2068 is_reusable = IS_REUSABLE_PAGE(pai);
2069
2070 pv_e = (pv_hashed_entry_t)pv_h;
2071
2072 do {
2073 vm_map_offset_t va;
2074 char pte_bits;
2075
2076 pmap = pv_e->pmap;
2077 is_ept = is_ept_pmap(pmap);
2078 is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
2079 va = PVE_VA(pv_e);
2080 pte_bits = 0;
2081
2082 if (bits) {
2083 pte = pmap_pte(pmap, va);
2084 /* grab ref/mod bits from this PTE */
2085 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
2086 /* propagate to page's global attributes */
2087 if (!is_ept) {
2088 attributes |= pte_bits;
2089 } else {
2090 attributes |= ept_refmod_to_physmap(pte_bits);
2091 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
2092 ept_keep_global_mod = TRUE;
2093 }
2094 }
2095 /* which bits to clear for this PTE? */
2096 if (!is_ept) {
2097 pte_bits &= bits;
2098 } else {
2099 pte_bits &= ept_bits_to_clear;
2100 }
2101 }
2102 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
2103 pte_bits |= PTE_WRITE(is_ept);
2104 }
2105
2106 /*
2107 * Clear modify and/or reference bits.
2108 */
2109 if (pte_bits) {
2110 pmap_update_pte(is_ept, pte, pte_bits, 0, true);
2111
2112 /* Ensure all processors using this translation
2113 * invalidate this TLB entry. The invalidation
2114 * *must* follow the PTE update, to ensure that
2115 * the TLB shadow of the 'D' bit (in particular)
2116 * is synchronized with the updated PTE.
2117 */
2118 if (!(options & PMAP_OPTIONS_NOFLUSH)) {
2119 /* flush TLBS now */
2120 PMAP_UPDATE_TLBS(pmap,
2121 va,
2122 va + PAGE_SIZE);
2123 } else if (arg) {
2124 /* delayed TLB flush: add "pmap" info */
2125 PMAP_UPDATE_TLBS_DELAYED(
2126 pmap,
2127 va,
2128 va + PAGE_SIZE,
2129 (pmap_flush_context *)arg);
2130 } else {
2131 /* no TLB flushing at all */
2132 }
2133 }
2134
2135 /* update pmap "reusable" stats */
2136 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
2137 is_reusable &&
2138 pmap != kernel_pmap) {
2139 /* one less "reusable" */
2140 pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
2141 if (is_internal) {
2142 /* one more "internal" */
2143 if (is_altacct) {
2144 /* no impact on ledgers */
2145 } else {
2146 pmap_ledger_credit(pmap,
2147 task_ledgers.internal,
2148 PAGE_SIZE);
2149 pmap_ledger_credit(
2150 pmap,
2151 task_ledgers.phys_footprint,
2152 PAGE_SIZE);
2153 }
2154 } else {
2155 /* one more "external" */
2156 pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
2157 }
2158 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
2159 !is_reusable &&
2160 pmap != kernel_pmap) {
2161 /* one more "reusable" */
2162 pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
2163 if (is_internal) {
2164 /* one less "internal" */
2165 if (is_altacct) {
2166 /* no impact on footprint */
2167 } else {
2168 pmap_ledger_debit(pmap,
2169 task_ledgers.internal,
2170 PAGE_SIZE);
2171 pmap_ledger_debit(
2172 pmap,
2173 task_ledgers.phys_footprint,
2174 PAGE_SIZE);
2175 }
2176 } else {
2177 /* one less "external" */
2178 pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
2179 }
2180 }
2181
2182 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2183 } while (pv_e != (pv_hashed_entry_t)pv_h);
2184 }
2185 /* Opportunistic refmod collection, annulled
2186 * if both REF and MOD are being cleared.
2187 */
2188
2189 pmap_phys_attributes[pai] |= attributes;
2190
2191 if (ept_keep_global_mod) {
2192 /*
2193 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2194 * requesting that we clear the modified bit for a phys page, we need
2195 * to ensure that there are no EPT mappings for the page with the
2196 * modified bit set. If there are, we cannot clear the global modified bit.
2197 */
2198 bits &= ~PHYS_MODIFIED;
2199 }
2200 pmap_phys_attributes[pai] &= ~(bits);
2201
2202 /* update this page's "reusable" status */
2203 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
2204 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
2205 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
2206 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
2207 }
2208
2209 UNLOCK_PVH(pai);
2210
2211 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
2212 }
2213
2214 /*
2215 * Check specified attribute bits.
2216 */
/*
 * Test whether the specified attribute bits (PHYS_REFERENCED and/or
 * PHYS_MODIFIED) are set for physical page "pn".
 *
 * Returns the subset of "bits" found to be set.  First checks the page's
 * cached global attributes without locking; if not all requested bits are
 * already cached, takes the PV-head lock and walks the PV list, merging in
 * ref/mod bits from each mapping's PTE until all requested bits are seen or
 * the list is exhausted.  Collected bits are written back into
 * pmap_phys_attributes[] before returning.
 */
int
phys_attribute_test(
	ppnum_t pn,
	int bits)
{
	pv_rooted_entry_t pv_h;
	pv_hashed_entry_t pv_e;
	pt_entry_t *pte;
	int pai;
	pmap_t pmap;
	int attributes = 0;
	boolean_t is_ept;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
	if (pn == vm_page_guard_addr) {
		return 0;
	}

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check... if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits) {
		return bits;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/* re-read the cached attributes now that we hold the lock */
	attributes = pmap_phys_attributes[pai] & bits;


	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			is_ept = is_ept_pmap(pmap);
			va = PVE_VA(pv_e);
			/*
			 * pick up modify and/or reference bits from mapping
			 */

			pte = pmap_pte(pmap, va);
			if (!is_ept) {
				attributes |= (int)(*pte & bits);
			} else {
				/* EPT ref/mod bits live elsewhere; translate them */
				attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
			}

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
		} while ((attributes != bits) &&
		    (pv_e != (pv_hashed_entry_t)pv_h));
	}
	/* cache what we found so the next caller can take the fast path */
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return attributes;
}
2300
2301 /*
2302 * Routine: pmap_change_wiring
2303 * Function: Change the wiring attribute for a map/virtual-address
2304 * pair.
2305 * In/out conditions:
2306 * The mapping must already exist in the pmap.
2307 */
2308 void
pmap_change_wiring(pmap_t map,vm_map_offset_t vaddr,boolean_t wired)2309 pmap_change_wiring(
2310 pmap_t map,
2311 vm_map_offset_t vaddr,
2312 boolean_t wired)
2313 {
2314 pt_entry_t *pte;
2315
2316 PMAP_LOCK_SHARED(map);
2317
2318 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
2319 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
2320 map, vaddr, wired);
2321 }
2322
2323 if (wired && !iswired(*pte)) {
2324 /*
2325 * wiring down mapping
2326 */
2327 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
2328 pmap_update_pte(is_ept_pmap(map), pte, 0, PTE_WIRED, false);
2329 } else if (!wired && iswired(*pte)) {
2330 /*
2331 * unwiring mapping
2332 */
2333 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
2334 pmap_update_pte(is_ept_pmap(map), pte, PTE_WIRED, 0, false);
2335 }
2336
2337 PMAP_UNLOCK_SHARED(map);
2338 }
2339
2340 /*
2341 * "Backdoor" direct map routine for early mappings.
2342 * Useful for mapping memory outside the range
2343 * Sets A, D and NC if requested
2344 */
2345
2346 vm_offset_t
pmap_map_bd(vm_offset_t virt,vm_map_offset_t start_addr,vm_map_offset_t end_addr,vm_prot_t prot,unsigned int flags)2347 pmap_map_bd(
2348 vm_offset_t virt,
2349 vm_map_offset_t start_addr,
2350 vm_map_offset_t end_addr,
2351 vm_prot_t prot,
2352 unsigned int flags)
2353 {
2354 pt_entry_t template;
2355 pt_entry_t *ptep;
2356
2357 vm_offset_t base = virt;
2358 boolean_t doflush = FALSE;
2359
2360 template = pa_to_pte(start_addr)
2361 | INTEL_PTE_REF
2362 | INTEL_PTE_MOD
2363 | INTEL_PTE_WIRED
2364 | INTEL_PTE_VALID;
2365
2366 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
2367 template |= INTEL_PTE_NCACHE;
2368 if (!(flags & (VM_MEM_GUARDED))) {
2369 template |= INTEL_PTE_PAT;
2370 }
2371 }
2372
2373 if ((prot & VM_PROT_EXECUTE) == 0) {
2374 template |= INTEL_PTE_NX;
2375 }
2376
2377 if (prot & VM_PROT_WRITE) {
2378 template |= INTEL_PTE_WRITE;
2379 }
2380 vm_map_offset_t caddr = start_addr;
2381 while (caddr < end_addr) {
2382 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2383 if (ptep == PT_ENTRY_NULL) {
2384 panic("pmap_map_bd: Invalid kernel address");
2385 }
2386 if (pte_to_pa(*ptep)) {
2387 doflush = TRUE;
2388 }
2389 pmap_store_pte(FALSE, ptep, template);
2390 pte_increment_pa(template);
2391 virt += PAGE_SIZE;
2392 caddr += PAGE_SIZE;
2393 }
2394 if (doflush) {
2395 pmap_tlbi_range(0, ~0ULL, true, 0);
2396 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2397 }
2398 return virt;
2399 }
2400
/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
 * range. The aliased pagetable range is expanded if
 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
 * assumes caller has stabilized the source and destination ranges. Currently
 * used to populate sections of the trampoline "doublemap" at CPU startup.
 */

void
pmap_alias(
	vm_offset_t     ava,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t       prot,
	unsigned int    eoptions)
{
	pt_entry_t      prot_template, template;
	pt_entry_t      *aptep, *sptep;

	/* Base PTE bits for every alias: pre-set REF/MOD, wired, valid. */
	prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
	if ((prot & VM_PROT_EXECUTE) == 0) {
		prot_template |= INTEL_PTE_NX;
	}

	if (prot & VM_PROT_WRITE) {
		prot_template |= INTEL_PTE_WRITE;
	}
	/* Both range endpoints must be page aligned. */
	assert(((start_addr | end_addr) & PAGE_MASK) == 0);
	while (start_addr < end_addr) {
		aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
		if (aptep == PT_ENTRY_NULL) {
			if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
				/* Grow the kernel pagetables to cover 'ava', then retry. */
				pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
				aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
			} else {
				panic("pmap_alias: Invalid alias address");
			}
		}
		/* The aliased range should not have any active mappings */
		assert(pte_to_pa(*aptep) == 0);

		/* The source page must already be mapped; reuse its physical address. */
		sptep = pmap_pte(kernel_pmap, start_addr);
		assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
		template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
		pmap_store_pte(FALSE, aptep, template);

		ava += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
}
2450
/*
 * Return the number of resident bytes mapped by 'pmap' within the
 * virtual range [s64, e64); if compressed_bytes_p is non-NULL, also
 * report how many bytes in the range are marked compressed.  The scan
 * proceeds one page-directory span at a time and periodically drops the
 * pmap lock to bound preemption latency.
 */
mach_vm_size_t
pmap_query_resident(
	pmap_t          pmap,
	addr64_t        s64,
	addr64_t        e64,
	mach_vm_size_t  *compressed_bytes_p)
{
	pt_entry_t      *pde;
	pt_entry_t      *spte, *epte;
	addr64_t        l64;
	uint64_t        deadline = 0;
	mach_vm_size_t  resident_bytes;
	mach_vm_size_t  compressed_bytes;
	boolean_t       is_ept;

	pmap_intr_assert();

	/* Nothing to count for a null/kernel pmap or an empty range. */
	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	is_ept = is_ept_pmap(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_LOCK_EXCLUSIVE(pmap);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		/* l64 = end of the current PDE-mapped span, clipped to e64. */
		if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
			l64 = e64;
		} else {
			l64 &= ~(PDE_MAPPED_SIZE - 1);

			if (l64 > e64) {
				l64 = e64;
			}
		}

		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/* superpage: not supported */
			} else {
				/* Walk the PTEs covering [s64, l64). */
				spte = pmap_pte(pmap,
				    (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					if (pte_to_pa(*spte) != 0) {
						resident_bytes += PAGE_SIZE;
					} else if (*spte & PTE_COMPRESSED) {
						compressed_bytes += PAGE_SIZE;
					}
				}
			}
		}
		s64 = l64;

		/*
		 * After enough spans, if we've held the lock past the
		 * preemption-latency deadline, drop it briefly so other
		 * threads can make progress, then re-arm the deadline.
		 */
		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(pmap);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(pmap);
					deadline = rdtsc64() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    resident_bytes);

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}
	return resident_bytes;
}
2544
/* Diagnostic: number of racy retries taken in pmap_query_page_info(). */
uint64_t pmap_query_page_info_retries;

/*
 * Report the disposition of the page mapped at 'va' in 'pmap' via
 * *disp_p: PMAP_QUERY_PAGE_PRESENT or _COMPRESSED, plus
 * internal/reusable/alt-accounting attributes for managed pages.
 * The PTE is re-read after classification and the lookup retried if
 * it changed underneath us.  Returns KERN_INVALID_ARGUMENT for a
 * null pmap or the kernel pmap.
 */
kern_return_t
pmap_query_page_info(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
	int             disp;
	boolean_t       is_ept;
	pmap_paddr_t    pa;
	ppnum_t         pai;
	pd_entry_t      *pde_p;
	pt_entry_t      *pte_p, pte;

	pmap_intr_assert();
	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		*disp_p = 0;
		return KERN_INVALID_ARGUMENT;
	}

	disp = 0;
	is_ept = is_ept_pmap(pmap);

	PMAP_LOCK_EXCLUSIVE(pmap);

	/* Without a valid, non-superpage PDE there is no PTE to inspect. */
	pde_p = pmap_pde(pmap, va);
	if (!pde_p ||
	    !(*pde_p & PTE_VALID_MASK(is_ept)) ||
	    (*pde_p & PTE_PS)) {
		goto done;
	}

try_again:
	disp = 0;

	pte_p = pmap_pte(pmap, va);
	if (pte_p == PT_ENTRY_NULL) {
		goto done;
	}

	pte = *pte_p;
	pa = pte_to_pa(pte);
	if (pa == 0) {
		/* Not resident: may instead be recorded as compressed. */
		if (PTE_IS_COMPRESSED(pte, pte_p, pmap, va)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (pte & PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = pa_index(pa);
		if (!IS_MANAGED_PAGE(pai)) {
			/* Unmanaged pages carry no further attributes. */
		} else if (pmap_pv_is_altacct(pmap, va, pai)) {
			assert(IS_INTERNAL_PAGE(pai));
			disp |= PMAP_QUERY_PAGE_INTERNAL;
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (IS_REUSABLE_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (IS_INTERNAL_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
	}
	if (__improbable(pte_p != pmap_pte(pmap, va) || pte != *pte_p)) {
		/* something changed: try again */
		pmap_query_page_info_retries++;
		goto try_again;
	}
done:
	PMAP_UNLOCK_EXCLUSIVE(pmap);
	*disp_p = disp;
	return KERN_SUCCESS;
}
2619
/*
 * Record whether code-signing enforcement is enabled for this pmap's
 * VM map.  The pmap lock is taken exclusively so the flag update is
 * serialized with other pmap state changes.
 */
void
pmap_set_vm_map_cs_enforced(
	pmap_t pmap,
	bool new_value)
{
	PMAP_LOCK_EXCLUSIVE(pmap);
	pmap->pm_vm_map_cs_enforced = new_value;
	PMAP_UNLOCK_EXCLUSIVE(pmap);
}
2629 extern int cs_process_enforcement_enable;
2630 bool
pmap_get_vm_map_cs_enforced(pmap_t pmap)2631 pmap_get_vm_map_cs_enforced(
2632 pmap_t pmap)
2633 {
2634 if (cs_process_enforcement_enable) {
2635 return true;
2636 }
2637 return pmap->pm_vm_map_cs_enforced;
2638 }
2639
2640 void
pmap_set_jit_entitled(__unused pmap_t pmap)2641 pmap_set_jit_entitled(__unused pmap_t pmap)
2642 {
2643 /* The x86 pmap layer does not care if a map has a JIT entry. */
2644 return;
2645 }
2646
2647 bool
pmap_get_jit_entitled(__unused pmap_t pmap)2648 pmap_get_jit_entitled(__unused pmap_t pmap)
2649 {
2650 /* The x86 pmap layer does not care if a map is using JIT. */
2651 return false;
2652 }
2653
2654 void
pmap_set_tpro(__unused pmap_t pmap)2655 pmap_set_tpro(__unused pmap_t pmap)
2656 {
2657 /* The x86 pmap layer does not care if a map is using TPRO */
2658 return;
2659 }
2660
2661 bool
pmap_get_tpro(__unused pmap_t pmap)2662 pmap_get_tpro(__unused pmap_t pmap)
2663 {
2664 /* The x86 pmap layer does not care if a map is using TPRO */
2665 return false;
2666 }
2667
2668 bool
pmap_has_prot_policy(__unused pmap_t pmap,__unused bool translated_allow_execute,__unused vm_prot_t prot)2669 pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
2670 {
2671 /*
2672 * The x86 pmap layer does not apply any policy to any protection
2673 * types.
2674 */
2675 return false;
2676 }
2677
uint64_t
pmap_release_pages_fast(void)
{
	/* Nothing to release on x86; report zero pages freed. */
	return 0;
}
2683
2684 void
pmap_trim(__unused pmap_t grand,__unused pmap_t subord,__unused addr64_t vstart,__unused uint64_t size)2685 pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
2686 {
2687 return;
2688 }
2689
2690 __dead2
2691 void
pmap_ledger_verify_size(size_t size)2692 pmap_ledger_verify_size(size_t size)
2693 {
2694 panic("%s: unsupported, "
2695 "size=%lu",
2696 __func__, size);
2697 }
2698
2699 __dead2
2700 ledger_t
pmap_ledger_alloc(void)2701 pmap_ledger_alloc(void)
2702 {
2703 panic("%s: unsupported",
2704 __func__);
2705 }
2706
2707 __dead2
2708 void
pmap_ledger_free(ledger_t ledger)2709 pmap_ledger_free(ledger_t ledger)
2710 {
2711 panic("%s: unsupported, "
2712 "ledger=%p",
2713 __func__, ledger);
2714 }
2715
2716 kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused,void * bufp __unused,void * buf_end __unused,unsigned int level_mask __unused,size_t * bytes_copied __unused)2717 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
2718 unsigned int level_mask __unused, size_t *bytes_copied __unused)
2719 {
2720 return KERN_NOT_SUPPORTED;
2721 }
2722
/*
 * Return a kernel virtual address for compressor access to physical
 * page 'pn', using the physical map (PHYSMAP) window; no new mapping
 * is created.  The page must be managed.
 */
void *
pmap_map_compressor_page(ppnum_t pn)
{
	assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
	return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
}
2729
/*
 * Counterpart to pmap_map_compressor_page(): nothing to undo, since the
 * mapping came from the permanent PHYSMAP window.
 */
void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
}
2734
2735 bool
pmap_clear_refmod_range_options(pmap_t pmap __unused,vm_map_address_t start __unused,vm_map_address_t end __unused,unsigned int mask __unused,unsigned int options __unused)2736 pmap_clear_refmod_range_options(
2737 pmap_t pmap __unused,
2738 vm_map_address_t start __unused,
2739 vm_map_address_t end __unused,
2740 unsigned int mask __unused,
2741 unsigned int options __unused)
2742 {
2743 /*
2744 * x86 doesn't have ranged tlbi instructions, and we already have
2745 * the pmap_flush_context. This operation isn't implemented.
2746 */
2747 return false;
2748 }
2749
2750 bool
pmap_supported_feature(pmap_t pmap,pmap_feature_flags_t feat)2751 pmap_supported_feature(pmap_t pmap, pmap_feature_flags_t feat)
2752 {
2753 switch (feat) {
2754 case PMAP_FEAT_UEXEC:
2755 return pmap != NULL && is_ept_pmap(pmap);
2756 default:
2757 return false;
2758 }
2759 }
2760