/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <string.h>
#include <mach_ldebug.h>

#include <libkern/OSAtomic.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/zalloc_internal.h>
#include <kern/queue.h>
#include <kern/ledger.h>
#include <kern/mach_param.h>

#include <kern/spl.h>

#include <vm/pmap.h>
#include <vm/pmap_cs.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>           /* prototyping */
#include <i386/misc_protos.h>
#include <i386/i386_lowmem.h>
#include <x86_64/lowglobals.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/seg.h>
#include <i386/serial_io.h>
#include <i386/cpu_capabilities.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/tsc.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>
#if CONFIG_VMX
#include <i386/vmx/vmx_cpu.h>
#endif

#include <vm/vm_protos.h>
#include <san/kasan.h>

#include <i386/mp.h>
#include <i386/mp_desc.h>
#include <libkern/kernel_mach_header.h>

#include <pexpert/i386/efi.h>
#include <libkern/section_keywords.h>
#if MACH_ASSERT
int pmap_stats_assert = 1;
#endif /* MACH_ASSERT */

#ifdef IWANTTODEBUG
#undef DEBUG
#define DEBUG 1
#define POSTCODE_DELAY 1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */

#ifdef PMAP_DEBUG
#define DBG(x...)       kprintf("DBG: " x)
#else
#define DBG(x...)
#endif
/* Compile time assert to ensure adjacency/alignment of per-CPU data fields used
 * in the trampolines for kernel/user boundary TLB coherency.
 */
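/* The conditional array size below evaluates to 1 when the layout assumptions
 * hold and to -1, a compile-time error, when they do not.
 */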
char pmap_cpu_data_assert[(((offsetof(cpu_data_t, cpu_tlb_invalid) - offsetof(cpu_data_t, cpu_active_cr3)) == 8) && (offsetof(cpu_data_t, cpu_active_cr3) % 64 == 0)) ? 1 : -1];
boolean_t pmap_trace = FALSE;

boolean_t       no_shared_cr3 = DEBUG;  /* TRUE for DEBUG by default */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                     /* enable no-execute protection -- set during boot */
#else
const int nx_enabled = 1;
#endif

#if DEBUG || DEVELOPMENT
int allow_data_exec  = VM_ABI_32;       /* 32-bit apps may execute data by default, 64-bit apps may not */
int allow_stack_exec = 0;               /* No apps may execute from the stack by default */
#else /* DEBUG || DEVELOPMENT */
const int allow_data_exec  = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */
const int allow_stack_exec = 0;         /* No apps may execute from the stack by default */
#endif /* DEBUG || DEVELOPMENT */

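/* Maximum latency tolerated before preempting a long-running pmap operation;
 * converted from MAX_PREEMPTION_LATENCY_NS to TSC ticks in pmap_init().
 */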
uint64_t max_preemption_latency_tsc = 0;

pv_hashed_entry_t *pv_hash_table;       /* hash lists */

uint32_t npvhashmask = 0, npvhashbuckets = 0;

pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
SIMPLE_LOCK_DECLARE(pv_hashed_free_list_lock, 0);
SIMPLE_LOCK_DECLARE(pv_hashed_kern_free_list_lock, 0);
SIMPLE_LOCK_DECLARE(pv_hash_table_lock, 0);
SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);

SECURITY_READ_ONLY_LATE(zone_t) pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
boolean_t       pmap_initialized = FALSE; /* Has pmap_init completed? */

static struct vm_object kptobj_object_store VM_PAGE_PACKED_ALIGNED;
static struct vm_object kpml4obj_object_store VM_PAGE_PACKED_ALIGNED;
static struct vm_object kpdptobj_object_store VM_PAGE_PACKED_ALIGNED;

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char            *pmap_phys_attributes;
ppnum_t         last_managed_page = 0;

unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()          (vm_map_pmap(current_thread()->map))

struct pmap     kernel_pmap_store;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = NULL;
SECURITY_READ_ONLY_LATE(zone_t) pmap_zone;      /* zone of pmap structures */
SECURITY_READ_ONLY_LATE(zone_t) pmap_anchor_zone;
SECURITY_READ_ONLY_LATE(zone_t) pmap_uanchor_zone;
int             pmap_debug = 0;                 /* flag for debugging prints */

unsigned int    inuse_ptepages_count = 0;
long long       alloc_ptepages_count __attribute__((aligned(8))) = 0; /* aligned for atomic access */
unsigned int    bootstrap_wired_pages = 0;

extern long     NMIPI_acks;

SECURITY_READ_ONLY_LATE(boolean_t) kernel_text_ps_4K = TRUE;

extern char     end;

static int      nkpt;

#if DEVELOPMENT || DEBUG
SECURITY_READ_ONLY_LATE(boolean_t) pmap_disable_kheap_nx = FALSE;
SECURITY_READ_ONLY_LATE(boolean_t) pmap_disable_kstack_nx = FALSE;
SECURITY_READ_ONLY_LATE(boolean_t) wpkernel = TRUE;
#else
const boolean_t wpkernel = TRUE;
#endif

extern long __stack_chk_guard[];

static uint64_t pmap_eptp_flags = 0;
boolean_t pmap_ept_support_ad = FALSE;

static void process_pmap_updates(pmap_t, bool, addr64_t, addr64_t);
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t     virt,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t       prot,
	unsigned int    flags)
{
	kern_return_t   kr;
	int             ps;

	ps = PAGE_SIZE;
	while (start_addr < end_addr) {
		kr = pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
		    (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, TRUE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    (void *)virt, (void *)start_addr, (void *)end_addr, prot, flags);
		}

		virt += ps;
		start_addr += ps;
	}
	return virt;
}

extern char             *first_avail;
extern vm_offset_t      virtual_avail, virtual_end;
extern pmap_paddr_t     avail_start, avail_end;
extern vm_offset_t      sHIB;
extern vm_offset_t      eHIB;
extern vm_offset_t      stext;
extern vm_offset_t      etext;
extern vm_offset_t      sdata, edata;
extern vm_offset_t      sconst, econst;

extern void             *KPTphys;

boolean_t pmap_smep_enabled = FALSE;
boolean_t pmap_smap_enabled = FALSE;

void
pmap_cpu_init(void)
{
	cpu_data_t      *cdp = current_cpu_datap();

	set_cr4(get_cr4() | CR4_PGE);

	/*
	 * Initialize the per-cpu, TLB-related fields.
	 */
	cdp->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
	cpu_shadowp(cdp->cpu_number)->cpu_kernel_cr3 = cdp->cpu_kernel_cr3;
	cdp->cpu_active_cr3 = kernel_pmap->pm_cr3;
	cdp->cpu_tlb_invalid = 0;
	cdp->cpu_task_map = TASK_MAP_64BIT;

	pmap_pcid_configure();
	if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMEP) {
		pmap_smep_enabled = TRUE;
#if DEVELOPMENT || DEBUG
		boolean_t nsmep;
		if (PE_parse_boot_argn("-pmap_smep_disable", &nsmep, sizeof(nsmep))) {
			pmap_smep_enabled = FALSE;
		}
#endif
		if (pmap_smep_enabled) {
			set_cr4(get_cr4() | CR4_SMEP);
		}
	}
	if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMAP) {
		pmap_smap_enabled = TRUE;
#if DEVELOPMENT || DEBUG
		boolean_t nsmap;
		if (PE_parse_boot_argn("-pmap_smap_disable", &nsmap, sizeof(nsmap))) {
			pmap_smap_enabled = FALSE;
		}
#endif
		if (pmap_smap_enabled) {
			set_cr4(get_cr4() | CR4_SMAP);
		}
	}

#if !MONOTONIC
	if (cdp->cpu_fixed_pmcs_enabled) {
		boolean_t enable = TRUE;
		cpu_pmc_control(&enable);
	}
#endif /* !MONOTONIC */
}

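/*
 * Sanity-check a proposed write into a read-only zone element: the write
 * must fit within a single element, the offset arithmetic must not overflow,
 * the element must start on an element-size boundary within its page, and
 * the element must actually belong to the given read-only zone
 * (zone_require_ro). Any violation panics rather than risking a stray write
 * through the physical aperture.
 */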
static void
pmap_ro_zone_validate_element(
	zone_id_t           zid,
	vm_offset_t         va,
	vm_offset_t         offset,
	const vm_offset_t   new_data,
	vm_size_t           new_data_size)
{
	vm_size_t elem_size = zone_elem_size_ro(zid);
	vm_offset_t sum = 0, page = trunc_page(va);
	if (__improbable(new_data_size > (elem_size - offset))) {
		panic("%s: New data size %lu too large for elem size %lu at addr %p",
		    __func__, (uintptr_t)new_data_size, (uintptr_t)elem_size, (void*)va);
	}
	if (__improbable(offset >= elem_size)) {
		panic("%s: Offset %lu too large for elem size %lu at addr %p",
		    __func__, (uintptr_t)offset, (uintptr_t)elem_size, (void*)va);
	}
	if (__improbable(os_add3_overflow(va, offset, new_data_size, &sum))) {
		panic("%s: Integer addition overflow %p + %lu + %lu = %lu",
		    __func__, (void*)va, (uintptr_t)offset, (uintptr_t) new_data_size,
		    (uintptr_t)sum);
	}
	if (__improbable(os_add_overflow(new_data, new_data_size, &sum))) {
		panic("%s: Integer addition overflow %p + %lu = %lu",
		    __func__, (void*)new_data, (uintptr_t)new_data_size, (uintptr_t)sum);
	}
	if (__improbable((va - page) % elem_size)) {
		panic("%s: Start of element %p is not aligned to element size %lu",
		    __func__, (void *)va, (uintptr_t)elem_size);
	}

	/* Check element is from correct zone */
	zone_require_ro(zid, elem_size, (void*)va);
}

void
pmap_ro_zone_memcpy(
	zone_id_t           zid,
	vm_offset_t         va,
	vm_offset_t         offset,
	const vm_offset_t   new_data,
	vm_size_t           new_data_size)
{
	const pmap_paddr_t pa = kvtophys(va + offset);

	if (!new_data || new_data_size == 0) {
		return;
	}

	pmap_ro_zone_validate_element(zid, va, offset, new_data, new_data_size);
	/* Write through Physical Aperture */
	memcpy((void*)phystokv(pa), (void*)new_data, new_data_size);
}

void
pmap_ro_zone_bzero(
	zone_id_t   zid,
	vm_offset_t va,
	vm_offset_t offset,
	vm_size_t   size)
{
	const pmap_paddr_t pa = kvtophys(va + offset);
	pmap_ro_zone_validate_element(zid, va, offset, 0, size);
	bzero((void*)phystokv(pa), size);
}

static uint32_t
pmap_scale_shift(void)
{
	uint32_t scale = 0;

	if (sane_size <= 8 * GB) {
		scale = (uint32_t)(sane_size / (2 * GB));
	} else if (sane_size <= 32 * GB) {
		scale = 4 + (uint32_t)((sane_size - (8 * GB)) / (4 * GB));
	} else {
		scale = 10 + (uint32_t)MIN(4, ((sane_size - (32 * GB)) / (8 * GB)));
	}
	return scale;
}
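/*
 * Worked examples of the scaling above: 4GB of RAM yields scale 2, 8GB
 * yields 4, 16GB yields 6, 32GB yields 10, and 64GB or more caps out at 14.
 * pmap_bootstrap() shifts NPVHASHBUCKETS left by this amount when sizing
 * the PV hash, so the bucket count grows with physical memory.
 */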

LCK_GRP_DECLARE(pmap_lck_grp, "pmap");
LCK_ATTR_DECLARE(pmap_lck_rw_attr, 0, LCK_ATTR_DEBUG);

/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 */

void
pmap_bootstrap(
	__unused vm_offset_t    load_start,
	__unused boolean_t      IA32e)
{
	assert(IA32e);

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;   /* Set the highest address
	                                         * known to VM */
	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */

	kernel_pmap = &kernel_pmap_store;
	os_ref_init(&kernel_pmap->ref_count, NULL);
#if DEVELOPMENT || DEBUG
	kernel_pmap->nx_enabled = TRUE;
#endif
	kernel_pmap->pm_task_map = TASK_MAP_64BIT;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->pm_pml4 = IdlePML4;
	kernel_pmap->pm_upml4 = IdlePML4;
	kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
	kernel_pmap->pm_ucr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
	kernel_pmap->pm_eptp = 0;

	pmap_pcid_initialize_kernel(kernel_pmap);

	current_cpu_datap()->cpu_kernel_cr3 = cpu_shadowp(cpu_number())->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;

	nkpt = NKPT;
	OSAddAtomic(NKPT, &inuse_ptepages_count);
	OSAddAtomic64(NKPT, &alloc_ptepages_count);
	bootstrap_wired_pages = NKPT;

	virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

	if (!PE_parse_boot_argn("npvhash", &npvhashmask, sizeof(npvhashmask))) {
		npvhashmask = ((NPVHASHBUCKETS) << pmap_scale_shift()) - 1;
	}

	npvhashbuckets = npvhashmask + 1;

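	/*
	 * The bucket count must be a power of two so that
	 * (hash & npvhashmask) always yields a valid bucket index; a mask of
	 * the form 2^N - 1 guarantees that, which the check below enforces.
	 */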
	if (0 != ((npvhashbuckets) & npvhashmask)) {
		panic("invalid hash %d, must be ((2^N)-1), "
		    "using default %d\n", npvhashmask, NPVHASHMASK);
	}

	lck_rw_init(&kernel_pmap->pmap_rwl, &pmap_lck_grp, &pmap_lck_rw_attr);
	kernel_pmap->pmap_rwl.lck_rw_can_sleep = FALSE;

	pmap_cpu_init();

	if (pmap_pcid_ncpus) {
		printf("PMAP: PCID enabled\n");
	}

	if (pmap_smep_enabled) {
		printf("PMAP: Supervisor Mode Execute Protection enabled\n");
	}
	if (pmap_smap_enabled) {
		printf("PMAP: Supervisor Mode Access Protection enabled\n");
	}

#if DEBUG
	printf("Stack canary: 0x%lx\n", __stack_chk_guard[0]);
	printf("early_random(): 0x%qx\n", early_random());
#endif
#if DEVELOPMENT || DEBUG
	boolean_t ptmp;
	/* Check if the user has requested disabling stack or heap no-execute
	 * enforcement. These are "const" variables; that qualifier is cast away
	 * when altering them. The TEXT/DATA const sections are marked
	 * write protected later in the kernel startup sequence, so altering
	 * them is possible at this point, in pmap_bootstrap().
	 */
	if (PE_parse_boot_argn("-pmap_disable_kheap_nx", &ptmp, sizeof(ptmp))) {
		boolean_t *pdknxp = (boolean_t *) &pmap_disable_kheap_nx;
		*pdknxp = TRUE;
	}

	if (PE_parse_boot_argn("-pmap_disable_kstack_nx", &ptmp, sizeof(ptmp))) {
		boolean_t *pdknhp = (boolean_t *) &pmap_disable_kstack_nx;
		*pdknhp = TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */

	boot_args *args = (boot_args *)PE_state.bootArgs;
	if (args->efiMode == kBootArgsEfiMode32) {
		printf("EFI32: kernel virtual space limited to 4GB\n");
		virtual_end = VM_MAX_KERNEL_ADDRESS_EFI32;
	}
	kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n",
	    (long)KERNEL_BASE, (long)virtual_end);
	kprintf("Available physical space from 0x%llx to 0x%llx\n",
	    avail_start, avail_end);

	/*
	 * The -no_shared_cr3 boot-arg is a debugging feature (set by default
	 * in the DEBUG kernel) to force the kernel to switch to its own map
	 * (and cr3) when control is in kernelspace. The kernel's map does not
	 * include (i.e. share) userspace so wild references will cause
	 * a panic. Only copyin and copyout are exempt from this.
	 */
	(void) PE_parse_boot_argn("-no_shared_cr3",
	    &no_shared_cr3, sizeof(no_shared_cr3));
	if (no_shared_cr3) {
		kprintf("Kernel not sharing user map\n");
	}

#ifdef PMAP_TRACES
	if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof(pmap_trace))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif /* PMAP_TRACES */

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_asserts", &pmap_asserts_enabled, sizeof(pmap_asserts_enabled));
	PE_parse_boot_argn("pmap_stats_assert",
	    &pmap_stats_assert,
	    sizeof(pmap_stats_assert));
#endif /* MACH_ASSERT */
}

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}




#if HIBERNATION

#include <IOKit/IOHibernatePrivate.h>
#include <machine/pal_hibernate.h>

int32_t         pmap_npages;
int32_t         pmap_teardown_last_valid_compact_indx = -1;

void            pmap_pack_index(uint32_t);
int32_t         pmap_unpack_index(pv_rooted_entry_t);

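/*
 * For hibernation, pv_head_table entries are compacted and each entry's
 * original index is stashed in the otherwise-redundant top 16 bits of its
 * qlink.next and qlink.prev kernel pointers (upper and lower halves of the
 * 32-bit index, respectively). Unpacking recovers the index and restores the
 * 0xffff sign-extension bits that canonical kernel addresses carry.
 */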
int32_t
pmap_unpack_index(pv_rooted_entry_t pv_h)
{
	int32_t indx = 0;

	indx = (int32_t)(*((uint64_t *)(&pv_h->qlink.next)) >> 48);
	indx = indx << 16;
	indx |= (int32_t)(*((uint64_t *)(&pv_h->qlink.prev)) >> 48);

	*((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)0xffff << 48);
	*((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)0xffff << 48);

	return indx;
}


void
pmap_pack_index(uint32_t indx)
{
	pv_rooted_entry_t       pv_h;

	pv_h = &pv_head_table[indx];

	*((uint64_t *)(&pv_h->qlink.next)) &= ~((uint64_t)0xffff << 48);
	*((uint64_t *)(&pv_h->qlink.prev)) &= ~((uint64_t)0xffff << 48);

	*((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)(indx >> 16)) << 48;
	*((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)(indx & 0xffff)) << 48;
}


void
pal_hib_teardown_pmap_structs(addr64_t *unneeded_start, addr64_t *unneeded_end)
{
	int32_t i;
	int32_t compact_target_indx;

	compact_target_indx = 0;

	for (i = 0; i < pmap_npages; i++) {
		if (pv_head_table[i].pmap == PMAP_NULL) {
			if (pv_head_table[compact_target_indx].pmap != PMAP_NULL) {
				compact_target_indx = i;
			}
		} else {
			pmap_pack_index((uint32_t)i);

			if (pv_head_table[compact_target_indx].pmap == PMAP_NULL) {
				/*
				 * we've got a hole to fill, so
				 * move this pv_rooted_entry_t to its new home
				 */
				pv_head_table[compact_target_indx] = pv_head_table[i];
				pv_head_table[i].pmap = PMAP_NULL;

				pmap_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else {
				pmap_teardown_last_valid_compact_indx = i;
			}
		}
	}
	*unneeded_start = (addr64_t)&pv_head_table[pmap_teardown_last_valid_compact_indx + 1];
	*unneeded_end = (addr64_t)&pv_head_table[pmap_npages - 1];

	HIBLOG("pal_hib_teardown_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx);
}


void
pal_hib_rebuild_pmap_structs(void)
{
	int32_t cindx, eindx, rindx = 0;
	pv_rooted_entry_t       pv_h;

	eindx = (int32_t)pmap_npages;

	for (cindx = pmap_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
		pv_h = &pv_head_table[cindx];

		rindx = pmap_unpack_index(pv_h);
		assert(rindx < pmap_npages);

		if (rindx != cindx) {
			/*
			 * this pv_rooted_entry_t was moved by pal_hib_teardown_pmap_structs,
			 * so move it back to its real location
			 */
			pv_head_table[rindx] = pv_head_table[cindx];
		}
		if (rindx + 1 != eindx) {
			/*
			 * the 'hole' between this pv_rooted_entry_t and the previous
			 * pv_rooted_entry_t we moved needs to be initialized as
			 * a range of zero'd pv_rooted_entry_t's
			 */
			bzero((char *)&pv_head_table[rindx + 1], (eindx - rindx - 1) * sizeof(struct pv_rooted_entry));
		}
		eindx = rindx;
	}
	if (rindx) {
		bzero((char *)&pv_head_table[0], rindx * sizeof(struct pv_rooted_entry));
	}

	HIBLOG("pal_hib_rebuild_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx);
}

#endif

/*
 * Create pv entries for kernel pages mapped by early startup code.
 * These have to exist so we can ml_static_mfree() them later.
 */
static void
pmap_pv_fixup(vm_offset_t start_va, vm_offset_t end_va)
{
	ppnum_t           ppn;
	pv_rooted_entry_t pv_h;
	uint32_t          pgsz;

	start_va = round_page(start_va);
	end_va = trunc_page(end_va);
	while (start_va < end_va) {
		pgsz = PAGE_SIZE;
		ppn = pmap_find_phys(kernel_pmap, start_va);
		if (ppn != 0 && IS_MANAGED_PAGE(ppn)) {
			pv_h = pai_to_pvh(ppn);
			assert(pv_h->qlink.next == 0); /* shouldn't be init'd yet */
			assert(pv_h->pmap == 0);
			pv_h->va_and_flags = start_va;
			pv_h->pmap = kernel_pmap;
			queue_init(&pv_h->qlink);
			if (pmap_query_pagesize(kernel_pmap, start_va) == I386_LPGBYTES) {
				pgsz = I386_LPGBYTES;
			}
		}
		start_va += pgsz;
	}
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	long            npages;
	vm_offset_t     addr;
	vm_size_t       s, vsize;
	vm_map_offset_t vaddr;
	ppnum_t         ppn;


	kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store;
	_vm_object_allocate((vm_object_size_t)NPML4PGS * PAGE_SIZE, &kpml4obj_object_store);

	kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NPDPTPGS * PAGE_SIZE, &kpdptobj_object_store);

	kernel_pmap->pm_obj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NPDEPGS * PAGE_SIZE, &kptobj_object_store);

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/*
	 * zero bias all these arrays now instead of off avail_start
	 * so we cover all memory
	 */

	npages = i386_btop(avail_end);
#if HIBERNATION
	pmap_npages = (uint32_t)npages;
#endif
	s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
	    + (sizeof(struct pv_hashed_entry_t *) * (npvhashbuckets))
	    + pv_lock_table_size(npages)
	    + pv_hash_lock_table_size((npvhashbuckets))
	    + npages);
	s = round_page(s);
	if (kernel_memory_allocate(kernel_map, &addr, s, 0,
	    KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PMAP)
	    != KERN_SUCCESS) {
		panic("pmap_init");
	}

	memset((char *)addr, 0, s);

	vaddr = addr;
	vsize = s;

#if PV_DEBUG
	if (0 == npvhashmask) {
		panic("npvhashmask not initialized");
	}
#endif

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_rooted_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_hash_table = (pv_hashed_entry_t *)addr;
	addr = (vm_offset_t) (pv_hash_table + (npvhashbuckets));

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pv_hash_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhashbuckets)));

	pmap_phys_attributes = (char *) addr;

	ppnum_t last_pn = i386_btop(avail_end);
	unsigned int i;
	pmap_memory_region_t *pmptr = pmap_memory_regions;
	for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
		if (pmptr->type != kEfiConventionalMemory) {
			continue;
		}
		ppnum_t pn;
		for (pn = pmptr->base; pn <= pmptr->end; pn++) {
			if (pn < last_pn) {
				pmap_phys_attributes[pn] |= PHYS_MANAGED;

				if (pn > last_managed_page) {
					last_managed_page = pn;
				}

				if ((pmap_high_used_bottom <= pn && pn <= pmap_high_used_top) ||
				    (pmap_middle_used_bottom <= pn && pn <= pmap_middle_used_top)) {
					pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
				}
			}
		}
	}
	while (vsize) {
		ppn = pmap_find_phys(kernel_pmap, vaddr);

		pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;

		vaddr += PAGE_SIZE;
		vsize -= PAGE_SIZE;
	}
	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
	    ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);

	/* The anchor is required to be page aligned. Zone debugging adds
	 * padding which may violate that requirement. Tell the zone
	 * subsystem that alignment is required.
	 */
	pmap_anchor_zone = zone_create("pagetable anchors", PAGE_SIZE,
	    ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);

	/* TODO: possible general optimisation...pre-allocate via zones commonly created
	 * level3/2 pagetables
	 */
	/* The anchor is required to be page aligned. Zone debugging adds
	 * padding which may violate that requirement. Tell the zone
	 * subsystem that alignment is required.
	 */
	pmap_uanchor_zone = zone_create("pagetable user anchors", PAGE_SIZE,
	    ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);

	pv_hashed_list_zone = zone_create("pv_list", sizeof(struct pv_hashed_entry),
	    ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);

	/*
	 * Create pv entries for kernel pages that might get pmap_remove()ed.
	 *
	 * - very low pages that were identity mapped.
	 * - vm_pages[] entries that might be unused and reclaimed.
	 */
	assert((uintptr_t)VM_MIN_KERNEL_ADDRESS + avail_start <= (uintptr_t)vm_page_array_beginning_addr);
	pmap_pv_fixup((uintptr_t)VM_MIN_KERNEL_ADDRESS, (uintptr_t)VM_MIN_KERNEL_ADDRESS + avail_start);
	pmap_pv_fixup((uintptr_t)vm_page_array_beginning_addr, (uintptr_t)vm_page_array_ending_addr);

	pmap_initialized = TRUE;

	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);

	/*
	 * Ensure the kernel's PML4 entry exists for the basement
	 * before this is shared with any user.
	 */
	pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT, PMAP_EXPAND_OPTIONS_NONE);

#if CONFIG_VMX
	pmap_ept_support_ad = vmx_hv_support() && (VMX_CAP(MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT, 1) ? TRUE : FALSE);
	pmap_eptp_flags = HV_VMX_EPTP_MEMORY_TYPE_WB | HV_VMX_EPTP_WALK_LENGTH(4) | (pmap_ept_support_ad ? HV_VMX_EPTP_ENABLE_AD_FLAGS : 0);
#endif /* CONFIG_VMX */
}

void
pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX, boolean_t ro)
{
	uint64_t ev = sv + nxrosz, cv = sv;
	pd_entry_t *pdep;
	pt_entry_t *ptep = NULL;

	/* XXX what if nxrosz is 0? we end up marking the page whose address is passed in via sv -- is that kosher? */
	assert(!is_ept_pmap(npmap));

	assert(((sv & 0xFFFULL) | (nxrosz & 0xFFFULL)) == 0);

	for (pdep = pmap_pde(npmap, cv); pdep != NULL && (cv < ev);) {
		uint64_t pdev = (cv & ~((uint64_t)PDEMASK));

		if (*pdep & INTEL_PTE_PS) {
#ifdef REMAP_DEBUG
			if ((NX ^ !!(*pdep & INTEL_PTE_NX)) || (ro ^ !!!(*pdep & INTEL_PTE_WRITE))) {
				kprintf("WARNING: Remapping PDE for %p from %s%s%s to %s%s%s\n", (void *)cv,
				    (*pdep & INTEL_PTE_VALID) ? "R" : "",
				    (*pdep & INTEL_PTE_WRITE) ? "W" : "",
				    (*pdep & INTEL_PTE_NX) ? "" : "X",
				    "R",
				    ro ? "" : "W",
				    NX ? "" : "X");
			}
#endif

			if (NX) {
				*pdep |= INTEL_PTE_NX;
			} else {
				*pdep &= ~INTEL_PTE_NX;
			}
			if (ro) {
				*pdep &= ~INTEL_PTE_WRITE;
			} else {
				*pdep |= INTEL_PTE_WRITE;
			}
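			/* Advance to the start of the next 2MB-aligned PDE range. */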
			cv += NBPD;
			cv &= ~((uint64_t) PDEMASK);
			pdep = pmap_pde(npmap, cv);
			continue;
		}

		for (ptep = pmap_pte(npmap, cv); ptep != NULL && (cv < (pdev + NBPD)) && (cv < ev);) {
#ifdef REMAP_DEBUG
			if ((NX ^ !!(*ptep & INTEL_PTE_NX)) || (ro ^ !!!(*ptep & INTEL_PTE_WRITE))) {
				kprintf("WARNING: Remapping PTE for %p from %s%s%s to %s%s%s\n", (void *)cv,
				    (*ptep & INTEL_PTE_VALID) ? "R" : "",
				    (*ptep & INTEL_PTE_WRITE) ? "W" : "",
				    (*ptep & INTEL_PTE_NX) ? "" : "X",
				    "R",
				    ro ? "" : "W",
				    NX ? "" : "X");
			}
#endif
			if (NX) {
				*ptep |= INTEL_PTE_NX;
			} else {
				*ptep &= ~INTEL_PTE_NX;
			}
			if (ro) {
				*ptep &= ~INTEL_PTE_WRITE;
			} else {
				*ptep |= INTEL_PTE_WRITE;
			}
			cv += NBPT;
			ptep = pmap_pte(npmap, cv);
		}
	}
	DPRINTF("%s(0x%llx, 0x%llx, %u, %u): 0x%llx, 0x%llx\n", __FUNCTION__, sv, nxrosz, NX, ro, cv, ptep ? *ptep : 0);
}

/*
 * Reclaim memory for early boot 4K page tables that were converted to large page mappings.
 * We know this memory is part of the KPTphys[] array that was allocated in Idle_PTs_init(),
 * so we can free it using its address in that array.
 */
static void
pmap_free_early_PT(ppnum_t ppn, uint32_t cnt)
{
	ppnum_t KPTphys_ppn;
	vm_offset_t offset;

	KPTphys_ppn = pmap_find_phys(kernel_pmap, (uintptr_t)KPTphys);
	assert(ppn >= KPTphys_ppn);
	assert(ppn + cnt <= KPTphys_ppn + NKPT);
	offset = (ppn - KPTphys_ppn) << PAGE_SHIFT;
	ml_static_mfree((uintptr_t)KPTphys + offset, PAGE_SIZE * cnt);
}

/*
 * Called once VM is fully initialized so that we can release unused
 * sections of low memory to the general pool.
 * Also complete the set-up of identity-mapped sections of the kernel:
 *	1) write-protect kernel text
 *	2) map kernel text using large pages if possible
 *	3) read and write-protect page zero (for K32)
 *	4) map the global page at the appropriate virtual address.
 *
 * Use of large pages
 * ------------------
 * To effectively map and write-protect all kernel text pages, the text
 * must be 2M-aligned at the base, and the data section above must also be
 * 2M-aligned. That is, there's padding below and above. This is achieved
 * through linker directives. Large pages are used only if this alignment
 * exists (and not overridden by the -kernel_text_page_4K boot-arg). The
 * memory layout is:
 *
 *                       :                :
 *                       |     __DATA     |
 *               sdata:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               etext:  ------------------
 *                       |                |
 *                       :                :
 *                       |                |
 *                       |     __TEXT     |
 *                       |                |
 *                       :                :
 *                       |                |
 *               stext:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               eHIB:   ------------------
 *                       |     __HIB      |
 *                       :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
extern ppnum_t  vm_kernel_base_page;
static uint32_t dataptes = 0;

void
pmap_lowmem_finalize(void)
{
	spl_t   spl;
	int     i;

	/*
	 * Update wired memory statistics for early boot pages
	 */
	PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);

	/*
	 * Free pages in pmap regions below the base:
	 * rdar://6332712
	 *	We can't free all the pages to VM that EFI reports available.
	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
	 *	There's also a size miscalculation here: pend is one page less
	 *	than it should be but this is not fixed to be backwards
	 *	compatible.
	 *	This is important for KASLR because up to 256*2MB = 512MB of space
	 *	has to be released to VM.
	 */
	for (i = 0;
	    pmap_memory_regions[i].end < vm_kernel_base_page;
	    i++) {
		vm_offset_t pbase = i386_ptob(pmap_memory_regions[i].base);
		vm_offset_t pend  = i386_ptob(pmap_memory_regions[i].end + 1);

		DBG("pmap region %d [%p..[%p\n",
		    i, (void *) pbase, (void *) pend);

		if (pmap_memory_regions[i].attribute & EFI_MEMORY_KERN_RESERVED) {
			continue;
		}
		/*
		 * rdar://6332712
		 *	Adjust limits not to free pages in range 0xc0000-0xff000.
		 */
		if (pbase >= 0xc0000 && pend <= 0x100000) {
			continue;
		}
		if (pbase < 0xc0000 && pend > 0x100000) {
			/* the reserved range lies entirely within this region:
			 * free the part below it, then continue above it */
			DBG("- ml_static_mfree(%p,%p)\n",
			    (void *) ml_static_ptovirt(pbase),
			    (void *) (0xc0000 - pbase));
			ml_static_mfree(ml_static_ptovirt(pbase), 0xc0000 - pbase);
			pbase = 0x100000;
		}
		if (pbase < 0xc0000) {
			pend = MIN(pend, 0xc0000);
		}
		if (pend > 0x100000) {
			pbase = MAX(pbase, 0x100000);
		}
		DBG("- ml_static_mfree(%p,%p)\n",
		    (void *) ml_static_ptovirt(pbase),
		    (void *) (pend - pbase));
		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
	}

	/* A final pass to get rid of all initial identity mappings to
	 * low pages.
	 */
	DPRINTF("%s: Removing mappings from 0->0x%lx\n", __FUNCTION__, vm_kernel_base);

	/*
	 * Remove all mappings past the boot-cpu descriptor aliases and low globals.
	 * Non-boot-cpu GDT aliases will be remapped later as needed.
	 */
	pmap_remove(kernel_pmap, LOWGLOBAL_ALIAS + PAGE_SIZE, vm_kernel_base);

	/*
	 * Release any memory for early boot 4K page table pages that got replaced
	 * with large page mappings for vm_pages[]. We know this memory is part of
	 * the KPTphys[] array that was allocated in Idle_PTs_init(), so we can free
	 * it using that address.
	 */
	pmap_free_early_PT(released_PT_ppn, released_PT_cnt);

	/*
	 * If text and data are both 2MB-aligned,
	 * we can map text with large-pages,
	 * unless the -kernel_text_ps_4K boot-arg overrides.
	 */
	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
		kprintf("Kernel text is 2MB aligned");
		kernel_text_ps_4K = FALSE;
		if (PE_parse_boot_argn("-kernel_text_ps_4K",
		    &kernel_text_ps_4K,
		    sizeof(kernel_text_ps_4K))) {
			kprintf(" but will be mapped with 4K pages\n");
		} else {
			kprintf(" and will be mapped with 2M pages\n");
		}
	}
#if DEVELOPMENT || DEBUG
	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof(wpkernel));
#endif
	if (wpkernel) {
		kprintf("Kernel text %p-%p to be write-protected\n",
		    (void *) stext, (void *) etext);
	}

	spl = splhigh();

	/*
	 * Scan over text if mappings are to be changed:
	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
	 * - Change to large-pages if possible and not overridden.
	 */
	if (kernel_text_ps_4K && wpkernel) {
		vm_offset_t myva;
		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
			pt_entry_t *ptep;

			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep) {
				pmap_store_pte(FALSE, ptep, *ptep & ~INTEL_PTE_WRITE);
			}
		}
	}

	if (!kernel_text_ps_4K) {
		vm_offset_t myva;

		/*
		 * Release zero-filled page padding used for 2M-alignment.
		 */
		DBG("ml_static_mfree(%p,%p) for padding below text\n",
		    (void *) eHIB, (void *) (stext - eHIB));
		ml_static_mfree(eHIB, stext - eHIB);
		DBG("ml_static_mfree(%p,%p) for padding above text\n",
		    (void *) etext, (void *) (sdata - etext));
		ml_static_mfree(etext, sdata - etext);

		/*
		 * Coalesce text pages into large pages.
		 */
		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
			pt_entry_t  *ptep;
			vm_offset_t pte_phys;
			pt_entry_t  *pdep;
			pt_entry_t  pde;
			ppnum_t     KPT_ppn;

			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
			KPT_ppn = (ppnum_t)((*pdep & PG_FRAME) >> PAGE_SHIFT);
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			DBG("myva: %p pdep: %p ptep: %p\n",
			    (void *) myva, (void *) pdep, (void *) ptep);
			if ((*ptep & INTEL_PTE_VALID) == 0) {
				continue;
			}
			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
			pde = *pdep & PTMASK;   /* page attributes from pde */
			pde |= INTEL_PTE_PS;    /* make it a 2M entry */
			pde |= pte_phys;        /* take page frame from pte */

			if (wpkernel) {
				pde &= ~INTEL_PTE_WRITE;
			}
			DBG("pmap_store_pte(%p,0x%llx)\n",
			    (void *)pdep, pde);
			pmap_store_pte(FALSE, pdep, pde);

			/*
			 * Free the now-unused level-1 pte.
			 */
			pmap_free_early_PT(KPT_ppn, 1);
		}

		/* Change variable read by sysctl machdep.pmap */
		pmap_kernel_text_ps = I386_LPGBYTES;
	}

	vm_offset_t dva;

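	/* Mark every 4K page of __DATA non-executable by setting the NX bit
	 * in its PTE, counting the pages as we go. */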
	for (dva = sdata; dva < edata; dva += I386_PGBYTES) {
		assert(((sdata | edata) & PAGE_MASK) == 0);
		pt_entry_t dpte, *dptep = pmap_pte(kernel_pmap, dva);

		dpte = *dptep;
		assert((dpte & INTEL_PTE_VALID));
		dpte |= INTEL_PTE_NX;
		pmap_store_pte(FALSE, dptep, dpte);
		dataptes++;
	}
	assert(dataptes > 0);

	kernel_segment_command_t *seg;
	kernel_section_t         *sec;
	kc_format_t kc_format;

	PE_get_primary_kc_format(&kc_format);

	for (seg = firstseg(); seg != NULL; seg = nextsegfromheader(&_mh_execute_header, seg)) {
		if (!strcmp(seg->segname, "__TEXT") ||
		    !strcmp(seg->segname, "__DATA")) {
			continue;
		}

		/* XXX: FIXME_IN_dyld: This is a workaround (see below) */
		if (kc_format != KCFormatFileset) {
			//XXX
			if (!strcmp(seg->segname, "__KLD")) {
				continue;
			}
		}

		if (!strcmp(seg->segname, "__HIB")) {
			for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
				if (sec->addr & PAGE_MASK) {
					panic("__HIB segment's sections misaligned");
				}
				if (!strcmp(sec->sectname, "__text")) {
					pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), FALSE, TRUE);
				} else {
					pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), TRUE, FALSE);
				}
			}
		} else {
			if (kc_format == KCFormatFileset) {
#if 0
				/*
				 * This block of code is commented out because it may or may not have induced an earlier panic
				 * in ledger init.
				 */


				boolean_t NXbit = !(seg->initprot & VM_PROT_EXECUTE),
				    robit = (seg->initprot & (VM_PROT_READ | VM_PROT_WRITE)) == VM_PROT_READ;

				/*
				 * XXX: FIXME_IN_dyld: This is a workaround for the primary KC containing
				 * inaccurate initprot for segments containing code.
				 */
				if (!strcmp(seg->segname, "__KLD") || !strcmp(seg->segname, "__VECTORS")) {
					NXbit = FALSE;
					robit = FALSE;
				}

				pmap_mark_range(kernel_pmap, seg->vmaddr & ~(uint64_t)PAGE_MASK,
				    round_page_64(seg->vmsize), NXbit, robit);
#endif

				/*
				 * XXX: We are marking *every* segment with rwx permissions as a workaround
				 * XXX: until the primary KC's kernel segments are page-aligned.
				 */
				kprintf("Marking (%p, %p) as rwx\n", (void *)(seg->vmaddr & ~(uint64_t)PAGE_MASK),
				    (void *)((seg->vmaddr & ~(uint64_t)PAGE_MASK) + round_page_64(seg->vmsize)));
				pmap_mark_range(kernel_pmap, seg->vmaddr & ~(uint64_t)PAGE_MASK,
				    round_page_64(seg->vmsize), FALSE, FALSE);
			} else {
				pmap_mark_range(kernel_pmap, seg->vmaddr, round_page_64(seg->vmsize), TRUE, FALSE);
			}
		}
	}

	/*
	 * If we're debugging, map the low global vector page at the fixed
	 * virtual address.  Otherwise, remove the mapping for this.
	 */
	if (debug_boot_arg) {
		pt_entry_t *pte = NULL;
		if (0 == (pte = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS))) {
			panic("lowmem pte");
		}

		/* make sure it is defined on page boundary */
		assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
		pmap_store_pte(FALSE, pte, kvtophys((vm_offset_t)&lowGlo)
		    | INTEL_PTE_REF
		    | INTEL_PTE_MOD
		    | INTEL_PTE_WIRED
		    | INTEL_PTE_VALID
		    | INTEL_PTE_WRITE
		    | INTEL_PTE_NX);

#if KASAN
		kasan_notify_address(LOWGLOBAL_ALIAS, PAGE_SIZE);
#endif
	} else {
		pmap_remove(kernel_pmap,
		    LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
	}
	pmap_tlbi_range(0, ~0ULL, true, 0);
	splx(spl);
}

/*
 * Mark the const data segment as read-only, non-executable.
 */
void
x86_64_protect_data_const()
{
	boolean_t doconstro = TRUE;
#if DEVELOPMENT || DEBUG
	(void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro));
#endif
	if (doconstro) {
		if (sconst & PAGE_MASK) {
			panic("CONST segment misaligned 0x%lx 0x%lx",
			    sconst, econst);
		}
		kprintf("Marking const DATA read-only\n");
		pmap_protect(kernel_pmap, sconst, econst, VM_PROT_READ);
	}
}
/*
 * this function is only used for debugging from the vm layer
 */
bool
pmap_verify_free(
	ppnum_t pn)
{
	pv_rooted_entry_t pv_h;
	int               pai;
	bool              result;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized) {
		return true;
	}

	if (pn == vm_page_guard_addr) {
		return true;
	}

	pai = ppn_to_pai(pn);
	if (!IS_MANAGED_PAGE(pai)) {
		return false;
	}
	pv_h = pai_to_pvh(pn);
	result = (pv_h->pmap == PMAP_NULL);
	return result;
}

#if MACH_ASSERT
void
pmap_assert_free(ppnum_t pn)
{
	int pai;
	pv_rooted_entry_t pv_h = NULL;
	pmap_t pmap = NULL;
	vm_offset_t va = 0;
	static char buffer[32];
	static char *pr_name = "not managed pn";
	uint_t attr;
	pt_entry_t *ptep;
	pt_entry_t pte = -1ull;

	if (pmap_verify_free(pn)) {
		return;
	}

	if (pn > last_managed_page) {
		attr = 0xff;
		goto done;
	}

	pai = ppn_to_pai(pn);
	attr = pmap_phys_attributes[pai];
	pv_h = pai_to_pvh(pai);
	va = pv_h->va_and_flags;
	pmap = pv_h->pmap;
	if (pmap == kernel_pmap) {
		pr_name = "kernel";
	} else if (pmap == NULL) {
		pr_name = "pmap NULL";
	} else if (pmap->pmap_procname[0] != 0) {
		pr_name = &pmap->pmap_procname[0];
	} else {
		snprintf(buffer, sizeof(buffer), "pmap %p", pv_h->pmap);
		pr_name = buffer;
	}

	if (pmap != NULL) {
		ptep = pmap_pte(pmap, va);
		if (ptep != NULL) {
			pte = (uintptr_t)*ptep;
		}
	}

done:
	panic("page not FREE page: 0x%lx attr: 0x%x %s va: 0x%lx PTE: 0x%llx",
	    (ulong_t)pn, attr, pr_name, va, pte);
}
#endif /* MACH_ASSERT */

boolean_t
pmap_is_empty(
	pmap_t          pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	vm_map_offset_t offset;
	ppnum_t         phys_page;
	ledger_amount_t phys_mem;

	if (pmap == PMAP_NULL) {
		return TRUE;
	}

	/*
	 * Check the ledger's phys_mem value
	 * - if it's zero, the pmap is completely empty.
	 * This short-circuit test prevents a virtual address scan which is
	 * painfully slow for 64-bit spaces.
	 * This assumes the count is correct; the debug kernel ought to verify
	 * that, perhaps by a page table walk.
	 */
	if (pmap != kernel_pmap) {
		ledger_get_balance(pmap->ledger, task_ledgers.phys_mem, &phys_mem);
		if (phys_mem == 0) {
			return TRUE;
		}
	}

	for (offset = va_start;
	    offset < va_end;
	    offset += PAGE_SIZE_64) {
		phys_page = pmap_find_phys(pmap, offset);
		if (phys_page) {
			kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
			    "page %d at 0x%llx\n",
			    pmap, va_start, va_end, phys_page, offset);
			return FALSE;
		}
	}

	return TRUE;
}

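/*
 * Create an EPT pmap on behalf of the hypervisor framework, returning the
 * pmap and its EPT pointer through the out-parameters; both are set to NULL
 * if the underlying pmap_create_options() call fails.
 */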
void
hv_ept_pmap_create(void **ept_pmap, void **eptp)
{
	pmap_t p;

	if ((ept_pmap == NULL) || (eptp == NULL)) {
		return;
	}

	p = pmap_create_options(get_task_ledger(current_task()), 0, (PMAP_CREATE_64BIT | PMAP_CREATE_EPT));
	if (p == PMAP_NULL) {
		*ept_pmap = NULL;
		*eptp = NULL;
		return;
	}

	assert(is_ept_pmap(p));

	*ept_pmap = (void*)p;
	*eptp = (void*)(p->pm_eptp);
	return;
}

/*
 * pmap_create() is used by some special, legacy 3rd party kexts.
 * In our kernel code, always use pmap_create_options().
 */
extern pmap_t pmap_create(ledger_t ledger, vm_map_size_t sz, boolean_t is_64bit);

__attribute__((used))
pmap_t
pmap_create(
	ledger_t        ledger,
	vm_map_size_t   sz,
	boolean_t       is_64bit)
{
	return pmap_create_options(ledger, sz, is_64bit ? PMAP_CREATE_64BIT : 0);
}

/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */

pmap_t
pmap_create_options(
	ledger_t        ledger,
	vm_map_size_t   sz,
	unsigned int    flags)
{
	pmap_t          p;
	vm_size_t       size;
	pml4_entry_t    *pml4;
	pml4_entry_t    *kpml4;
	int             i;

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, sz, flags);

	size = (vm_size_t) sz;

	/*
	 *	A software use-only map doesn't even need a map.
	 */

	if (size != 0) {
		return PMAP_NULL;
	}

	/*
	 *	Return error when unrecognized flags are passed.
	 */
	if (__improbable((flags & ~(PMAP_CREATE_KNOWN_FLAGS)) != 0)) {
		return PMAP_NULL;
	}

	p = zalloc_flags(pmap_zone, Z_WAITOK | Z_ZERO);
	if (PMAP_NULL == p) {
		panic("pmap_create zalloc");
	}

	lck_rw_init(&p->pmap_rwl, &pmap_lck_grp, &pmap_lck_rw_attr);
	p->pmap_rwl.lck_rw_can_sleep = FALSE;

	os_ref_init(&p->ref_count, NULL);
#if DEVELOPMENT || DEBUG
	p->nx_enabled = 1;
#endif
	p->pm_shared = FALSE;
	ledger_reference(ledger);
	p->ledger = ledger;

	p->pm_task_map = ((flags & PMAP_CREATE_64BIT) ? TASK_MAP_64BIT : TASK_MAP_32BIT);

	p->pagezero_accessible = FALSE;
	p->pm_vm_map_cs_enforced = FALSE;

	if (pmap_pcid_ncpus) {
		pmap_pcid_initialize(p);
	}

	p->pm_pml4 = zalloc(pmap_anchor_zone);
	p->pm_upml4 = zalloc(pmap_uanchor_zone); //cleanup for EPT

	pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);
	pmap_assert((((uintptr_t)p->pm_upml4) & PAGE_MASK) == 0);

	memset((char *)p->pm_pml4, 0, PAGE_SIZE);
	memset((char *)p->pm_upml4, 0, PAGE_SIZE);

	if (flags & PMAP_CREATE_EPT) {
		p->pm_eptp = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4) | pmap_eptp_flags;
		p->pm_cr3 = 0;
	} else {
		p->pm_eptp = 0;
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
		p->pm_ucr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_upml4);
	}

	/* allocate the vm_objs to hold the pdpt, pde and pte pages */

	p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS) * PAGE_SIZE);
	if (NULL == p->pm_obj_pml4) {
		panic("pmap_create pdpt obj");
	}

	p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS) * PAGE_SIZE);
	if (NULL == p->pm_obj_pdpt) {
		panic("pmap_create pdpt obj");
	}

	p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS) * PAGE_SIZE);
	if (NULL == p->pm_obj) {
		panic("pmap_create pte obj");
	}

	if (!(flags & PMAP_CREATE_EPT)) {
		/* All host pmaps share the kernel's pml4 */
		pml4 = pmap64_pml4(p, 0ULL);
		kpml4 = kernel_pmap->pm_pml4;
		for (i = KERNEL_PML4_INDEX; i < (KERNEL_PML4_INDEX + KERNEL_PML4_COUNT); i++) {
			pml4[i] = kpml4[i];
		}
		pml4[KERNEL_KEXTS_INDEX] = kpml4[KERNEL_KEXTS_INDEX];
		for (i = KERNEL_PHYSMAP_PML4_INDEX; i < (KERNEL_PHYSMAP_PML4_INDEX + KERNEL_PHYSMAP_PML4_COUNT); i++) {
			pml4[i] = kpml4[i];
		}
		pml4[KERNEL_DBLMAP_PML4_INDEX] = kpml4[KERNEL_DBLMAP_PML4_INDEX];
#if KASAN
		for (i = KERNEL_KASAN_PML4_FIRST; i <= KERNEL_KASAN_PML4_LAST; i++) {
			pml4[i] = kpml4[i];
		}
#endif
		pml4_entry_t *pml4u = pmap64_user_pml4(p, 0ULL);
		pml4u[KERNEL_DBLMAP_PML4_INDEX] = kpml4[KERNEL_DBLMAP_PML4_INDEX];
	}

#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	p->pmap_pid = 0;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
	    VM_KERNEL_ADDRHIDE(p));

	return p;
}

/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */

#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else /* MACH_ASSERT */
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
extern int vm_wired_objects_page_count;

void
pmap_destroy(pmap_t p)
{
	os_ref_count_t c;

	if (p == PMAP_NULL) {
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(p));

	PMAP_LOCK_EXCLUSIVE(p);

	c = os_ref_release_locked(&p->ref_count);

	pmap_assert((current_thread() && (current_thread()->map)) ? (current_thread()->map->pmap != p) : TRUE);

	if (c == 0) {
		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL);
		if (pmap_pcid_ncpus) {
			pmap_destroy_pcid_sync(p);
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(p);

	if (c != 0) {
		PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
		pmap_assert(p == kernel_pmap);
		return; /* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	int inuse_ptepages = 0;

	zfree(pmap_anchor_zone, p->pm_pml4);
	zfree(pmap_uanchor_zone, p->pm_upml4);

	inuse_ptepages += p->pm_obj_pml4->resident_page_count;
	vm_object_deallocate(p->pm_obj_pml4);

	inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
	vm_object_deallocate(p->pm_obj_pdpt);

	inuse_ptepages += p->pm_obj->resident_page_count;
	vm_object_deallocate(p->pm_obj);

	OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count);
	PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE);

	pmap_check_ledgers(p);
	ledger_dereference(p->ledger);
	lck_rw_destroy(&p->pmap_rwl, &pmap_lck_grp);
	zfree(pmap_zone, p);

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}

/*
 *	Add a reference to the specified pmap.
 */

void
pmap_reference(pmap_t p)
{
	if (p != PMAP_NULL) {
		PMAP_LOCK_EXCLUSIVE(p);
		os_ref_retain_locked(&p->ref_count);
		PMAP_UNLOCK_EXCLUSIVE(p);
	}
}

/*
 *	Remove phys addr if mapped in specified map
 *
 */
void
pmap_remove_some_phys(
	__unused pmap_t         map,
	__unused ppnum_t        pn)
{
	/* Implement to support working set code */
}


void
pmap_protect(
	pmap_t          map,
	vm_map_offset_t sva,
	vm_map_offset_t eva,
	vm_prot_t       prot)
{
	pmap_protect_options(map, sva, eva, prot, 0, NULL);
}


/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *
 * VERY IMPORTANT: Will *NOT* increase permissions.
 *	pmap_protect_options() should protect the range against any access types
 *	that are not in "prot" but it should never grant extra access.
 *	For example, if "prot" is READ|EXECUTE, that means "remove write
 *	access" but it does *not* mean "add read and execute" access.
 *	VM relies on getting soft-faults to enforce extra checks (code
 *	signing, for example).
 *	New access permissions are granted via pmap_enter() only.
 *	***NOTE***:
 *	The only exception is for EPT pmaps, where we MUST populate all exec
 *	bits when the protection API is invoked (so that the HV fault handler
 *	can make decisions based on the exit qualification information, which
 *	includes the execute bits in the EPT entries. Soft-faulting them
 *	in would cause a chicken-and-egg problem where the HV fault handler
 *	would not be able to identify mode-based execute control (MBE) faults.)
 */
1812 void
pmap_protect_options(pmap_t map,vm_map_offset_t sva,vm_map_offset_t eva,vm_prot_t prot,unsigned int options,void * arg)1813 pmap_protect_options(
1814 pmap_t map,
1815 vm_map_offset_t sva,
1816 vm_map_offset_t eva,
1817 vm_prot_t prot,
1818 unsigned int options,
1819 void *arg)
1820 {
1821 pt_entry_t *pde;
1822 pt_entry_t *spte, *epte;
1823 vm_map_offset_t lva;
1824 vm_map_offset_t orig_sva;
1825 boolean_t set_NX;
1826 int num_found = 0;
1827 boolean_t is_ept;
1828 uint64_t cur_vaddr;
1829
1830 pmap_intr_assert();
1831
1832 if (map == PMAP_NULL) {
1833 return;
1834 }
1835
1836 if (prot == VM_PROT_NONE) {
1837 pmap_remove_options(map, sva, eva, options);
1838 return;
1839 }
1840
1841 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
1842 VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(sva),
1843 VM_KERNEL_ADDRHIDE(eva));
1844
1845 is_ept = is_ept_pmap(map);
1846
1847 if ((prot & VM_PROT_EXECUTE) || __improbable(is_ept && (prot & VM_PROT_UEXEC))) {
1848 set_NX = FALSE;
1849 } else {
1850 set_NX = TRUE;
1851 }
1852
1853 #if DEVELOPMENT || DEBUG
1854 if (__improbable(set_NX && (!nx_enabled || !map->nx_enabled))) {
1855 set_NX = FALSE;
1856 }
1857 #endif
1858 PMAP_LOCK_EXCLUSIVE(map);
1859
1860 orig_sva = sva;
1861 cur_vaddr = sva;
1862 while (sva < eva) {
1863 uint64_t vaddr_incr;
1864 lva = (sva + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
1865 if (lva > eva) {
1866 lva = eva;
1867 }
1868 pde = pmap_pde(map, sva);
1869 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1870 if (*pde & PTE_PS) {
1871 /* superpage */
1872 spte = pde;
1873 epte = spte + 1; /* excluded */
1874 vaddr_incr = I386_LPGBYTES;
1875 } else {
1876 spte = pmap_pte(map, (sva & ~(PDE_MAPPED_SIZE - 1)));
1877 spte = &spte[ptenum(sva)];
1878 epte = &spte[intel_btop(lva - sva)];
1879 vaddr_incr = I386_PGBYTES;
1880 }
1881
1882 for (; spte < epte; spte++) {
1883 uint64_t clear_bits, set_bits;
1884
1885 if (!(*spte & PTE_VALID_MASK(is_ept))) {
1886 continue;
1887 }
1888
1889 clear_bits = 0;
1890 set_bits = 0;
1891
1892 if (is_ept) {
1893 if (!(prot & VM_PROT_READ)) {
1894 clear_bits |= PTE_READ(is_ept);
1895 }
1896 }
1897 if (!(prot & VM_PROT_WRITE)) {
1898 clear_bits |= PTE_WRITE(is_ept);
1899 }
1900 #if DEVELOPMENT || DEBUG
1901 else if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) &&
1902 map == kernel_pmap) {
1903 set_bits |= PTE_WRITE(is_ept);
1904 }
1905 #endif /* DEVELOPMENT || DEBUG */
1906
1907 if (set_NX) {
1908 if (!is_ept) {
1909 set_bits |= INTEL_PTE_NX;
1910 } else {
1911 clear_bits |= INTEL_EPT_EX | INTEL_EPT_UEX;
1912 }
1913 } else if (is_ept) {
1914 /* This is the exception to the "Don't add permissions" statement, above */
1915 set_bits |= ((prot & VM_PROT_EXECUTE) ? INTEL_EPT_EX : 0) |
1916 ((prot & VM_PROT_UEXEC) ? INTEL_EPT_UEX : 0);
1917 }
1918
1919 pmap_update_pte(is_ept, spte, clear_bits, set_bits, false);
1920
1921 DTRACE_VM3(set_pte, pmap_t, map, void *, cur_vaddr, uint64_t, *spte);
1922 cur_vaddr += vaddr_incr;
1923
1924 num_found++;
1925 }
1926 }
1927 sva = lva;
1928 }
1929 if (num_found) {
1930 if (options & PMAP_OPTIONS_NOFLUSH) {
1931 PMAP_UPDATE_TLBS_DELAYED(map, orig_sva, eva, (pmap_flush_context *)arg);
1932 } else {
1933 PMAP_UPDATE_TLBS(map, orig_sva, eva);
1934 }
1935 }
1936
1937 PMAP_UNLOCK_EXCLUSIVE(map);
1938
1939 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
1940 }
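
/*
 * Usage sketch (illustrative only, not compiled in): a typical caller
 * downgrades an existing range to read-only and relies on the TLB
 * shootdown performed by pmap_protect_options() above. The wrapper,
 * range, and protection choice here are hypothetical.
 */
#if 0 /* usage sketch */
static void
example_make_range_readonly(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    /* Removes VM_PROT_WRITE if present; never adds access (see above). */
    pmap_protect(pmap, start, end, VM_PROT_READ);
}
#endif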

/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block_addr(
    pmap_t pmap,
    addr64_t va,
    pmap_paddr_t pa,
    uint32_t size,
    vm_prot_t prot,
    int attr,
    unsigned int flags)
{
    return pmap_map_block(pmap, va, intel_btop(pa), size, prot, attr, flags);
}

kern_return_t
pmap_map_block(
    pmap_t pmap,
    addr64_t va,
    ppnum_t pa,
    uint32_t size,
    vm_prot_t prot,
    int attr,
    __unused unsigned int flags)
{
    kern_return_t kr;
    addr64_t original_va = va;
    uint32_t page;
    int cur_page_size;

    if (attr & VM_MEM_SUPERPAGE) {
        cur_page_size = SUPERPAGE_SIZE;
    } else {
        cur_page_size = PAGE_SIZE;
    }

    for (page = 0; page < size; page += cur_page_size / PAGE_SIZE) {
        kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

        if (kr != KERN_SUCCESS) {
            /*
             * This will panic for now, as it is unclear that
             * removing the mappings is correct.
             */
            panic("%s: failed pmap_enter, "
                "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
                __FUNCTION__,
                pmap, va, pa, size, prot, flags);

            pmap_remove(pmap, original_va, va - original_va);
            return kr;
        }

        va += cur_page_size;
        pa += cur_page_size / PAGE_SIZE;
    }

    return KERN_SUCCESS;
}
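
/*
 * Usage sketch (illustrative, hypothetical values): "size" is given in
 * 4K pages; with VM_MEM_SUPERPAGE the loop above instead advances in
 * SUPERPAGE_SIZE steps. The attribute choice below is an assumption,
 * not a requirement of the interface.
 */
#if 0 /* usage sketch */
static kern_return_t
example_map_device_window(pmap_t pmap, addr64_t va, ppnum_t first_page)
{
    /* Map 16 4K pages read/write; uncached is typical for device memory. */
    return pmap_map_block(pmap, va, first_page, 16,
        VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE, 0);
}
#endif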

kern_return_t
pmap_expand_pml4(
    pmap_t map,
    vm_map_offset_t vaddr,
    unsigned int options)
{
    vm_page_t m;
    pmap_paddr_t pa;
    uint64_t i;
    ppnum_t pn;
    pml4_entry_t *pml4p;
    boolean_t is_ept = is_ept_pmap(map);

    DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);

    /*
     * With the exception of the kext "basement", the kernel's level 4
     * pagetables must not be dynamically expanded.
     */
    assert(map != kernel_pmap || (vaddr == KERNEL_BASEMENT));
    /*
     * Allocate a VM page for the pml4 page
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT) {
            return KERN_RESOURCE_SHORTAGE;
        }
        VM_PAGE_WAIT();
    }
    /*
     * Put the page into the pmap's obj list so it
     * can be found later.
     */
    pn = VM_PAGE_GET_PHYS_PAGE(m);
    pa = i386_ptob(pn);
    i = pml4idx(map, vaddr);

    /*
     * Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj_pml4);

    PMAP_LOCK_EXCLUSIVE(map);
    /*
     * See if someone else expanded us first
     */
    if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
        PMAP_UNLOCK_EXCLUSIVE(map);
        vm_object_unlock(map->pm_obj_pml4);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx",
            map, map->pm_obj_pml4, vaddr, i);
    }
#endif
    vm_page_insert_wired(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
    vm_object_unlock(map->pm_obj_pml4);

    /*
     * Set the page directory entry for this page table.
     */
    pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

    /*
     * Note that INTEL_EPT_UEX is unconditionally set (as is INTEL_EPT_EX) for
     * all intermediate paging levels, from PML4Es to PDEs. Processors with
     * VT-x implementations that do not support MBE ignore the INTEL_EPT_UEX
     * bit at all levels of the EPT, so there is no risk of inducing EPT
     * violation faults.
     */
    pmap_store_pte(is_ept, pml4p, pa_to_pte(pa)
        | PTE_READ(is_ept)
        | (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER)
        | PTE_WRITE(is_ept));
    pml4_entry_t *upml4p;

    upml4p = pmap64_user_pml4(map, vaddr);
    pmap_store_pte(is_ept, upml4p, pa_to_pte(pa)
        | PTE_READ(is_ept)
        | (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER)
        | PTE_WRITE(is_ept));

    PMAP_UNLOCK_EXCLUSIVE(map);

    return KERN_SUCCESS;
}

kern_return_t
pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
{
    vm_page_t m;
    pmap_paddr_t pa;
    uint64_t i;
    ppnum_t pn;
    pdpt_entry_t *pdptp;
    boolean_t is_ept = is_ept_pmap(map);

    DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);

    while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
        kern_return_t pep4kr = pmap_expand_pml4(map, vaddr, options);
        if (pep4kr != KERN_SUCCESS) {
            return pep4kr;
        }
    }

    /*
     * Allocate a VM page for the pdpt page
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT) {
            return KERN_RESOURCE_SHORTAGE;
        }
        VM_PAGE_WAIT();
    }

    /*
     * Put the page into the pmap's obj list so it
     * can be found later.
     */
    pn = VM_PAGE_GET_PHYS_PAGE(m);
    pa = i386_ptob(pn);
    i = pdptidx(map, vaddr);

    /*
     * Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj_pdpt);

    PMAP_LOCK_EXCLUSIVE(map);
    /*
     * See if someone else expanded us first
     */
    if (pmap_pde(map, vaddr) != PD_ENTRY_NULL) {
        PMAP_UNLOCK_EXCLUSIVE(map);
        vm_object_unlock(map->pm_obj_pdpt);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx",
            map, map->pm_obj_pdpt, vaddr, i);
    }
#endif
    vm_page_insert_wired(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
    vm_object_unlock(map->pm_obj_pdpt);

    /*
     * Set the page directory entry for this page table.
     */
    pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

    pmap_store_pte(is_ept, pdptp, pa_to_pte(pa)
        | PTE_READ(is_ept)
        | (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER)
        | PTE_WRITE(is_ept));

    PMAP_UNLOCK_EXCLUSIVE(map);

    return KERN_SUCCESS;
}



/*
 * Routine: pmap_expand
 *
 * Expands a pmap to be able to map the specified virtual address.
 *
 * Allocates new virtual memory for the P0 or P1 portion of the
 * pmap, then re-maps the physical pages that were in the old
 * pmap to be in the new pmap.
 *
 * Must be called with the pmap system and the pmap unlocked,
 * since these must be unlocked to use vm_allocate or vm_deallocate.
 * Thus it must be called in a loop that checks whether the map
 * has been expanded enough (see the sketch after this routine).
 * (We won't loop forever, since page tables aren't shrunk.)
 */
kern_return_t
pmap_expand(
    pmap_t map,
    vm_map_offset_t vaddr,
    unsigned int options)
{
    pt_entry_t *pdp;
    vm_page_t m;
    pmap_paddr_t pa;
    uint64_t i;
    ppnum_t pn;
    boolean_t is_ept = is_ept_pmap(map);

    /*
     * For the kernel, the virtual address must be in or above the basement,
     * which is for kexts and is in the 512GB immediately below the kernel.
     * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
     */
    if (__improbable(map == kernel_pmap &&
        !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))) {
        if ((options & PMAP_EXPAND_OPTIONS_ALIASMAP) == 0) {
            panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);
        }
    }

    while ((pdp = pmap_pde(map, vaddr)) == PD_ENTRY_NULL) {
        assert((options & PMAP_EXPAND_OPTIONS_ALIASMAP) == 0);
        kern_return_t pepkr = pmap_expand_pdpt(map, vaddr, options);
        if (pepkr != KERN_SUCCESS) {
            return pepkr;
        }
    }

    /*
     * Allocate a VM page for the pde entries.
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT) {
            return KERN_RESOURCE_SHORTAGE;
        }
        VM_PAGE_WAIT();
    }

    /*
     * Put the page into the pmap's obj list so it
     * can be found later.
     */
    pn = VM_PAGE_GET_PHYS_PAGE(m);
    pa = i386_ptob(pn);
    i = pdeidx(map, vaddr);

    /*
     * Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj);

    PMAP_LOCK_EXCLUSIVE(map);

    /*
     * See if someone else expanded us first
     */
    if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
        PMAP_UNLOCK_EXCLUSIVE(map);
        vm_object_unlock(map->pm_obj);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count); /* TODO: replace all with inlines */
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx",
            map, map->pm_obj, vaddr, i);
    }
#endif
    vm_page_insert_wired(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
    vm_object_unlock(map->pm_obj);

    /*
     * Set the page directory entry for this page table.
     */
    pdp = pmap_pde(map, vaddr);

    pmap_store_pte(is_ept, pdp, pa_to_pte(pa)
        | PTE_READ(is_ept)
        | (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER)
        | PTE_WRITE(is_ept));

    PMAP_UNLOCK_EXCLUSIVE(map);

    return KERN_SUCCESS;
}
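
/*
 * Usage sketch (illustrative): per the comment above, callers retry in a
 * loop until the address is mapped, since another thread may expand the
 * hierarchy concurrently and page tables are never shrunk. The wrapper
 * name is hypothetical.
 */
#if 0 /* usage sketch */
static kern_return_t
example_expand_until_mapped(pmap_t map, vm_map_offset_t vaddr)
{
    while (pmap_pte(map, vaddr) == PT_ENTRY_NULL) {
        kern_return_t kr = pmap_expand(map, vaddr, 0); /* no options */
        if (kr != KERN_SUCCESS) {
            return kr;
        }
    }
    return KERN_SUCCESS;
}
#endif
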
/*
 * Query a pmap to see what size a given virtual address is mapped with.
 * If the vaddr is not mapped, returns 0.
 */
vm_size_t
pmap_query_pagesize(
    pmap_t pmap,
    vm_map_offset_t vaddr)
{
    pd_entry_t *pdep;
    vm_size_t size = 0;

    assert(!is_ept_pmap(pmap));
    PMAP_LOCK_EXCLUSIVE(pmap);

    pdep = pmap_pde(pmap, vaddr);
    if (pdep != PD_ENTRY_NULL) {
        if (*pdep & INTEL_PTE_PS) {
            size = I386_LPGBYTES;
        } else if (pmap_pte(pmap, vaddr) != PT_ENTRY_NULL) {
            size = I386_PGBYTES;
        }
    }

    PMAP_UNLOCK_EXCLUSIVE(pmap);

    return size;
}
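
/*
 * Usage sketch (illustrative, hypothetical helper): distinguishing a 2MB
 * superpage mapping from a 4K mapping, e.g. before an operation that
 * would have to split a superpage.
 */
#if 0 /* usage sketch */
static bool
example_is_superpage_mapped(pmap_t pmap, vm_map_offset_t vaddr)
{
    return pmap_query_pagesize(pmap, vaddr) == I386_LPGBYTES;
}
#endif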

/*
 * Ensure the page table hierarchy is filled in down to
 * the large page level. Additionally returns FAILURE if
 * a lower page table already exists.
 */
static kern_return_t
pmap_pre_expand_large_internal(
    pmap_t pmap,
    vm_map_offset_t vaddr)
{
    ppnum_t pn;
    pt_entry_t *pte;
    boolean_t is_ept = is_ept_pmap(pmap);
    kern_return_t kr = KERN_SUCCESS;

    if (pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
        if (!pmap_next_page_hi(&pn, FALSE)) {
            panic("pmap_pre_expand_large no PDPT");
        }

        pmap_zero_page(pn);

        pte = pmap64_pml4(pmap, vaddr);

        pmap_store_pte(is_ept, pte, pa_to_pte(i386_ptob(pn)) |
            PTE_READ(is_ept) |
            (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER) |
            PTE_WRITE(is_ept));

        pte = pmap64_user_pml4(pmap, vaddr);

        pmap_store_pte(is_ept, pte, pa_to_pte(i386_ptob(pn)) |
            PTE_READ(is_ept) |
            (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER) |
            PTE_WRITE(is_ept));
    }

    if (pmap_pde(pmap, vaddr) == PD_ENTRY_NULL) {
        if (!pmap_next_page_hi(&pn, FALSE)) {
            panic("pmap_pre_expand_large no PDE");
        }

        pmap_zero_page(pn);

        pte = pmap64_pdpt(pmap, vaddr);

        pmap_store_pte(is_ept, pte, pa_to_pte(i386_ptob(pn)) |
            PTE_READ(is_ept) |
            (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER) |
            PTE_WRITE(is_ept));
    } else if (pmap_pte(pmap, vaddr) != PT_ENTRY_NULL) {
        kr = KERN_FAILURE;
    }

    return kr;
}

/*
 * Wrapper that locks the pmap.
 */
kern_return_t
pmap_pre_expand_large(
    pmap_t pmap,
    vm_map_offset_t vaddr)
{
    kern_return_t kr;

    PMAP_LOCK_EXCLUSIVE(pmap);
    kr = pmap_pre_expand_large_internal(pmap, vaddr);
    PMAP_UNLOCK_EXCLUSIVE(pmap);
    return kr;
}

/*
 * On large memory machines, pmap_steal_memory() will allocate past
 * the 1GB of pre-allocated/mapped virtual kernel area. This function
 * expands the kernel page tables to cover a given vaddr. It uses pages
 * from the same pool that pmap_steal_memory() uses, since vm_page_grab()
 * isn't available yet.
 */
void
pmap_pre_expand(
    pmap_t pmap,
    vm_map_offset_t vaddr)
{
    ppnum_t pn;
    pt_entry_t *pte;
    boolean_t is_ept = is_ept_pmap(pmap);

    /*
     * This returns failure if a 4K page table already exists.
     * Otherwise it fills in the page table hierarchy down
     * to that level.
     */
    PMAP_LOCK_EXCLUSIVE(pmap);
    if (pmap_pre_expand_large_internal(pmap, vaddr) == KERN_FAILURE) {
        PMAP_UNLOCK_EXCLUSIVE(pmap);
        return;
    }

    /* Add the lowest table */
    if (!pmap_next_page_hi(&pn, FALSE)) {
        panic("pmap_pre_expand");
    }

    pmap_zero_page(pn);

    pte = pmap_pde(pmap, vaddr);

    pmap_store_pte(is_ept, pte, pa_to_pte(i386_ptob(pn)) |
        PTE_READ(is_ept) |
        (is_ept ? (INTEL_EPT_EX | INTEL_EPT_UEX) : INTEL_PTE_USER) |
        PTE_WRITE(is_ept));
    PMAP_UNLOCK_EXCLUSIVE(pmap);
}

/*
 * pmap_sync_page_data_phys(ppnum_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page.
 * Not required on i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
    return;
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
    cache_flush_page_phys(pa);
}

void
pmap_copy_page(ppnum_t src, ppnum_t dst)
{
    bcopy_phys((addr64_t)i386_ptob(src),
        (addr64_t)i386_ptob(dst),
        PAGE_SIZE);
}


/*
 * Routine: pmap_pageable
 * Function:
 *	Make the specified pages (by pmap, offset)
 *	pageable (or not) as requested.
 *
 *	A page which is not pageable may not take
 *	a fault; therefore, its page table entry
 *	must remain valid for the duration.
 *
 *	This routine is merely advisory; pmap_enter
 *	will specify that these pages are to be wired
 *	down (or not) as appropriate.
 */
void
pmap_pageable(
    __unused pmap_t pmap,
    __unused vm_map_offset_t start_addr,
    __unused vm_map_offset_t end_addr,
    __unused boolean_t pageable)
{
#ifdef lint
    pmap++; start_addr++; end_addr++; pageable++;
#endif /* lint */
}

void
invalidate_icache(__unused vm_offset_t addr,
    __unused unsigned cnt,
    __unused int phys)
{
    return;
}

void
flush_dcache(__unused vm_offset_t addr,
    __unused unsigned count,
    __unused int phys)
{
    return;
}

#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(__unused addr64_t va)
{
    thread_t thread = current_thread();
    uint64_t ccr3;
    if (current_map() == kernel_map) {
        return KERN_FAILURE;
    } else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE)) {
        return KERN_FAILURE;
    } else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3)) {
        return KERN_FAILURE;
    } else {
        return KERN_SUCCESS;
    }
}

kern_return_t
dtrace_copyio_postflight(__unused addr64_t va)
{
    return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
    __unused pmap_t pmap,
    __unused vm_offset_t *listp,
    __unused int space)
{
    return 0;
}
#endif /* MACH_VM_DEBUG */


#if CONFIG_COREDUMP
/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused mach_vm_offset_t va)
{
#if 0
    pt_entry_t *ptep;

    ptep = pmap_pte(map->pmap, va);
    if (0 == ptep) {
        return FALSE;
    }
    return (*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED);
#else
    return TRUE;
#endif
}
#endif

boolean_t
phys_page_exists(ppnum_t pn)
{
    assert(pn != vm_page_fictitious_addr);

    if (!pmap_initialized) {
        return TRUE;
    }

    if (pn == vm_page_guard_addr) {
        return FALSE;
    }

    if (!IS_MANAGED_PAGE(ppn_to_pai(pn))) {
        return FALSE;
    }

    return TRUE;
}



void
pmap_switch(pmap_t tpmap)
{
    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(tpmap));
    assert(ml_get_interrupts_enabled() == FALSE);
    set_dirbase(tpmap, current_thread(), cpu_number());
    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}

void
pmap_require(pmap_t pmap)
{
    if (pmap != kernel_pmap) {
        zone_id_require(ZONE_ID_PMAP, sizeof(struct pmap), pmap);
    }
}

/*
 * Disable the no-execute capability on
 * the specified pmap.
 */
void
pmap_disable_NX(__unused pmap_t pmap)
{
#if DEVELOPMENT || DEBUG
    pmap->nx_enabled = 0;
#endif
}

void
pmap_flush_context_init(pmap_flush_context *pfc)
{
    pfc->pfc_cpus = 0;
    pfc->pfc_invalid_global = 0;
}
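
/*
 * Usage sketch (illustrative, hypothetical ranges): the deferred-shootdown
 * pattern this context supports. A caller batches several
 * PMAP_OPTIONS_NOFLUSH operations against one context, then issues a
 * single pmap_flush() (defined below) to cover all of them.
 */
#if 0 /* usage sketch */
static void
example_batched_protect(pmap_t pmap,
    vm_map_offset_t r1s, vm_map_offset_t r1e,
    vm_map_offset_t r2s, vm_map_offset_t r2e)
{
    pmap_flush_context pfc;

    pmap_flush_context_init(&pfc);
    pmap_protect_options(pmap, r1s, r1e, VM_PROT_READ,
        PMAP_OPTIONS_NOFLUSH, &pfc);
    pmap_protect_options(pmap, r2s, r2e, VM_PROT_READ,
        PMAP_OPTIONS_NOFLUSH, &pfc);
    pmap_flush(&pfc); /* one IPI round covers both ranges */
}
#endif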

static bool
pmap_tlbi_response(uint32_t lcpu, uint32_t rcpu, bool ngflush)
{
    bool responded = false;
    bool gflushed = (cpu_datap(rcpu)->cpu_tlb_invalid_global_count !=
        cpu_datap(lcpu)->cpu_tlb_gen_counts_global[rcpu]);

    if (ngflush) {
        if (gflushed) {
            responded = true;
        }
    } else {
        if (gflushed) {
            responded = true;
        } else {
            bool lflushed = (cpu_datap(rcpu)->cpu_tlb_invalid_local_count !=
                cpu_datap(lcpu)->cpu_tlb_gen_counts_local[rcpu]);
            if (lflushed) {
                responded = true;
            }
        }
    }

    if (responded == false) {
        if ((cpu_datap(rcpu)->cpu_tlb_invalid == 0) ||
            !CPU_CR3_IS_ACTIVE(rcpu) ||
            !cpu_is_running(rcpu)) {
            responded = true;
        }
    }
    return responded;
}

extern uint64_t TLBTimeOut;
void
pmap_flush(
    pmap_flush_context *pfc)
{
    unsigned int my_cpu;
    unsigned int cpu;
    cpumask_t cpu_bit;
    cpumask_t cpus_to_respond = 0;
    cpumask_t cpus_to_signal = 0;
    cpumask_t cpus_signaled = 0;
    boolean_t flush_self = FALSE;
    uint64_t deadline;
    bool need_global_flush = false;

    mp_disable_preemption();

    my_cpu = cpu_number();
    cpus_to_signal = pfc->pfc_cpus;

    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_START,
        NULL, cpus_to_signal);

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus && cpus_to_signal; cpu++, cpu_bit <<= 1) {
        if (cpus_to_signal & cpu_bit) {
            cpus_to_signal &= ~cpu_bit;

            if (!cpu_is_running(cpu)) {
                continue;
            }

            if (pfc->pfc_invalid_global & cpu_bit) {
                cpu_datap(cpu)->cpu_tlb_invalid_global = 1;
                need_global_flush = true;
            } else {
                cpu_datap(cpu)->cpu_tlb_invalid_local = 1;
            }
            cpu_datap(my_cpu)->cpu_tlb_gen_counts_global[cpu] = cpu_datap(cpu)->cpu_tlb_invalid_global_count;
            cpu_datap(my_cpu)->cpu_tlb_gen_counts_local[cpu] = cpu_datap(cpu)->cpu_tlb_invalid_local_count;
            mfence();

            if (cpu == my_cpu) {
                flush_self = TRUE;
                continue;
            }
            if (CPU_CR3_IS_ACTIVE(cpu)) {
                cpus_to_respond |= cpu_bit;
                i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
            }
        }
    }
    cpus_signaled = cpus_to_respond;

    /*
     * Flush local tlb if required.
     * Do this now to overlap with other processors responding.
     */
    if (flush_self) {
        process_pmap_updates(NULL, (pfc->pfc_invalid_global != 0), 0ULL, ~0ULL);
    }

    if (cpus_to_respond) {
        deadline = mach_absolute_time() +
            (TLBTimeOut ? TLBTimeOut : LockTimeOut);
        boolean_t is_timeout_traced = FALSE;

        /*
         * Wait for those other cpus to acknowledge
         */
        while (cpus_to_respond != 0) {
            long orig_acks = 0;

            for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                bool responded = false;
                if ((cpus_to_respond & cpu_bit) != 0) {
                    responded = pmap_tlbi_response(my_cpu, cpu, need_global_flush);
                    if (responded) {
                        cpus_to_respond &= ~cpu_bit;
                    }
                    cpu_pause();
                }

                if (cpus_to_respond == 0) {
                    break;
                }
            }
            if (cpus_to_respond && (mach_absolute_time() > deadline)) {
                if (machine_timeout_suspended()) {
                    continue;
                }
                if (TLBTimeOut == 0) {
                    if (is_timeout_traced) {
                        continue;
                    }

                    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS_TO),
                        NULL, cpus_to_signal, cpus_to_respond);

                    is_timeout_traced = TRUE;
                    continue;
                }
                orig_acks = NMIPI_acks;
                NMIPI_panic(cpus_to_respond, TLB_FLUSH_TIMEOUT);
                panic("Uninterruptible processor(s): CPU bitmap: 0x%llx, NMIPI acks: 0x%lx, now: 0x%lx, deadline: %llu",
                    cpus_to_respond, orig_acks, NMIPI_acks, deadline);
            }
        }
    }

    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_END,
        NULL, cpus_signaled, flush_self);

    mp_enable_preemption();
}


static void
invept(void *eptp)
{
    struct {
        uint64_t eptp;
        uint64_t reserved;
    } __attribute__((aligned(16), packed)) invept_descriptor = {(uint64_t)eptp, 0};
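
    /*
     * Descriptive note: INVEPT takes the invalidation type in a register
     * operand and a 128-bit in-memory descriptor, with the EPT pointer in
     * the low quadword and the high quadword reserved as zero (see the
     * Intel SDM description of INVEPT).
     */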

    __asm__ volatile ("invept (%%rax), %%rcx"
        : : "c" (PMAP_INVEPT_SINGLE_CONTEXT), "a" (&invept_descriptor)
        : "cc", "memory");
}

/*
 * Called with pmap locked, we:
 * - scan through per-cpu data to see which other cpus need to flush
 * - send an IPI to each non-idle cpu to be flushed
 * - wait for all to signal back that they are inactive or we see that
 *   they are at a safe point (idle).
 * - flush the local tlb if active for this pmap
 * - return ... the caller will unlock the pmap
 */

void
pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int options, pmap_flush_context *pfc)
{
    unsigned int cpu;
    cpumask_t cpu_bit;
    cpumask_t cpus_to_signal = 0;
    unsigned int my_cpu = cpu_number();
    pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
    boolean_t flush_self = FALSE;
    uint64_t deadline;
    boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
    bool need_global_flush = false;
    uint32_t event_code;
    vm_map_offset_t event_startv, event_endv;
    boolean_t is_ept = is_ept_pmap(pmap);

    assert((processor_avail_count < 2) ||
        (ml_get_interrupts_enabled() && get_preemption_level() != 0));

    assert((endv - startv) >= PAGE_SIZE);
    assert(((endv | startv) & PAGE_MASK) == 0);

    if (__improbable(kdebug_enable)) {
        if (pmap == kernel_pmap) {
            event_code = PMAP_CODE(PMAP__FLUSH_KERN_TLBS);
            event_startv = VM_KERNEL_UNSLIDE_OR_PERM(startv);
            event_endv = VM_KERNEL_UNSLIDE_OR_PERM(endv);
        } else if (__improbable(is_ept)) {
            event_code = PMAP_CODE(PMAP__FLUSH_EPT);
            event_startv = startv;
            event_endv = endv;
        } else {
            event_code = PMAP_CODE(PMAP__FLUSH_TLBS);
            event_startv = startv;
            event_endv = endv;
        }
    }

    PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START,
        VM_KERNEL_UNSLIDE_OR_PERM(pmap), options,
        event_startv, event_endv);

    if (__improbable(is_ept)) {
        mp_cpus_call(CPUMASK_ALL, ASYNC, invept, (void *)pmap->pm_eptp);
        goto out;
    }

    /*
     * Scan other cpus for matching active or task CR3.
     * For idle cpus (with no active map) we mark them invalid but
     * don't signal -- they'll check as they go busy.
     */
    if (pmap_pcid_ncpus) {
        if (pmap_is_shared) {
            need_global_flush = true;
        }
        pmap_pcid_invalidate_all_cpus(pmap);
        mfence();
    }

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if (!cpu_is_running(cpu)) {
            continue;
        }
        uint64_t cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
        uint64_t cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);

        if ((pmap_cr3 == cpu_task_cr3) ||
            (pmap_cr3 == cpu_active_cr3) ||
            (pmap_is_shared)) {
            if (options & PMAP_DELAY_TLB_FLUSH) {
                if (need_global_flush == true) {
                    pfc->pfc_invalid_global |= cpu_bit;
                }
                pfc->pfc_cpus |= cpu_bit;

                continue;
            }
            if (need_global_flush == true) {
                cpu_datap(my_cpu)->cpu_tlb_gen_counts_global[cpu] = cpu_datap(cpu)->cpu_tlb_invalid_global_count;
                cpu_datap(cpu)->cpu_tlb_invalid_global = 1;
            } else {
                cpu_datap(my_cpu)->cpu_tlb_gen_counts_local[cpu] = cpu_datap(cpu)->cpu_tlb_invalid_local_count;
                cpu_datap(cpu)->cpu_tlb_invalid_local = 1;
            }

            if (cpu == my_cpu) {
                flush_self = TRUE;
                continue;
            }

            mfence();

            /*
             * We don't need to signal processors which will flush
             * lazily at the idle state or kernel boundary.
             * For example, if we're invalidating the kernel pmap,
             * processors currently in userspace don't need to flush
             * their TLBs until the next time they enter the kernel.
             * Alterations to the address space of a task active
             * on a remote processor result in a signal, to
             * account for copy operations. (There may be room
             * for optimization in such cases).
             * The order of the loads below with respect
             * to the store to the "cpu_tlb_invalid" field above
             * is important--hence the barrier.
             */
            if (CPU_CR3_IS_ACTIVE(cpu) &&
                (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
                pmap->pm_shared ||
                (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
                cpus_to_signal |= cpu_bit;
                i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
            }
        }
    }

    if ((options & PMAP_DELAY_TLB_FLUSH)) {
        goto out;
    }

    /*
     * Flush local tlb if required.
     * Do this now to overlap with other processors responding.
     */
    if (flush_self) {
        process_pmap_updates(pmap, pmap_is_shared, startv, endv);
    }

    if (cpus_to_signal) {
        cpumask_t cpus_to_respond = cpus_to_signal;

        deadline = mach_absolute_time() +
            (TLBTimeOut ? TLBTimeOut : LockTimeOut);
        boolean_t is_timeout_traced = FALSE;

        /*
         * Wait for those other cpus to acknowledge
         */
        while (cpus_to_respond != 0) {
            long orig_acks = 0;

            for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                bool responded = false;
                if ((cpus_to_respond & cpu_bit) != 0) {
                    responded = pmap_tlbi_response(my_cpu, cpu, need_global_flush);
                    if (responded) {
                        cpus_to_respond &= ~cpu_bit;
                    }
                    cpu_pause();
                }
                if (cpus_to_respond == 0) {
                    break;
                }
            }
            if (cpus_to_respond && (mach_absolute_time() > deadline)) {
                if (machine_timeout_suspended()) {
                    continue;
                }
                if (TLBTimeOut == 0) {
                    /* cut tracepoint but don't panic */
                    if (is_timeout_traced) {
                        continue;
                    }

                    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS_TO),
                        VM_KERNEL_UNSLIDE_OR_PERM(pmap),
                        cpus_to_signal,
                        cpus_to_respond);

                    is_timeout_traced = TRUE;
                    continue;
                }
                orig_acks = NMIPI_acks;
                uint64_t tstamp1 = mach_absolute_time();
                NMIPI_panic(cpus_to_respond, TLB_FLUSH_TIMEOUT);
                uint64_t tstamp2 = mach_absolute_time();
                panic("IPI timeout, unresponsive CPU bitmap: 0x%llx, NMIPI acks: 0x%lx, now: 0x%lx, deadline: %llu, pre-NMIPI time: 0x%llx, current: 0x%llx, global: %d",
                    cpus_to_respond, orig_acks, NMIPI_acks, deadline, tstamp1, tstamp2, need_global_flush);
            }
        }
    }

    if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
        panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, pmap_cr3: 0x%llx, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, pmap_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
    }

out:
    PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_END,
        VM_KERNEL_UNSLIDE_OR_PERM(pmap), cpus_to_signal,
        event_startv, event_endv);
}

static void
process_pmap_updates(pmap_t p, bool pshared, addr64_t istart, addr64_t iend)
{
    int ccpu = cpu_number();
    bool gtlbf = false;

    pmap_assert(ml_get_interrupts_enabled() == 0 ||
        get_preemption_level() != 0);

    if (cpu_datap(ccpu)->cpu_tlb_invalid_global) {
        cpu_datap(ccpu)->cpu_tlb_invalid_global_count++;
        cpu_datap(ccpu)->cpu_tlb_invalid = 0;
        gtlbf = true;
    } else {
        cpu_datap(ccpu)->cpu_tlb_invalid_local_count++;
        cpu_datap(ccpu)->cpu_tlb_invalid_local = 0;
    }

    if (pmap_pcid_ncpus) {
        if (p) {
            /*
             * TODO: global generation count to
             * avoid potentially redundant
             * csw invalidations post-global invalidation
             */
            pmap_pcid_validate_cpu(p, ccpu);
            pmap_tlbi_range(istart, iend, (pshared || gtlbf), p->pmap_pcid_cpus[ccpu]);
        } else {
            pmap_pcid_validate_current();
            pmap_tlbi_range(istart, iend, true, 0);
        }
    } else {
        pmap_tlbi_range(0, ~0ULL, true, 0);
    }
}

void
pmap_update_interrupt(void)
{
    PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START);

    if (current_cpu_datap()->cpu_tlb_invalid) {
        process_pmap_updates(NULL, true, 0ULL, ~0ULL);
    }

    PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END);
}

#include <mach/mach_vm.h> /* mach_vm_region_recurse() */
/*
 * Scan the kernel pmap for W+X PTEs, scan the kernel VM map for W+X
 * map entries, and identify ranges with mismatched VM permissions and
 * PTE permissions.
 */
kern_return_t
pmap_permissions_verify(pmap_t ipmap, vm_map_t ivmmap, vm_offset_t sv, vm_offset_t ev)
{
    vm_offset_t cv = sv;
    kern_return_t rv = KERN_SUCCESS;
    uint64_t skip4 = 0, skip2 = 0;

    assert(!is_ept_pmap(ipmap));

    sv &= ~PAGE_MASK_64;
    ev &= ~PAGE_MASK_64;
    while (cv < ev) {
        if (__improbable((cv > 0x00007FFFFFFFFFFFULL) &&
            (cv < 0xFFFF800000000000ULL))) {
            cv = 0xFFFF800000000000ULL;
        }
        /*
         * Potential inconsistencies from not holding the pmap lock,
         * but harmless for the moment.
         */
        if (((cv & PML4MASK) == 0) && (pmap64_pml4(ipmap, cv) == 0)) {
            if ((cv + NBPML4) > cv) {
                cv += NBPML4;
            } else {
                break;
            }
            skip4++;
            continue;
        }
        if (((cv & PDMASK) == 0) && (pmap_pde(ipmap, cv) == 0)) {
            if ((cv + NBPD) > cv) {
                cv += NBPD;
            } else {
                break;
            }
            skip2++;
            continue;
        }

        pt_entry_t *ptep = pmap_pte(ipmap, cv);
        if (ptep && (*ptep & INTEL_PTE_VALID)) {
            if (*ptep & INTEL_PTE_WRITE) {
                if (!(*ptep & INTEL_PTE_NX)) {
                    kprintf("W+X PTE at 0x%lx, P4: 0x%llx, P3: 0x%llx, P2: 0x%llx, PT: 0x%llx, VP: %u\n", cv, *pmap64_pml4(ipmap, cv), *pmap64_pdpt(ipmap, cv), *pmap_pde(ipmap, cv), *ptep, pmap_valid_page((ppnum_t)(i386_btop(pte_to_pa(*ptep)))));
                    rv = KERN_FAILURE;
                }
            }
        }
        cv += PAGE_SIZE;
    }
    kprintf("Completed pmap scan\n");
    cv = sv;

    struct vm_region_submap_info_64 vbr;
    mach_msg_type_number_t vbrcount = 0;
    mach_vm_size_t vmsize;
    vm_prot_t prot;
    uint32_t nesting_depth = 0;
    kern_return_t kret;

    while (cv < ev) {
        for (;;) {
            vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
            if ((kret = mach_vm_region_recurse(ivmmap,
                (mach_vm_address_t *) &cv, &vmsize, &nesting_depth,
                (vm_region_recurse_info_t)&vbr,
                &vbrcount)) != KERN_SUCCESS) {
                break;
            }

            if (vbr.is_submap) {
                nesting_depth++;
                continue;
            } else {
                break;
            }
        }

        if (kret != KERN_SUCCESS) {
            break;
        }

        prot = vbr.protection;

        if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
            kprintf("W+X map entry at address 0x%lx\n", cv);
            rv = KERN_FAILURE;
        }

        if (prot) {
            vm_offset_t pcv;
            for (pcv = cv; pcv < cv + vmsize; pcv += PAGE_SIZE) {
                pt_entry_t *ptep = pmap_pte(ipmap, pcv);
                vm_prot_t tprot;

                if ((ptep == NULL) || !(*ptep & INTEL_PTE_VALID)) {
                    continue;
                }
                tprot = VM_PROT_READ;
                if (*ptep & INTEL_PTE_WRITE) {
                    tprot |= VM_PROT_WRITE;
                }
                if ((*ptep & INTEL_PTE_NX) == 0) {
                    tprot |= VM_PROT_EXECUTE;
                }
                if (tprot != prot) {
                    kprintf("PTE/map entry permissions mismatch at address 0x%lx, pte: 0x%llx, protection: 0x%x\n", pcv, *ptep, prot);
                    rv = KERN_FAILURE;
                }
            }
        }
        cv += vmsize;
    }
    return rv;
}
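
/*
 * Usage sketch (illustrative, hypothetical wrapper): auditing the
 * kernel's own mappings for W+X pages as a debug-only consistency check.
 */
#if 0 /* usage sketch */
static void
example_audit_kernel_wx(void)
{
    if (pmap_permissions_verify(kernel_pmap, kernel_map,
        VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS) != KERN_SUCCESS) {
        kprintf("W+X or mismatched mappings detected\n");
    }
}
#endif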

#if MACH_ASSERT
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

static void
pmap_check_ledgers(
    pmap_t pmap)
{
    int pid;
    char *procname;

    if (pmap->pmap_pid == 0) {
        /*
         * This pmap was not or is no longer fully associated
         * with a task (e.g. the old pmap after a fork()/exec() or
         * spawn()). Its "ledger" still points at a task that is
         * now using a different (and active) address space, so
         * we can't check that all the pmap ledgers are balanced here.
         *
         * If the "pid" is set, that means that we went through
         * pmap_set_process() in task_terminate_internal(), so
         * this task's ledger should not have been re-used and
         * all the pmap ledgers should be back to 0.
         */
        return;
    }

    pid = pmap->pmap_pid;
    procname = pmap->pmap_procname;

    vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
}

void
pmap_set_process(
    pmap_t pmap,
    int pid,
    char *procname)
{
    if (pmap == NULL) {
        return;
    }

    pmap->pmap_pid = pid;
    strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
    if (pmap_ledgers_panic_leeway) {
        /*
         * XXX FBDP
         * Some processes somehow trigger some issues that make
         * the pmap stats and ledgers go off track, causing
         * some assertion failures and ledger panics.
         * Turn off the sanity checks if we allow some ledger leeway
         * because of that. We'll still do a final check in
         * pmap_check_ledgers() for discrepancies larger than the
         * allowed leeway after the address space has been fully
         * cleaned up.
         */
        pmap->pmap_stats_assert = FALSE;
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.phys_footprint);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.internal);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.internal_compressed);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.iokit_mapped);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.alternate_accounting);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.alternate_accounting_compressed);
    }
}
#endif /* MACH_ASSERT */


#if DEVELOPMENT || DEBUG
int pmap_pagezero_mitigation = 1;
#endif

void
pmap_advise_pagezero_range(pmap_t lpmap, uint64_t low_bound)
{
#if DEVELOPMENT || DEBUG
    if (pmap_pagezero_mitigation == 0) {
        lpmap->pagezero_accessible = FALSE;
        return;
    }
#endif
    lpmap->pagezero_accessible = ((pmap_smap_enabled == FALSE) && (low_bound < 0x1000));
    if (lpmap == current_pmap()) {
        mp_disable_preemption();
        current_cpu_datap()->cpu_pagezero_mapped = lpmap->pagezero_accessible;
        mp_enable_preemption();
    }
}

uintptr_t
pmap_verify_noncacheable(uintptr_t vaddr)
{
    pt_entry_t *ptep = NULL;
    ptep = pmap_pte(kernel_pmap, vaddr);
    if (ptep == NULL) {
        panic("pmap_verify_noncacheable: no translation for 0x%lx", vaddr);
    }
    /* Non-cacheable OK */
    if (*ptep & (INTEL_PTE_NCACHE)) {
        return pte_to_pa(*ptep) | (vaddr & INTEL_OFFMASK);
    }
    /* Write-combined OK */
    if (*ptep & (INTEL_PTE_PAT)) {
        return pte_to_pa(*ptep) | (vaddr & INTEL_OFFMASK);
    }
    panic("pmap_verify_noncacheable: IO read from a cacheable address? address: 0x%lx, PTE: %p, *PTE: 0x%llx", vaddr, ptep, *ptep);
    /*NOTREACHED*/
    return 0;
}
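
/*
 * Usage sketch (illustrative, hypothetical helper): callers pass a kernel
 * virtual address they are about to use for device I/O; the routine above
 * returns the physical address when the mapping is uncached or
 * write-combined and panics otherwise.
 */
#if 0 /* usage sketch */
static uint64_t
example_checked_mmio_read(uintptr_t mmio_vaddr)
{
    (void)pmap_verify_noncacheable(mmio_vaddr); /* panics if cacheable */
    return *(volatile uint64_t *)mmio_vaddr;
}
#endif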

void
trust_cache_init(void)
{
    // Unsupported on this architecture.
}

kern_return_t
pmap_load_legacy_trust_cache(struct pmap_legacy_trust_cache __unused *trust_cache,
    const vm_size_t __unused trust_cache_len)
{
    // Unsupported on this architecture.
    return KERN_NOT_SUPPORTED;
}

pmap_tc_ret_t
pmap_load_image4_trust_cache(struct pmap_image4_trust_cache __unused *trust_cache,
    const vm_size_t __unused trust_cache_len,
    uint8_t const * __unused img4_manifest,
    const vm_size_t __unused img4_manifest_buffer_len,
    const vm_size_t __unused img4_manifest_actual_len,
    bool __unused dry_run)
{
    // Unsupported on this architecture.
    return PMAP_TC_UNKNOWN_FORMAT;
}


bool
pmap_is_trust_cache_loaded(const uuid_t __unused uuid)
{
    // Unsupported on this architecture.
    return false;
}

bool
pmap_lookup_in_loaded_trust_caches(const uint8_t __unused cdhash[20])
{
    // Unsupported on this architecture.
    return false;
}

uint32_t
pmap_lookup_in_static_trust_cache(const uint8_t __unused cdhash[20])
{
    // Unsupported on this architecture.
    return false;
}

int
pmap_cs_configuration(void)
{
    // Unsupported on this architecture.
    return 0;
}

SIMPLE_LOCK_DECLARE(pmap_compilation_service_cdhash_lock, 0);
uint8_t pmap_compilation_service_cdhash[CS_CDHASH_LEN] = { 0 };

void
pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
{
    simple_lock(&pmap_compilation_service_cdhash_lock, LCK_GRP_NULL);
    memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
    simple_unlock(&pmap_compilation_service_cdhash_lock);

#if DEVELOPMENT || DEBUG
    printf("Added Compilation Service CDHash through the PMAP: 0x%02X 0x%02X 0x%02X 0x%02X\n", cdhash[0], cdhash[1], cdhash[2], cdhash[3]);
#endif
}

bool
pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
{
    bool match = false;

    simple_lock(&pmap_compilation_service_cdhash_lock, LCK_GRP_NULL);
    if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
        match = true;
    }
    simple_unlock(&pmap_compilation_service_cdhash_lock);

#if DEVELOPMENT || DEBUG
    if (match) {
        printf("Matched Compilation Service CDHash through the PMAP\n");
    }
#endif

    return match;
}

static bool pmap_local_signing_public_key_set = false;
static uint8_t pmap_local_signing_public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE] = { 0 };

static bool
pmap_local_signing_public_key_is_set(void)
{
    return os_atomic_load(&pmap_local_signing_public_key_set, relaxed);
}

void
pmap_set_local_signing_public_key(const uint8_t public_key[PMAP_ECC_P384_PUBLIC_KEY_SIZE])
{
    bool key_set = false;

    /*
     * os_atomic_cmpxchg returns true in case the exchange was successful. For us,
     * a successful exchange means that the local signing public key has _not_ been
     * set. In case the key has been set, we panic as we would never expect the
     * kernel to attempt to set the key more than once.
     */
    key_set = !os_atomic_cmpxchg(&pmap_local_signing_public_key_set, false, true, relaxed);

    if (key_set) {
        panic("attempted to set the local signing public key multiple times");
    }

    memcpy(pmap_local_signing_public_key, public_key, PMAP_ECC_P384_PUBLIC_KEY_SIZE);

#if DEVELOPMENT || DEBUG
    printf("Set local signing public key\n");
#endif
}
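
/*
 * Illustrative sketch of the one-shot latch used above: the first caller
 * flips the flag false -> true and proceeds; every later caller sees the
 * exchange fail and panics. The helper is hypothetical.
 */
#if 0 /* usage sketch */
static void
example_set_once(bool *flag)
{
    if (!os_atomic_cmpxchg(flag, false, true, relaxed)) {
        panic("value may only be set once");
    }
}
#endif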

uint8_t *
pmap_get_local_signing_public_key(void)
{
    if (pmap_local_signing_public_key_is_set()) {
        return pmap_local_signing_public_key;
    }
    return NULL;
}

void
pmap_unrestrict_local_signing(
    __unused const uint8_t cdhash[CS_CDHASH_LEN])
{
    // TODO: Once all changes across XNU and AMFI have been submitted, panic.
}

bool
pmap_query_entitlements(
    __unused pmap_t pmap,
    __unused CEQuery_t query,
    __unused size_t queryLength,
    __unused CEQueryContext_t finalContext)
{
#if !PMAP_SUPPORTS_ENTITLEMENT_CHECKS
    panic("PMAP_CS: do not use this API without checking for '#if PMAP_SUPPORTS_ENTITLEMENT_CHECKS'");
#endif

    panic("PMAP_SUPPORTS_ENTITLEMENT_CHECKS should not be defined on this platform");
}

bool
pmap_cs_enabled(void)
{
    return false;
}

bool
pmap_in_ppl(void)
{
    // Nonexistent on this architecture.
    return false;
}

bool
pmap_has_ppl(void)
{
    // Not supported on this architecture.
    return false;
}

void* __attribute__((noreturn))
pmap_image4_pmap_data(
    __unused size_t *allocated_size)
{
    panic("PMAP_IMG4: image4 data not available on this architecture");
}

void __attribute__((noreturn))
pmap_image4_set_nonce(
    __unused const img4_nonce_domain_index_t ndi,
    __unused const img4_nonce_t *nonce)
{
    panic("PMAP_IMG4: set nonce API not supported on this architecture");
}

void __attribute__((noreturn))
pmap_image4_roll_nonce(
    __unused const img4_nonce_domain_index_t ndi)
{
    panic("PMAP_IMG4: roll nonce API not supported on this architecture");
}

errno_t __attribute__((noreturn))
pmap_image4_copy_nonce(
    __unused const img4_nonce_domain_index_t ndi,
    __unused img4_nonce_t *nonce_out)
{
    panic("PMAP_IMG4: copy nonce API not supported on this architecture");
}

errno_t __attribute__((noreturn))
pmap_image4_execute_object(
    __unused img4_runtime_object_spec_index_t obj_spec_index,
    __unused const img4_buff_t *payload,
    __unused const img4_buff_t *_Nullable manifest)
{
    panic("PMAP_IMG4: execute object API not supported on this architecture");
}

errno_t __attribute__((noreturn))
pmap_image4_copy_object(
    __unused img4_runtime_object_spec_index_t obj_spec_index,
    __unused vm_address_t object_out,
    __unused size_t *object_length)
{
    panic("PMAP_IMG4: copy object API not supported on this architecture");
}

void
pmap_lockdown_image4_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
{
    // Unsupported on this architecture.
}

void
pmap_lockdown_image4_late_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
{
    // Unsupported on this architecture.
}

kern_return_t
pmap_cs_allow_invalid(__unused pmap_t pmap)
{
    // Unsupported on this architecture.
    return KERN_SUCCESS;
}

void *
pmap_claim_reserved_ppl_page(void)
{
    // Unsupported on this architecture.
    return NULL;
}

void
pmap_free_reserved_ppl_page(void __unused *kva)
{
    // Unsupported on this architecture.
}

kern_return_t
pmap_cs_fork_prepare(__unused pmap_t old_pmap, __unused pmap_t new_pmap)
{
    // PMAP_CS isn't enabled for x86_64.
    return KERN_SUCCESS;
}

#if DEVELOPMENT || DEBUG
/*
 * Used for unit testing recovery from text corruptions.
 */
kern_return_t
pmap_test_text_corruption(pmap_paddr_t pa)
{
    int pai;
    uint8_t *va;

    pai = ppn_to_pai(atop(pa));
    if (!IS_MANAGED_PAGE(pai)) {
        return KERN_FAILURE;
    }

    va = (uint8_t *)PHYSMAP_PTOV(pa);
    va[0] = 0x0f; /* opcode for UD2 */
    va[1] = 0x0b;

    return KERN_SUCCESS;
}
#endif /* DEVELOPMENT || DEBUG */