/*
 * Copyright (c) 2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page_internal.h>
#include <vm/pmap.h>
#include <kern/ledger.h>
#include <kern/thread.h>
#if defined(__arm64__)
#include <pexpert/arm64/board_config.h>
#if CONFIG_SPTM
#include <arm64/sptm/pmap/pmap_pt_geometry.h>
#include <arm64/sptm/pmap/pmap_data.h>
#else /* CONFIG_SPTM */
#include <arm/pmap/pmap_pt_geometry.h>
#endif /* CONFIG_SPTM */
#endif /* defined(__arm64__) */
#include <vm/vm_map_xnu.h>
#include <sys/code_signing.h>

extern void read_random(void* buffer, u_int numBytes);

extern ledger_template_t task_ledger_template;

extern boolean_t arm_force_fast_fault(ppnum_t, vm_prot_t, int, void*);
extern kern_return_t arm_fast_fault(pmap_t, vm_map_address_t, vm_prot_t, bool, bool);

kern_return_t test_pmap_enter_disconnect(unsigned int num_loops);
kern_return_t test_pmap_compress_remove(unsigned int num_loops);
kern_return_t test_pmap_exec_remove(unsigned int num_loops);
kern_return_t test_pmap_nesting(unsigned int num_loops);
kern_return_t test_pmap_iommu_disconnect(void);
kern_return_t test_pmap_extended(void);
void test_pmap_call_overhead(unsigned int num_loops);
uint64_t test_pmap_page_protect_overhead(unsigned int num_loops, unsigned int num_aliases);
#if CONFIG_SPTM
kern_return_t test_pmap_huge_pv_list(unsigned int num_loops, unsigned int num_mappings);
kern_return_t test_pmap_reentrance(unsigned int num_loops);
kern_return_t test_surt(unsigned int num_surts);
#endif /* CONFIG_SPTM */

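/* Fixed, page-aligned VA used as the base address for all test mappings below. */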
#define PMAP_TEST_VA (0xDEADULL << PAGE_SHIFT)

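/**
 * Arguments shared between a main test thread and its worker threads: the
 * pmap under test, the base VA and physical page being mapped, an optional
 * processor to bind the worker to, and a stop flag the main thread sets to
 * end the test.
 */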
typedef struct {
	pmap_t pmap;
	vm_map_address_t va;
	processor_t proc;
	ppnum_t pn;
	volatile boolean_t stop;
} pmap_test_thread_args;


/**
 * Helper for creating a new pmap to be used for testing.
 *
 * @param flags Flags to pass to pmap_create_options()
 *
 * @return The newly-allocated pmap, or NULL if allocation fails.
 */
static pmap_t
pmap_create_wrapper(unsigned int flags)
{
	pmap_t new_pmap = NULL;
	ledger_t ledger;
	assert(task_ledger_template != NULL);
	if ((ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
		return NULL;
	}
	new_pmap = pmap_create_options(ledger, 0, flags);
	ledger_dereference(ledger);
	return new_pmap;
}

/**
 * Helper for allocating a wired VM page to be used for testing.
 *
 * @note The allocated page will be wired with the VM_KERN_MEMORY_PTE tag,
 *       which will attribute the page to the pmap module.
 *
 * @return The newly-allocated vm_page_t, or VM_PAGE_NULL if allocation fails.
 */
static vm_page_t
pmap_test_alloc_vm_page(void)
{
	vm_page_t m = vm_page_grab();
	if (m != VM_PAGE_NULL) {
		vm_page_lock_queues();
		vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
		vm_page_unlock_queues();
	}
	return m;
}

/**
 * Helper for freeing a VM page previously allocated by pmap_test_alloc_vm_page().
 *
 * @param m The page to free. This may be VM_PAGE_NULL, in which case this
 *          function will do nothing.
 */
static void
pmap_test_free_vm_page(vm_page_t m)
{
	if (m != VM_PAGE_NULL) {
		vm_page_lock_queues();
		vm_page_free(m);
		vm_page_unlock_queues();
	}
}

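/**
 * Worker thread that repeatedly disconnects all mappings of the test page
 * until the main thread sets the stop flag, then wakes the main thread.
 *
 * @param arg Thread argument parameter, actually of type pmap_test_thread_args*
 * @param wres Wait result, currently unused.
 */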
static void
pmap_disconnect_thread(void *arg, wait_result_t __unused wres)
{
	pmap_test_thread_args *args = arg;
	do {
		pmap_disconnect(args->pn);
	} while (!args->stop);
	thread_wakeup((event_t)args);
}

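/**
 * Test that pmap_enter() of a physical page can proceed concurrently with
 * pmap_disconnect() of that same page without corrupting pmap state.
 *
 * @param num_loops The number of pmap_enter() iterations to perform.
 *
 * @return KERN_SUCCESS if the test runs to completion, otherwise an
 *         appropriate error code.
 */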
kern_return_t
test_pmap_enter_disconnect(unsigned int num_loops)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t disconnect_thread;
	pmap_t new_pmap = pmap_create_wrapper(0);
	if (new_pmap == NULL) {
		return KERN_FAILURE;
	}
	vm_page_t m = pmap_test_alloc_vm_page();
	if (m == VM_PAGE_NULL) {
		pmap_destroy(new_pmap);
		return KERN_FAILURE;
	}
	ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
	pmap_test_thread_args args = {.pmap = new_pmap, .stop = FALSE, .pn = phys_page};
	kern_return_t res = kernel_thread_start_priority(pmap_disconnect_thread,
	    &args, thread_kern_get_pri(current_thread()), &disconnect_thread);
	if (res) {
		pmap_destroy(new_pmap);
		pmap_test_free_vm_page(m);
		return res;
	}
	thread_deallocate(disconnect_thread);

	while (num_loops-- != 0) {
		kr = pmap_enter(new_pmap, PMAP_TEST_VA, phys_page,
		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_MAPPING_TYPE_INFER);
		assert(kr == KERN_SUCCESS);
	}

	assert_wait((event_t)&args, THREAD_UNINT);
	args.stop = TRUE;
	thread_block(THREAD_CONTINUE_NULL);

	pmap_remove(new_pmap, PMAP_TEST_VA, PMAP_TEST_VA + PAGE_SIZE);
	pmap_test_free_vm_page(m);
	pmap_destroy(new_pmap);
	return KERN_SUCCESS;
}

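/**
 * Worker thread that repeatedly enters and removes a mapping of the test
 * page until the main thread sets the stop flag, then wakes the main thread.
 *
 * @param arg Thread argument parameter, actually of type pmap_test_thread_args*
 * @param wres Wait result, currently unused.
 */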
static void
pmap_remove_thread(void *arg, wait_result_t __unused wres)
{
	pmap_test_thread_args *args = arg;
	do {
		__assert_only kern_return_t kr = pmap_enter_options(args->pmap, args->va, args->pn,
		    VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_OPTIONS_INTERNAL, NULL, PMAP_MAPPING_TYPE_INFER);
		assert(kr == KERN_SUCCESS);
		pmap_remove(args->pmap, args->va, args->va + PAGE_SIZE);
	} while (!args->stop);
	thread_wakeup((event_t)args);
}

/**
 * Test that a mapping to a physical page can be concurrently removed while
 * the page is being compressed, without triggering accounting panics.
 *
 * @param num_loops The number of test loops to run.
 *
 * @return KERN_SUCCESS if the test runs to completion, otherwise an
 *         appropriate error code.
 */
kern_return_t
test_pmap_compress_remove(unsigned int num_loops)
{
	thread_t remove_thread;
	pmap_t new_pmap = pmap_create_wrapper(0);
	if (new_pmap == NULL) {
		return KERN_FAILURE;
	}
	vm_page_t m = pmap_test_alloc_vm_page();
	if (m == VM_PAGE_NULL) {
		pmap_destroy(new_pmap);
		return KERN_FAILURE;
	}
	ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
	pmap_test_thread_args args = {.pmap = new_pmap, .stop = FALSE, .va = PMAP_TEST_VA, .pn = phys_page};
	kern_return_t res = kernel_thread_start_priority(pmap_remove_thread,
	    &args, thread_kern_get_pri(current_thread()), &remove_thread);
	if (res) {
		pmap_destroy(new_pmap);
		pmap_test_free_vm_page(m);
		return res;
	}
	thread_deallocate(remove_thread);

	while (num_loops-- != 0) {
		pmap_disconnect_options(phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
	}

	assert_wait((event_t)&args, THREAD_UNINT);
	args.stop = TRUE;
	thread_block(THREAD_CONTINUE_NULL);

	pmap_remove(new_pmap, PMAP_TEST_VA, PMAP_TEST_VA + PAGE_SIZE);
	pmap_destroy(new_pmap);
	pmap_test_free_vm_page(m);
	return KERN_SUCCESS;
}


kern_return_t
test_pmap_exec_remove(unsigned int num_loops __unused)
{
	return KERN_NOT_SUPPORTED;
}


#if defined(__arm64__)

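/**
 * Geometry of the test nesting region: 16 L2 blocks (ARM_16K_TT_L2_SIZE each)
 * at the base of the shared region. The first unnest covers the leading 14
 * blocks and the final unnest the remaining 2; pmap_trim() trims one block
 * off the front of the region and two off the back, keeping the middle 13.
 */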
static const vm_map_address_t nesting_start = SHARED_REGION_BASE;
static const vm_map_address_t nesting_size = 16 * ARM_16K_TT_L2_SIZE;
static const vm_map_address_t final_unnest_size = 2 * ARM_16K_TT_L2_SIZE;
static const vm_map_address_t initial_unnest_size = nesting_size - final_unnest_size;
static const vm_map_address_t trimmed_start = nesting_start + ARM_16K_TT_L2_SIZE;
static const vm_map_address_t trimmed_size = nesting_size - (3 * ARM_16K_TT_L2_SIZE);

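/**
 * Worker thread that creates its own top-level pmap, nests the shared test
 * region into it, performs the initial partial unnest, and then waits for the
 * stop signal before unnesting the remainder and destroying its pmap.
 *
 * @param arg Thread argument parameter, actually of type pmap_test_thread_args*
 * @param wres Wait result, currently unused.
 */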
static void
pmap_nest_thread(void *arg, wait_result_t __unused wres)
{
	const pmap_test_thread_args *args = arg;
	pmap_t main_pmap = pmap_create_wrapper(0);
	kern_return_t kr;

	thread_bind(args->proc);
	thread_block(THREAD_CONTINUE_NULL);

	/**
	 * Exercise nesting and unnesting while bound to the specified CPU (if non-NULL).
	 * The unnesting size here should match the unnesting size used in the first
	 * unnesting step of the main thread, in order to avoid concurrently unnesting
	 * beyond that region and violating the checks against over-unnesting performed
	 * in the main thread.
	 */
	if (main_pmap != NULL) {
		pmap_set_shared_region(main_pmap, args->pmap, nesting_start, nesting_size);
		kr = pmap_nest(main_pmap, args->pmap, nesting_start, nesting_size);
		assert(kr == KERN_SUCCESS);

		kr = pmap_unnest(main_pmap, nesting_start, initial_unnest_size);
		assert(kr == KERN_SUCCESS);
	}

	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	assert_wait((event_t)(uintptr_t)&(args->stop), THREAD_UNINT);
	if (!args->stop) {
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	/* Unnest all remaining mappings so that we can safely destroy our pmap. */
	if (main_pmap != NULL) {
		kr = pmap_unnest(main_pmap, nesting_start + initial_unnest_size, final_unnest_size);
		assert(kr == KERN_SUCCESS);
		pmap_destroy(main_pmap);
	}

	thread_wakeup((event_t)arg);
}

/**
 * Test that pmap_nest() and pmap_unnest() work correctly when executed concurrently from
 * multiple threads. Spawn some worker threads at elevated priority and bound to the
 * same CPU in order to provoke preemption of the nest/unnest operation.
 *
 * @param num_loops The number of nest/unnest loops to perform. This should be kept to
 *        a small number because each cycle is expensive and may consume a global shared
 *        region ID.
 *
 * @return KERN_SUCCESS if all tests succeed, an appropriate error code otherwise.
 */
kern_return_t
test_pmap_nesting(unsigned int num_loops)
{
	kern_return_t kr = KERN_SUCCESS;

	vm_page_t m1 = VM_PAGE_NULL, m2 = VM_PAGE_NULL;

	m1 = pmap_test_alloc_vm_page();
	m2 = pmap_test_alloc_vm_page();
	if ((m1 == VM_PAGE_NULL) || (m2 == VM_PAGE_NULL)) {
		kr = KERN_FAILURE;
		goto test_nesting_cleanup;
	}
	const ppnum_t pp1 = VM_PAGE_GET_PHYS_PAGE(m1);
	const ppnum_t pp2 = VM_PAGE_GET_PHYS_PAGE(m2);
	for (unsigned int i = 0; (i < num_loops) && (kr == KERN_SUCCESS); i++) {
		pmap_t nested_pmap = pmap_create_wrapper(PMAP_CREATE_NESTED);
		pmap_t main_pmap = pmap_create_wrapper(0);
		if ((nested_pmap == NULL) || (main_pmap == NULL)) {
			pmap_destroy(main_pmap);
			pmap_destroy(nested_pmap);
			kr = KERN_FAILURE;
			break;
		}
		pmap_set_nested(nested_pmap);
#if CODE_SIGNING_MONITOR
		csm_setup_nested_address_space(nested_pmap, nesting_start, nesting_size);
#endif /* CODE_SIGNING_MONITOR */
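		/* Populate a random subset of the region that will survive the later pmap_trim() with mappings of the two test pages. */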
		for (vm_map_address_t va = trimmed_start; va < (trimmed_start + trimmed_size); va += PAGE_SIZE) {
			uint8_t rand;
			read_random(&rand, sizeof(rand));
			uint8_t rand_mod = rand % 3;
			if (rand_mod == 0) {
				continue;
			}
			kr = pmap_enter(nested_pmap, va, (rand_mod == 1) ? pp1 : pp2, VM_PROT_READ,
			    VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_MAPPING_TYPE_INFER);
			assert(kr == KERN_SUCCESS);
		}
		pmap_set_shared_region(main_pmap, nested_pmap, nesting_start, nesting_size);
		kr = pmap_nest(main_pmap, nested_pmap, nesting_start, nesting_size);
		assert(kr == KERN_SUCCESS);

		/* Validate that the initial nest operation produced global mappings within the nested pmap. */
		for (vm_map_address_t va = nesting_start; va < (nesting_start + nesting_size); va += PAGE_SIZE) {
			pt_entry_t *nested_pte = pmap_pte(nested_pmap, va);
			pt_entry_t *main_pte = pmap_pte(main_pmap, va);
			if (nested_pte != main_pte) {
				panic("%s: nested_pte (%p) is not identical to main_pte (%p) for va 0x%llx",
				    __func__, nested_pte, main_pte, (unsigned long long)va);
			}
			if ((nested_pte != NULL) && (*nested_pte != ARM_PTE_EMPTY) && (*nested_pte & ARM_PTE_NG)) {
				panic("%s: nested_pte (%p) is not global for va 0x%llx",
				    __func__, nested_pte, (unsigned long long)va);
			}
		}

		pmap_trim(main_pmap, nested_pmap, trimmed_start, trimmed_size);

		/**
		 * Validate that the trimmed-off regions at the beginning and end no longer have L3 tables
		 * in the main or nested pmaps.
		 */
		if (pmap_pte(main_pmap, nesting_start) != NULL) {
			panic("%s: L3 table still present in main pmap for trimmed VA 0x%llx", __func__,
			    (unsigned long long)nesting_start);
		}
		if (pmap_pte(main_pmap, trimmed_start + trimmed_size) != NULL) {
			panic("%s: L3 table still present in main pmap for trimmed VA 0x%llx", __func__,
			    (unsigned long long)(trimmed_start + trimmed_size));
		}
		if (pmap_pte(nested_pmap, nesting_start) != NULL) {
			panic("%s: L3 table still present in nested pmap for trimmed VA 0x%llx", __func__,
			    (unsigned long long)nesting_start);
		}
		if (pmap_pte(nested_pmap, trimmed_start + trimmed_size) != NULL) {
			panic("%s: L3 table still present in nested pmap for trimmed VA 0x%llx", __func__,
			    (unsigned long long)(trimmed_start + trimmed_size));
		}

		/* Now kick off various worker threads to concurrently nest, trim, and unnest. */
		const processor_t nest_proc = current_processor();
		thread_bind(nest_proc);
		thread_block(THREAD_CONTINUE_NULL);

		/**
		 * Avoid clogging the CPUs with high-priority kernel threads on older devices.
		 * Testing has shown this may provoke a userspace watchdog timeout.
		 */
#define TEST_NEST_THREADS 4
#if TEST_NEST_THREADS >= MAX_CPUS
#undef TEST_NEST_THREADS
#define TEST_NEST_THREADS (MAX_CPUS - 1)
#endif
		thread_t nest_threads[TEST_NEST_THREADS];
		kern_return_t thread_krs[TEST_NEST_THREADS];
		pmap_test_thread_args args[TEST_NEST_THREADS];
		for (unsigned int j = 0; j < (sizeof(nest_threads) / sizeof(nest_threads[0])); j++) {
			args[j].pmap = nested_pmap;
			args[j].stop = FALSE;
			/**
			 * Spawn the worker threads at various priorities at the high end of the kernel range,
			 * and bind every other thread to the same CPU as this thread to provoke preemption,
			 * while also allowing some threads to run concurrently on other CPUs.
			 */
			args[j].proc = ((j % 2) ? PROCESSOR_NULL : nest_proc);
			thread_krs[j] = kernel_thread_start_priority(pmap_nest_thread, &args[j], MAXPRI_KERNEL - (j % 4), &nest_threads[j]);
			if (thread_krs[j] == KERN_SUCCESS) {
				thread_set_thread_name(nest_threads[j], "pmap_nest_thread");
			}
		}

		/* Unnest the bulk of the nested region and validate that it produced the expected PTE contents. */
		kr = pmap_unnest(main_pmap, nesting_start, initial_unnest_size);
		assert(kr == KERN_SUCCESS);

		/**
		 * Explicitly install a new mapping in the nested pmap after unnesting; this should be created non-global,
		 * which we'll verify below.
		 */
		kr = pmap_enter(nested_pmap, trimmed_start, pp1, VM_PROT_READ,
		    VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_MAPPING_TYPE_INFER);
		assert(kr == KERN_SUCCESS);

		for (vm_map_address_t va = trimmed_start; va < (nesting_start + initial_unnest_size); va += PAGE_SIZE) {
			pt_entry_t *nested_pte = pmap_pte(nested_pmap, va);
			pt_entry_t *main_pte = pmap_pte(main_pmap, va);

			if (main_pte != NULL) {
				panic("%s: main_pte (%p) is not NULL for unnested VA 0x%llx",
				    __func__, main_pte, (unsigned long long)va);
			}
			if ((nested_pte != NULL) && (*nested_pte != ARM_PTE_EMPTY) && !(*nested_pte & ARM_PTE_NG)) {
				panic("%s: nested_pte (%p) is global for va 0x%llx following unnest",
				    __func__, nested_pte, (unsigned long long)va);
			}
		}

		/* Validate that the prior unnest did not unnest too much. */
		for (vm_map_address_t va = nesting_start + initial_unnest_size; va < (trimmed_start + trimmed_size); va += PAGE_SIZE) {
			pt_entry_t *nested_pte = pmap_pte(nested_pmap, va);
			pt_entry_t *main_pte = pmap_pte(main_pmap, va);
			if (nested_pte != main_pte) {
				panic("%s: nested_pte (%p) is not identical to main_pte (%p) for va 0x%llx following adjacent unnest",
				    __func__, nested_pte, main_pte, (unsigned long long)va);
			}
			if ((nested_pte != NULL) && (*nested_pte != ARM_PTE_EMPTY) && (*nested_pte & ARM_PTE_NG)) {
				panic("%s: nested_pte (%p) is not global for va 0x%llx following adjacent unnest",
				    __func__, nested_pte, (unsigned long long)va);
			}
		}

		/* Now unnest the remainder. */
		kr = pmap_unnest(main_pmap, nesting_start + initial_unnest_size, final_unnest_size);
		assert(kr == KERN_SUCCESS);

		thread_bind(PROCESSOR_NULL);
		thread_block(THREAD_CONTINUE_NULL);

		for (vm_map_address_t va = nesting_start + initial_unnest_size; va < (trimmed_start + trimmed_size); va += PAGE_SIZE) {
			pt_entry_t *nested_pte = pmap_pte(nested_pmap, va);
			pt_entry_t *main_pte = pmap_pte(main_pmap, va);

			if (main_pte != NULL) {
				panic("%s: main_pte (%p) is not NULL for unnested VA 0x%llx",
				    __func__, main_pte, (unsigned long long)va);
			}
			if ((nested_pte != NULL) && (*nested_pte != ARM_PTE_EMPTY) && !(*nested_pte & ARM_PTE_NG)) {
				panic("%s: nested_pte (%p) is global for va 0x%llx following unnest",
				    __func__, nested_pte, (unsigned long long)va);
			}
		}

		for (unsigned int j = 0; j < (sizeof(nest_threads) / sizeof(nest_threads[0])); j++) {
			if (thread_krs[j] == KERN_SUCCESS) {
				assert_wait((event_t)&args[j], THREAD_UNINT);
				args[j].stop = TRUE;
				thread_wakeup((event_t)(uintptr_t)&(args[j].stop));
				thread_block(THREAD_CONTINUE_NULL);
			} else {
				kr = thread_krs[j];
			}
		}

		pmap_remove(nested_pmap, nesting_start, nesting_start + nesting_size);
		pmap_destroy(main_pmap);
		pmap_destroy(nested_pmap);
	}

test_nesting_cleanup:
	pmap_test_free_vm_page(m1);
	pmap_test_free_vm_page(m2);

	return kr;
}

#else /* defined(__arm64__) */

kern_return_t
test_pmap_nesting(unsigned int num_loops __unused)
{
	return KERN_NOT_SUPPORTED;
}

#endif /* defined(__arm64__) */

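/**
 * Placeholder for IOMMU disconnect testing; currently a stub that
 * unconditionally reports success.
 */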
kern_return_t
test_pmap_iommu_disconnect(void)
{
	return KERN_SUCCESS;
}

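/**
 * Placeholder for extended pmap testing; currently a stub that
 * unconditionally reports success.
 */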
kern_return_t
test_pmap_extended(void)
{
	return KERN_SUCCESS;
}

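/**
 * Measure the base overhead of a pmap call by invoking pmap_nop() in a loop
 * on the current thread's pmap. No-op on non-arm64 configurations.
 *
 * @param num_loops The number of pmap_nop() calls to perform.
 */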
void
test_pmap_call_overhead(unsigned int num_loops __unused)
{
#if defined(__arm64__)
	pmap_t pmap = current_thread()->map->pmap;
	for (unsigned int i = 0; i < num_loops; ++i) {
		pmap_nop(pmap);
	}
#endif
}

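/**
 * Measure the time spent in pmap_page_protect_options() as a function of the
 * number of alias mappings to the target page. Each loop creates num_aliases
 * mappings of the test page, downgrades them all to read-only, and then tears
 * them down again.
 *
 * @param num_loops The number of measurement loops to run.
 * @param num_aliases The number of alias mappings to create per loop.
 *
 * @return The accumulated time spent in pmap_page_protect_options(), in mach
 *         absolute time units (0 on non-arm64 configurations or if setup fails).
 */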
uint64_t
test_pmap_page_protect_overhead(unsigned int num_loops __unused, unsigned int num_aliases __unused)
{
	uint64_t duration = 0;
#if defined(__arm64__)
	pmap_t new_pmap = pmap_create_wrapper(0);
	vm_page_t m = pmap_test_alloc_vm_page();
	kern_return_t kr = KERN_SUCCESS;

	if ((new_pmap == NULL) || (m == VM_PAGE_NULL)) {
		goto ppo_cleanup;
	}

	ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);

	for (unsigned int loop = 0; loop < num_loops; ++loop) {
		for (unsigned int alias = 0; alias < num_aliases; ++alias) {
			kr = pmap_enter(new_pmap, PMAP_TEST_VA + (PAGE_SIZE * alias), phys_page,
			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_MAPPING_TYPE_INFER);
			assert(kr == KERN_SUCCESS);
		}

		uint64_t start_time = mach_absolute_time();

		pmap_page_protect_options(phys_page, VM_PROT_READ, 0, NULL);

		duration += (mach_absolute_time() - start_time);

		pmap_remove(new_pmap, PMAP_TEST_VA, PMAP_TEST_VA + (num_aliases * PAGE_SIZE));
	}

ppo_cleanup:
	pmap_test_free_vm_page(m);
	if (new_pmap != NULL) {
		pmap_destroy(new_pmap);
	}
#endif
	return duration;
}

#if CONFIG_SPTM

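/**
 * Extended argument block for the huge-PV-list test: the base thread
 * arguments plus the number of alias mappings to the test page, a live
 * count of running worker threads, and the timer call used to panic if
 * test teardown hangs.
 */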
typedef struct {
	pmap_test_thread_args args;
	unsigned int num_mappings;
	volatile unsigned int nthreads;
	thread_call_t panic_callout;
} pmap_hugepv_test_thread_args;

/**
 * Worker thread that exercises pmap_remove() and pmap_enter() with a huge PV list.
 * This thread relies on the fact that PV lists are structured with newer PTEs at
 * the beginning of the list, so it maximizes PV list traversal time by removing
 * mappings sequentially starting with the beginning VA of the mapping region
 * (thus the oldest mapping), and then re-entering that removed mapping at the
 * beginning of the list.
 *
 * @param arg Thread argument parameter, actually of type pmap_hugepv_test_thread_args*
 * @param wres Wait result, currently unused.
 */
static void
hugepv_remove_enter_thread(void *arg, wait_result_t __unused wres)
{
	unsigned int mapping = 0;
	pmap_hugepv_test_thread_args *args = arg;
	do {
		vm_map_address_t va = args->args.va + ((vm_offset_t)mapping << PAGE_SHIFT);
		pmap_remove(args->args.pmap, va, va + PAGE_SIZE);
		kern_return_t kr = pmap_enter_options(args->args.pmap, va, args->args.pn,
		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_OPTIONS_INTERNAL,
		    NULL, PMAP_MAPPING_TYPE_INFER);
		assert(kr == KERN_SUCCESS);
		if (++mapping == args->num_mappings) {
			mapping = 0;
		}
	} while (!args->args.stop);
	/* Ensure the update of nthreads is not speculated ahead of checking the stop flag. */
	os_atomic_thread_fence(acquire);
	if (os_atomic_dec(&args->nthreads, relaxed) == 0) {
		thread_wakeup((event_t)args);
	}
}

/**
 * Worker thread to exercise fast-fault behavior with a huge PV list.
 * This thread first removes permissions from all mappings for the page, which
 * does not actually remove the mappings but rather clears their AF bit.
 * It then simulates a fast fault on one random mapping in the list, which
 * also clears the fast-fault state for the first 64 mappings in the list.
 *
 * @param arg Thread argument parameter, actually of type pmap_hugepv_test_thread_args*
 * @param wres Wait result, currently unused.
 */
static void
hugepv_fast_fault_thread(void *arg, wait_result_t __unused wres)
{
	pmap_hugepv_test_thread_args *args = arg;
	do {
		boolean_t success = arm_force_fast_fault(args->args.pn, VM_PROT_NONE, 0, NULL);
		assert(success);
		unsigned int rand;
		read_random(&rand, sizeof(rand));
		unsigned int mapping = rand % args->num_mappings;
		arm_fast_fault(args->args.pmap, args->args.va + ((vm_offset_t)mapping << PAGE_SHIFT), VM_PROT_READ, false, FALSE);
	} while (!args->args.stop);
	/* Ensure the update of nthreads is not speculated ahead of checking the stop flag. */
	os_atomic_thread_fence(acquire);
	if (os_atomic_dec(&args->nthreads, relaxed) == 0) {
		thread_wakeup((event_t)args);
	}
}

/**
 * Worker thread for updating cacheability of a physical page with a huge PV list.
 * This thread simply twiddles all mappings between write-combined and normal (write-back)
 * cacheability.
 *
 * @param arg Thread argument parameter, actually of type pmap_hugepv_test_thread_args*
 * @param wres Wait result, currently unused.
 */
static void
hugepv_cache_attr_thread(void *arg, wait_result_t __unused wres)
{
	pmap_hugepv_test_thread_args *args = arg;
	do {
		pmap_set_cache_attributes(args->args.pn, VM_WIMG_WCOMB);
		pmap_set_cache_attributes(args->args.pn, VM_WIMG_DEFAULT);
	} while (!args->args.stop);
	/* Ensure the update of nthreads is not speculated ahead of checking the stop flag. */
	os_atomic_thread_fence(acquire);
	if (os_atomic_dec(&args->nthreads, relaxed) == 0) {
		thread_wakeup((event_t)args);
	}
}

/**
 * Helper function for starting the 2.5-minute panic timer to ensure that we
 * don't get stuck during test teardown.
 *
 * @param panic_callout The timer call to use for the panic callout.
 */
static inline void
huge_pv_start_panic_timer(thread_call_t panic_callout)
{
	uint64_t deadline;
	clock_interval_to_deadline(150, NSEC_PER_SEC, &deadline);
	thread_call_enter_delayed(panic_callout, deadline);
}

/**
 * Timer callout that executes in case the huge PV test incurs excessive (>= 5 min)
 * runtime, which can happen due to unlucky scheduling of the main thread. In this
 * case we simply set the "stop" flag and expect the worker threads to exit gracefully.
 *
 * @param param0 The pmap_hugepv_test_thread_args used to control the test, cast
 *        as thread_call_param_t.
 * @param param1 Unused argument.
 */
static void
huge_pv_test_timeout(thread_call_param_t param0, __unused thread_call_param_t param1)
{
	pmap_hugepv_test_thread_args *args = (pmap_hugepv_test_thread_args*)param0;
	args->args.stop = TRUE;
	huge_pv_start_panic_timer(args->panic_callout);
}

/**
 * Timer callout that executes in case the huge PV test was canceled by
 * huge_pv_test_timeout above, but failed to terminate within 2.5 minutes.
 * This callout simply panics to allow inspection of the resultant coredump,
 * as it should never be reached under correct operation.
 *
 * @param param0 Unused argument.
 * @param param1 Unused argument.
 */
static void __attribute__((noreturn))
huge_pv_test_panic(__unused thread_call_param_t param0, __unused thread_call_param_t param1)
{
	panic("%s: test timed out", __func__);
}

/**
 * Main test thread for exercising contention on a massive physical-to-virtual
 * mapping list in the pmap. This thread creates a large number of mappings
 * (as requested by the caller) to the same physical page, spawns the above
 * worker threads to do different operations on that physical page, and then,
 * while that is going on, repeatedly calls pmap_page_protect_options() on the
 * page for the number of loops specified by the caller.
 *
 * @param num_loops Number of iterations to execute in the main thread before
 *        stopping the workers.
 * @param num_mappings The number of alias mappings to create for the same
 *        physical page.
 *
 * @return KERN_SUCCESS if the test succeeds, KERN_FAILURE if it encounters
 *         an unexpected setup failure. Any failed integrity check during
 *         the actual execution of the worker threads will panic.
 */
kern_return_t
test_pmap_huge_pv_list(unsigned int num_loops, unsigned int num_mappings)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t remove_enter_thread, fast_fault_thread, cache_attr_thread;
	if ((num_loops == 0) || (num_mappings == 0)) {
		/**
		 * If num_mappings is 0, we'll get into a case in which the
		 * remove_enter_thread leaves a single dangling mapping, triggering
		 * a panic when we free the page. This isn't a valid test
		 * configuration anyway.
		 */
		return KERN_SUCCESS;
	}
	pmap_t new_pmap = pmap_create_wrapper(0);
	if (new_pmap == NULL) {
		return KERN_FAILURE;
	}
	vm_page_t m = pmap_test_alloc_vm_page();
	if (m == VM_PAGE_NULL) {
		pmap_destroy(new_pmap);
		return KERN_FAILURE;
	}

	ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);

	for (unsigned int mapping = 0; mapping < num_mappings; ++mapping) {
		kr = pmap_enter(new_pmap, PMAP_TEST_VA + ((vm_offset_t)mapping << PAGE_SHIFT), phys_page,
		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE, PMAP_MAPPING_TYPE_INFER);
		assert(kr == KERN_SUCCESS);
	}

	thread_call_t huge_pv_panic_call = thread_call_allocate(huge_pv_test_panic, NULL);

	pmap_hugepv_test_thread_args args = {
		.args = {.pmap = new_pmap, .stop = FALSE, .va = PMAP_TEST_VA, .pn = phys_page},
		.nthreads = 0, .num_mappings = num_mappings, .panic_callout = huge_pv_panic_call
	};

	thread_call_t huge_pv_timer_call = thread_call_allocate(huge_pv_test_timeout, &args);

	kr = kernel_thread_start_priority(hugepv_remove_enter_thread,
	    &args, thread_kern_get_pri(current_thread()), &remove_enter_thread);
	if (kr != KERN_SUCCESS) {
		goto hugepv_cleanup;
	}
	++args.nthreads;
	thread_deallocate(remove_enter_thread);

	kr = kernel_thread_start_priority(hugepv_fast_fault_thread, &args,
	    thread_kern_get_pri(current_thread()), &fast_fault_thread);
	if (kr != KERN_SUCCESS) {
		goto hugepv_cleanup;
	}
	++args.nthreads;
	thread_deallocate(fast_fault_thread);

	kr = kernel_thread_start_priority(hugepv_cache_attr_thread, &args,
	    thread_kern_get_pri(current_thread()), &cache_attr_thread);
	if (kr != KERN_SUCCESS) {
		goto hugepv_cleanup;
	}
	++args.nthreads;
	thread_deallocate(cache_attr_thread);

	/**
	 * Set up a 5-minute timer to gracefully halt the test upon expiry.
	 * Ordinarily the test should complete in well under 5 minutes,
	 * but it can run longer and hit the 10-minute BATS timeout if this
	 * thread is really unlucky w.r.t. scheduling (which can happen if
	 * it is repeatedly preempted and starved by the other threads
	 * contending on the PVH lock).
	 */
	uint64_t deadline;
	clock_interval_to_deadline(300, NSEC_PER_SEC, &deadline);
	thread_call_enter_delayed(huge_pv_timer_call, deadline);

	for (unsigned int i = 0; (i < num_loops) && !args.args.stop; i++) {
		pmap_page_protect_options(phys_page, VM_PROT_READ, 0, NULL);
		/**
		 * Yield briefly to give the other workers a chance to get through
		 * more iterations.
		 */
		__builtin_arm_wfe();
	}

	pmap_disconnect_options(phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

hugepv_cleanup:
	thread_call_cancel_wait(huge_pv_timer_call);
	thread_call_free(huge_pv_timer_call);

	if (__improbable(args.args.stop)) {
		/**
		 * If stop is already set, we hit the timeout, so we can't safely block waiting for
		 * the workers to terminate as they may already be doing so. Spin in a WFE loop
		 * instead.
		 */
		while (os_atomic_load_exclusive(&args.nthreads, relaxed) != 0) {
			__builtin_arm_wfe();
		}
		os_atomic_clear_exclusive();
	} else if (args.nthreads > 0) {
		/* Ensure prior stores to nthreads are visible before the update to args.args.stop. */
		os_atomic_thread_fence(release);
		huge_pv_start_panic_timer(huge_pv_panic_call);
		assert_wait((event_t)&args, THREAD_UNINT);
		args.args.stop = TRUE;
		thread_block(THREAD_CONTINUE_NULL);
		assert(args.nthreads == 0);
	}

	thread_call_cancel_wait(huge_pv_panic_call);
	thread_call_free(huge_pv_panic_call);

	if (new_pmap != NULL) {
		pmap_remove(new_pmap, PMAP_TEST_VA, PMAP_TEST_VA + ((vm_offset_t)num_mappings << PAGE_SHIFT));
	}

	pmap_test_free_vm_page(m);
	if (new_pmap != NULL) {
		pmap_destroy(new_pmap);
	}

	return kr;
}


kern_return_t
test_pmap_reentrance(unsigned int num_loops __unused)
{
	return KERN_NOT_SUPPORTED;
}


#if __ARM64_PMAP_SUBPAGE_L1__
/* Data shared between the main testing thread and the workers. */
typedef struct {
	/* A pointer to an atomic counter of the active worker threads. */
	unsigned int *surt_test_active_surge_thread;

	/* The SURT physical address this worker is responsible for. */
	pmap_paddr_t surt_pa;
} surt_emulation_thread_data;

/**
 * SURT allocation emulation
 *
 * This function emulates the behavior of a thread trying to allocate a SURT.
 * It tries to find a free SURT in the SURT page list first, and if it does
 * not manage to find one, it allocates a new SURT page, takes the first SURT,
 * and feeds the page to the SURT page list.
 *
 * @param arg Pointer to the shared structure between the main thread and the
 *        worker.
 * @param wres Wait result - unused.
 */
static void
surt_allocation_emulation_thread(void *arg, wait_result_t __unused wres)
{
	pmap_paddr_t surt_pa;

	surt_emulation_thread_data *thread_data = (surt_emulation_thread_data *)arg;

	surt_pa = surt_try_alloc();

	if (surt_pa) {
		goto saet_done;
	}

	const kern_return_t ret = pmap_page_alloc(&surt_pa, PMAP_PAGE_NOZEROFILL);

	if (ret != KERN_SUCCESS) {
		goto saet_done;
	}

	/**
	 * This has to be retyped to XNU_SUBPAGE_USER_ROOT_TABLES in case
	 * a SURT request from real process creation shows up. It does not
	 * need to, and cannot, call SPTM's SURT alloc function, however,
	 * because some extreme stress test parameters can exhaust available
	 * ASIDs. The normal operation of the system should be unaffected
	 * as long as the xnu bitmap tracking of used SURTs is a superset of
	 * the SPTM tracking structures.
	 */
	sptm_retype_params_t retype_params = {.raw = SPTM_RETYPE_PARAMS_NULL};
	sptm_retype(surt_pa, XNU_DEFAULT, XNU_SUBPAGE_USER_ROOT_TABLES, retype_params);

	/* Feed the SURT page to the SURT list. */
	surt_feed_page_with_first_table_allocated(surt_pa);

saet_done:
	/* Update the shared structure. */
	thread_data->surt_pa = surt_pa;
	if (os_atomic_dec(thread_data->surt_test_active_surge_thread, relaxed) == 0) {
		thread_wakeup(thread_data->surt_test_active_surge_thread);
	}
}

/**
 * SURT free emulation
 *
 * This function pairs with the allocation emulation function to complete the
 * emulation of the lifecycle of a SURT table. It frees the SURT and, when
 * surt_free() indicates the containing page is no longer needed, retypes the
 * page back to XNU_DEFAULT and frees it.
 *
 * @param arg Pointer to the shared structure between the main thread and the
 *        worker.
 * @param wres Wait result - unused.
 */
static void
surt_free_emulation_thread(void *arg, wait_result_t __unused wres)
{
	surt_emulation_thread_data *thread_data = (surt_emulation_thread_data *)arg;

	if (thread_data->surt_pa == 0) {
		goto sfet_free;
	}

	const bool retype = surt_free(thread_data->surt_pa);

	if (retype) {
		os_atomic_thread_fence(acquire);
		sptm_retype_params_t retype_params = {.raw = SPTM_RETYPE_PARAMS_NULL};
		sptm_retype(thread_data->surt_pa & ~PAGE_MASK, XNU_SUBPAGE_USER_ROOT_TABLES,
		    XNU_DEFAULT, retype_params);
		pmap_page_free(thread_data->surt_pa & ~PAGE_MASK);
	}

sfet_free:
	if (os_atomic_dec(thread_data->surt_test_active_surge_thread, relaxed) == 0) {
		thread_wakeup(thread_data->surt_test_active_surge_thread);
	}
}

/**
 * SURT stress test
 *
 * This function stresses the SURT machinery by launching a set of worker
 * threads that each allocate a SURT, followed by a second set that each
 * free one.
 *
 * @param num_surts The number of SURTs to allocate and free. Note that this
 *        many worker threads will be spawned, so take care when passing in a
 *        large number: memory zones can be exhausted.
 *
 * @return Whether the test finished successfully.
 */
kern_return_t
test_surt(unsigned int num_surts)
{
	surt_emulation_thread_data *thread_data_array = kalloc_type(surt_emulation_thread_data,
	    num_surts, Z_WAITOK | Z_ZERO);
	if (!thread_data_array) {
		return KERN_FAILURE;
	}

	thread_t *thread_array = kalloc_type(thread_t, num_surts, Z_WAITOK | Z_ZERO);
	if (!thread_array) {
		/* Avoid leaking the thread data array if the second allocation fails. */
		kfree_type(surt_emulation_thread_data, num_surts, thread_data_array);
		return KERN_FAILURE;
	}

	unsigned int active_threads = 0;

	for (unsigned int i = 0; i < num_surts; i++) {
		os_atomic_inc(&active_threads, relaxed);
		thread_data_array[i].surt_test_active_surge_thread = &active_threads;

		kernel_thread_start_priority(surt_allocation_emulation_thread,
		    &thread_data_array[i],
		    thread_kern_get_pri(current_thread()) - 1,
		    &thread_array[i]);
	}

	assert_wait(&active_threads, THREAD_UNINT);

	if (os_atomic_load(&active_threads, relaxed) == 0) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	} else {
		thread_block(THREAD_CONTINUE_NULL);
	}

	if (os_atomic_load(&active_threads, relaxed) != 0) {
		panic("%s: unexpected wakeup of main test thread while workers are active.",
		    __func__);
	}

	for (unsigned int i = 0; i < num_surts; i++) {
		thread_deallocate(thread_array[i]);
	}

	for (unsigned int i = 0; i < num_surts; i++) {
		os_atomic_inc(&active_threads, relaxed);
		kernel_thread_start_priority(surt_free_emulation_thread,
		    &thread_data_array[i],
		    thread_kern_get_pri(current_thread()) - 1,
		    &thread_array[i]);
	}

	assert_wait(&active_threads, THREAD_UNINT);

	if (os_atomic_load(&active_threads, relaxed) == 0) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	} else {
		thread_block(THREAD_CONTINUE_NULL);
	}

	if (os_atomic_load(&active_threads, relaxed) != 0) {
		panic("%s: unexpected wakeup of main test thread while workers are active.",
		    __func__);
	}

	for (unsigned int i = 0; i < num_surts; i++) {
		thread_deallocate(thread_array[i]);
	}

	kfree_type(surt_emulation_thread_data, num_surts, thread_data_array);
	kfree_type(thread_t, num_surts, thread_array);

	return KERN_SUCCESS;
}
#endif /* __ARM64_PMAP_SUBPAGE_L1__ */
#endif /* CONFIG_SPTM */