xref: /xnu-8020.121.3/tests/hvtest_x86.m (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1#include <darwintest.h>
2#include <pthread.h>
3#include <stdatomic.h>
4
5#include <mach/mach.h>
6#include <mach/vm_map.h>
7#include <mach/vm_page_size.h>
8
9#include <sys/sysctl.h>
10
11#include "hvtest_x86_guest.h"
12
13#include <Foundation/Foundation.h>
14#include <Hypervisor/hv.h>
15#include <Hypervisor/hv_vmx.h>
16
/*
 * Suite-wide darwintest metadata. The sysctl requirement skips the whole
 * suite when hw.optional.arm64 == 1, i.e. when running translated under
 * Rosetta rather than on a real Intel machine.
 */
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.intel.hv"),
	T_META_RUN_CONCURRENTLY(true),
	T_META_REQUIRES_SYSCTL_NE("hw.optional.arm64", 1), // Don't run translated.
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("intel"),
	T_META_OWNER("joster")
	);
25
/*
 * We want every hypervisor test to run multiple times:
 *   - Using hv_vcpu_run()
 *   - Using hv_vcpu_run_until()
 *   - using hv_vcpu_run_until() with HV_VM_ACCEL_APIC
 *
 * darwintest has no means to run tests multiple
 * times with slightly different configuration,
 * so we have to bake it ourselves. (This can
 * be extended for other config variants of
 * course.)
 */
// Selected run variant; read by run_vcpu() and create_vm() respectively.
static bool hv_use_run_until;
static bool hv_use_accel_apic;
// Expands one test body into three T_DECLs (suffixes _run, _run_until,
// _run_until_accel), each setting the variant flags before calling the
// shared hv_test_<name>() implementation that follows the macro use.
#define T_DECL_HV(name, ...)                         \
    static void hv_test_##name (void);               \
    T_DECL(name##_run, __VA_ARGS__) {                \
        hv_use_run_until = false;                    \
        hv_use_accel_apic = false;                   \
        hv_test_##name();                            \
    }                                                \
    T_DECL(name##_run_until, __VA_ARGS__) {          \
        hv_use_run_until = true;                     \
        hv_use_accel_apic = false;                   \
        hv_test_##name();                            \
    }                                                \
    T_DECL(name##_run_until_accel, __VA_ARGS__) {    \
        hv_use_run_until = true;                     \
        hv_use_accel_apic = true;                    \
        hv_test_##name();                            \
    }                                                \
    static void hv_test_##name (void)
58
/*
 * Create the (per-process) VM, adding HV_VM_ACCEL_APIC to the caller's
 * flags when the current test variant requests it.
 */
static void
create_vm(hv_vm_options_t flags)
{
	hv_vm_options_t effective_flags = flags;

	if (hv_use_accel_apic) {
		effective_flags |= HV_VM_ACCEL_APIC;
	}

	T_ASSERT_EQ(hv_vm_create(effective_flags), HV_SUCCESS, "created vm");
}
68
/*
 * Run the VCPU once, using whichever run API the current test variant
 * selected. Asserts (quietly) that the run call itself succeeded.
 */
static void
run_vcpu(hv_vcpuid_t vcpu)
{
	if (!hv_use_run_until) {
		T_QUIET; T_ASSERT_EQ(hv_vcpu_run(vcpu), HV_SUCCESS, "hv_vcpu_run");
		return;
	}

	// Deadline of all-ones == effectively "run forever".
	T_QUIET; T_ASSERT_EQ(hv_vcpu_run_until(vcpu, ~(uint64_t)0), HV_SUCCESS, "hv_vcpu_run_until");
}
78
/*
 * Query kern.hv_support to find out whether this machine can host
 * hypervisor VMs at all. Returns false when the sysctl is missing.
 */
static bool
hv_support()
{
	int supported = 0;
	size_t supported_size = sizeof(supported);

	if (sysctlbyname("kern.hv_support", &supported, &supported_size, NULL, 0) != 0) {
		return false;
	}

	return supported != 0;
}
92
// Read one architectural register from the VCPU; EXPECTs (does not abort)
// on failure.
static uint64_t get_reg(hv_vcpuid_t vcpu, hv_x86_reg_t reg)
{
	uint64_t value = 0;
	hv_return_t err = hv_vcpu_read_register(vcpu, reg, &value);
	T_QUIET; T_EXPECT_EQ(err, HV_SUCCESS, "get register");
	return value;
}
100
// Write one architectural register on the VCPU; EXPECTs (does not abort)
// on failure.
static void set_reg(hv_vcpuid_t vcpu, hv_x86_reg_t reg, uint64_t value)
{
	hv_return_t err = hv_vcpu_write_register(vcpu, reg, value);
	T_QUIET; T_EXPECT_EQ(err, HV_SUCCESS, "set register");
}
106
// Read one VMCS field of the VCPU; EXPECTs (does not abort) on failure.
static uint64_t get_vmcs(hv_vcpuid_t vcpu, uint32_t field)
{
	uint64_t value = 0;
	hv_return_t err = hv_vmx_vcpu_read_vmcs(vcpu, field, &value);
	T_QUIET; T_EXPECT_EQ(err, HV_SUCCESS, "get vmcs");
	return value;
}
114
// Write one VMCS field of the VCPU; EXPECTs (does not abort) on failure.
static void set_vmcs(hv_vcpuid_t vcpu, uint32_t field, uint64_t value)
{
	hv_return_t err = hv_vmx_vcpu_write_vmcs(vcpu, field, value);
	T_QUIET; T_EXPECT_EQ(err, HV_SUCCESS, "set vmcs");
}
120
// Read a VMX capability MSR via the Hypervisor framework; aborts the test
// on failure since subsequent VMCS programming depends on it.
static uint64_t get_cap(uint32_t field)
{
	uint64_t capability = 0;
	T_QUIET; T_ASSERT_EQ(hv_vmx_read_capability(field, &capability), HV_SUCCESS,
	    "get capability");
	return capability;
}
128
129
130
// gpa (NSNumber) -> host uva (NSNumber) for every page mapped into the guest.
static NSMutableDictionary *page_cache;
// Host pages valloc'd on behalf of the guest, kept so free_page_cache() can
// release them.
static NSMutableSet *allocated_phys_pages;
// Protects page_cache, allocated_phys_pages, next_phys and the guest
// paging structures built by enter_level_locked().
static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;

// Next guest-physical address to hand out; grows by one page per mapping.
static uint64_t next_phys = 0x4000000;
136
/*
 * Map a page into guest's physical address space, return gpa of the
 * page.  If *host_uva is NULL, a new zero-filled host user page is
 * allocated and recorded for later freeing.
 *
 * Caller must hold page_table_lock.
 */
static hv_gpaddr_t
map_guest_phys_locked(void **host_uva)
{
    hv_gpaddr_t gpa = next_phys;
    next_phys += vm_page_size;

    if (*host_uva == NULL) {
        *host_uva = valloc(vm_page_size);
        // valloc can fail; don't memset/map through a NULL pointer.
        T_QUIET; T_ASSERT_NOTNULL(*host_uva, "allocate backing page");
        memset(*host_uva, 0, vm_page_size);
        [allocated_phys_pages addObject:@((uintptr_t)*host_uva)];
    }

    // Pages start read-only; run_to_next_vm_fault() upgrades permissions
    // on demand when the guest writes or executes.
    T_QUIET; T_ASSERT_EQ(hv_vm_map(*host_uva, gpa, vm_page_size, HV_MEMORY_READ), HV_SUCCESS, "enter hv mapping");

    [page_cache setObject:@((uintptr_t)*host_uva) forKey:@(gpa)];

    return gpa;
}
159
/*
 * Locking wrapper around map_guest_phys_locked(); see there for the
 * mapping semantics.
 */
static hv_gpaddr_t
map_guest_phys(void **host_uva)
{
	hv_gpaddr_t result;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&page_table_lock),
	    "acquire page lock");
	result = map_guest_phys_locked(host_uva);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&page_table_lock),
	    "release page lock");

	return result;
}
173
// Root of the guest's long mode page tables: host pointer and its
// guest-physical address (loaded into guest CR3 by setup_long_mode()).
static uint64_t *pml4;
static hv_gpaddr_t pml4_gpa;
176
/* Stolen from kern/bits.h, which cannot be included outside the kernel. */
#define BIT(b)                          (1ULL << (b))

/*
 * Parenthesize the width argument so expression arguments (e.g. the
 * "hi - lo + 1" produced by bits()) evaluate correctly regardless of
 * operator precedence. width >= 64 must be special-cased because a
 * 64-bit shift of a 64-bit value is undefined in C.
 */
#define mask(width)                     ((width) >= 64 ? (unsigned long long)-1 : (BIT(width) - 1))
#define extract(x, shift, width)        ((((uint64_t)(x)) >> (shift)) & mask(width))
#define bits(x, hi, lo)                 extract((x), (lo), (hi) - (lo) + 1)
183
184
/*
 * Enter a page in a level of long mode's PML4 paging structures.
 * Helper for fault_in_page.
 *
 * table:   the paging-structure level to fill in.
 * host_va: backing host page to use, or NULL to allocate/reuse one.
 * va:      guest virtual address being mapped; bits hi..lo index `table`.
 * Returns the host virtual address backing the entry (a next-level table,
 * or the final data page at the leaf level).
 *
 * Caller must hold page_table_lock.
 */
static void *
enter_level_locked(uint64_t *table, void *host_va, void *va, int hi, int lo) {
    uint64_t * const te = &table[bits(va, hi, lo)];

    const uint64_t present = 1;
    const uint64_t rw = 2;

    // Bits 47:12 of a PML4-family entry hold the physical page address.
    const uint64_t addr_mask = mask(47-12) << 12;

    if (!(*te & present)) {
        // Not mapped yet: back it (allocating if host_va is NULL) and
        // install a present+writable entry.
        hv_gpaddr_t gpa = map_guest_phys_locked(&host_va);
        *te = (gpa & addr_mask) | rw | present;
    } else {
        // Already mapped: look up the host page backing the entry's gpa.
        NSNumber *num = [page_cache objectForKey:@(*te & addr_mask)];
        T_QUIET; T_ASSERT_NOTNULL(num, "existing page is backed");
        void *backing = (void*)[num unsignedLongValue];
        if (host_va != 0) {
            // NOTE(review): this compares the guest VA (not host_va) against
            // the existing backing page. It holds for current callers because
            // fault_in_page() identity-maps (host_va == va); confirm whether
            // host_va was intended if a non-identity caller is ever added.
            T_QUIET; T_ASSERT_EQ(va, backing, "backing page matches");
        } else {
            host_va = backing;
        }
    }

    return host_va;
}
214
/*
 * Enters a page both into the guest paging structures and the EPT
 * (long mode PML4 only; real mode and protected mode support running
 * without paging, and that's what they use instead.)
 *
 * Walks PML4 -> PDPT -> PD -> PT, creating levels as needed, and returns
 * the host page backing the leaf entry.
 */
static void *
map_page(void *host_va, void *va) {
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&page_table_lock),
	    "acquire page lock");

	// One enter_level_locked() per paging level, from root to leaf.
	uint64_t *pdpt = enter_level_locked(pml4, NULL, va, 47, 39);
	uint64_t *pd   = enter_level_locked(pdpt, NULL, va, 38, 30);
	uint64_t *pt   = enter_level_locked(pd,   NULL, va, 29, 21);
	void *leaf     = enter_level_locked(pt, host_va, va, 20, 12);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&page_table_lock),
	    "release page lock");

	return leaf;
}
237
/*
 * Identity-map va into the guest: the host page at va backs the same
 * guest-virtual address, entering both guest page tables and EPT.
 */
static void
fault_in_page(void *va) {
	map_page(va, va);
}
242
/*
 * Free every host page backing guest physical memory and drop the
 * bookkeeping collections. Called from vm_cleanup() after the VM has
 * been destroyed, so no VCPU can still touch the pages.
 */
static void free_page_cache(void)
{
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&page_table_lock),
	    "acquire page lock");

	for (NSNumber *uvaNumber in allocated_phys_pages) {
		uintptr_t va = [uvaNumber unsignedLongValue];
		free((void *)va);
	}
	[page_cache release];
	[allocated_phys_pages release];
	// Clear the now-dangling globals so a later vm_setup() starts clean and
	// any accidental use-after-release becomes a harmless message to nil.
	page_cache = nil;
	allocated_phys_pages = nil;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&page_table_lock),
	    "release page lock");
}
258
/*
 * Run the VCPU until it exits for a reason the caller must handle, and
 * return that VMCS exit reason.
 *
 * Host-interrupt exits (VMX_REASON_IRQ) are always retried. When
 * on_demand_paging is set, this also services, then retries:
 *   - guest #PF on a not-present page, by identity-mapping the page, and
 *   - EPT write/exec violations, by upgrading the page's EPT permissions.
 */
static uint64_t
run_to_next_vm_fault(hv_vcpuid_t vcpu, bool on_demand_paging)
{
	bool retry;
	uint64_t exit_reason, qual, gpa, gla, info, vector_info, error_code;
	uint64_t last_spurious_qual = 0, last_spurious_gpa = 0, last_spurious_gla = 0;
	int spurious_ept_count = 0;
	do {
		retry = false;
		// Host interrupts are not interesting; just run again.
		do {
			run_vcpu(vcpu);
			exit_reason = get_vmcs(vcpu, VMCS_RO_EXIT_REASON);
		} while (exit_reason == VMX_REASON_IRQ);

		// Snapshot all exit details before deciding what to do.
		qual = get_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC);
		gpa = get_vmcs(vcpu, VMCS_GUEST_PHYSICAL_ADDRESS);
		gla = get_vmcs(vcpu, VMCS_RO_GUEST_LIN_ADDR);
		info = get_vmcs(vcpu, VMCS_RO_VMEXIT_IRQ_INFO);
		vector_info = get_vmcs(vcpu, VMCS_RO_IDT_VECTOR_INFO);
		error_code = get_vmcs(vcpu, VMCS_RO_VMEXIT_IRQ_ERROR);

		if (on_demand_paging) {
			// Exception exit with valid info (bit 31), hardware exception
			// type, vector 14 (#PF), and error code P bit clear
			// (page-not-present fault).
			if (exit_reason == VMX_REASON_EXC_NMI &&
			    (info & 0x800003ff) == 0x8000030e &&
			    (error_code & 0x1) == 0) {
				// guest paging fault; the exit qualification holds the
				// faulting linear address.
				fault_in_page((void*)qual);
				retry = true;
			}
			else if (exit_reason == VMX_REASON_EPT_VIOLATION) {
				// Qualification bits (per Intel SDM): 1 = write access,
				// 2 = instruction fetch, 7 = guest linear address valid.
				if ((qual & 0x86) == 0x82) {
					// EPT write fault
					T_QUIET; T_ASSERT_EQ(hv_vm_protect(gpa & ~(hv_gpaddr_t)PAGE_MASK, vm_page_size,
					                                   HV_MEMORY_READ | HV_MEMORY_WRITE),
					                     HV_SUCCESS, "make page writable");
					retry = true;
				}
				else if ((qual & 0x86) == 0x84) {
					// EPT exec fault
					T_QUIET; T_ASSERT_EQ(hv_vm_protect(gpa & ~(hv_gpaddr_t)PAGE_MASK, vm_page_size,
					                                   HV_MEMORY_READ | HV_MEMORY_EXEC),
					                     HV_SUCCESS, "make page executable");
					retry = true;
				}
			}
		}

		if (!hv_use_run_until && !retry && exit_reason == VMX_REASON_EPT_VIOLATION &&
			spurious_ept_count++ < 128) {
			/*
			 * When using hv_vcpu_run() instead of
			 * hv_vcpu_run_until(), the Hypervisor kext bubbles up
			 * spurious EPT violations that it actually handled
			 * itself.
			 *
			 * It is hard to assess whether the EPT violation is
			 * spurious or not (a good reason never to use this
			 * interface in practice) without knowledge of the
			 * specific test, so we just retry here, unless we
			 * encounter what seems to be the same fault again.
			 *
			 * To guard against cycling faults that we do not detect
			 * here, we also put a maximum on the number of
			 * retries. Yes, this is all very shoddy, but so is
			 * hv_vcpu_run().
			 *
			 * Every test will also be run with hv_vcpu_run_until()
			 * which employs no such hackery, so this should not mask
			 * any unexpected EPT violations.
			 */

			retry = !((last_spurious_qual == qual) && (last_spurious_gpa == gpa) && (last_spurious_gla == gla));

			if (retry) {
				last_spurious_qual = qual;
				last_spurious_gpa = gpa;
				last_spurious_gla = gla;
			}
		}
	} while (retry);

	// Debugging aids, intentionally left in:
	// printf("reason: %lld, qualification: %llx\n", exit_reason, qual);
	// printf("gpa: %llx, gla: %llx\n", gpa, gla);
	// printf("RIP: %llx\n", get_reg(vcpu, HV_X86_RIP));
	// printf("CR3: %llx\n", get_reg(vcpu, HV_X86_CR3));
	// printf("info: %llx\n", info);
	// printf("vector_info: %llx\n", vector_info);
	// printf("error_code: %llx\n", error_code);

	return exit_reason;
}
350
/*
 * Run until the guest executes VMCALL, asserting no other exit reason
 * shows up. Returns the guest's RAX (its "return value") and advances
 * RIP past the VMCALL so the VCPU can simply be resumed.
 */
static uint64_t
expect_vmcall(hv_vcpuid_t vcpu, bool on_demand_paging)
{
	T_ASSERT_EQ(run_to_next_vm_fault(vcpu, on_demand_paging),
	    (uint64_t)VMX_REASON_VMCALL, "expect vmcall exit");

	// advance RIP to after VMCALL
	uint64_t next_rip = get_reg(vcpu, HV_X86_RIP) +
	    get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN);
	set_vmcs(vcpu, VMCS_GUEST_RIP, next_rip);

	return get_reg(vcpu, HV_X86_RAX);
}
362
/*
 * Run until the guest executes VMCALL and assert that RAX carries the
 * expected value. Advances RIP past the VMCALL and returns the exit
 * reason (always VMX_REASON_VMCALL if the asserts pass).
 */
static uint64_t
expect_vmcall_with_value(hv_vcpuid_t vcpu, uint64_t rax, bool on_demand_paging)
{
	uint64_t exit_reason = run_to_next_vm_fault(vcpu, on_demand_paging);
	T_QUIET; T_ASSERT_EQ(exit_reason, (uint64_t)VMX_REASON_VMCALL, "check for vmcall exit");
	T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), rax, "vmcall exit with expected RAX value %llx", rax);

	// advance RIP to after VMCALL
	uint64_t next_rip = get_reg(vcpu, HV_X86_RIP) +
	    get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN);
	set_vmcs(vcpu, VMCS_GUEST_RIP, next_rip);

	return exit_reason;
}
375
// Guest entry point: runs inside the VM, receives guest_param in RDI.
typedef void (*vcpu_entry_function)(uint64_t);
// Host-side monitor: drives the VCPU from the monitor thread; its return
// value becomes the pthread's result.
typedef void *(*vcpu_monitor_function)(void *, hv_vcpuid_t);

// Per-thread parameters handed to wrap_monitor(); ownership moves to the
// monitor thread, which frees the struct.
struct test_vcpu {
	hv_vcpuid_t vcpu;
	vcpu_entry_function guest_func;
	uint64_t guest_param;
	vcpu_monitor_function monitor_func;
	void *monitor_param;
};
386
/*
 * Apply a VMX capability MSR to a control value: the low 32 bits of mask
 * are the bits that must be 1, the high 32 bits are the bits allowed to
 * be 1.
 */
static uint64_t
canonicalize(uint64_t ctrl, uint64_t mask)
{
	uint64_t must_be_one = mask & 0xffffffff;
	uint64_t allowed_one = mask >> 32;

	return (ctrl | must_be_one) & allowed_one;
}
392
/*
 * Program the VCPU to start in real mode: protection and paging disabled,
 * all segments based at 0 with 64KB limits, and every exception trapped
 * so faults surface as VM exits.
 */
static void
setup_real_mode(hv_vcpuid_t vcpu)
{
    uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;

    // Capability MSRs define which control bits are forced/allowed;
    // canonicalize() applies them below.
    pin_cap = get_cap(HV_VMX_CAP_PINBASED);
    proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
    proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
    entry_cap = get_cap(HV_VMX_CAP_ENTRY);
    exit_cap = get_cap(HV_VMX_CAP_EXIT);

    set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
             canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
    set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(0, entry_cap));
	set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));

    // CR0 = 0x20 (PE and PG clear); trap all CR0/CR4 writes via all-ones masks.
    set_vmcs(vcpu, VMCS_GUEST_CR0, 0x20);
	set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x20);
	set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2000);
	set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x0000);
	// TR/LDTR need valid access rights even in real mode; LDTR is marked
	// unusable (0x10000).
	set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x83);
	set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);
	// All data/code segments: base 0, 64KB limit, 16-bit access rights.
	set_vmcs(vcpu, VMCS_GUEST_SS, 0);
	set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0x93);
	set_vmcs(vcpu, VMCS_GUEST_CS, 0);
	set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0x9b);
	set_vmcs(vcpu, VMCS_GUEST_DS, 0);
	set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0x93);
	set_vmcs(vcpu, VMCS_GUEST_ES, 0);
	set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0x93);
	set_vmcs(vcpu, VMCS_GUEST_FS, 0);
	set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0x93);
	set_vmcs(vcpu, VMCS_GUEST_GS, 0);
	set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffff);
	set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0x93);

    // No descriptor tables.
    set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);
    set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);

    // RFLAGS bit 1 is reserved and must be 1.
    set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);

	// Trap every guest exception.
	set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);
}
453
/*
 * Program the VCPU to start in 32-bit protected mode without paging:
 * CR0.PE set, flat 4GB segments, all exceptions trapped. The guest is
 * expected to provide its own GDT only if it reloads segments.
 */
static void
setup_protected_mode(hv_vcpuid_t vcpu)
{
    uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;

    // Capability MSRs define which control bits are forced/allowed;
    // canonicalize() applies them below.
    pin_cap = get_cap(HV_VMX_CAP_PINBASED);
    proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
    proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
    entry_cap = get_cap(HV_VMX_CAP_ENTRY);
    exit_cap = get_cap(HV_VMX_CAP_EXIT);

    set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
             canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
    set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(0, entry_cap));
	set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));

    // CR0 = 0x21 (PE set, PG clear); trap all CR0/CR4 writes.
    set_vmcs(vcpu, VMCS_GUEST_CR0, 0x21);
	set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x21);
	set_vmcs(vcpu, VMCS_GUEST_CR3, 0);
	set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2000);
	set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x0000);

    set_vmcs(vcpu, VMCS_GUEST_TR, 0);
    set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x8b);

	// LDTR unusable.
	set_vmcs(vcpu, VMCS_GUEST_LDTR, 0x0);
	set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);

	// Flat 4GB segments: data at selector 0x8, code at 0x10, 32-bit
	// access rights (0xc093 data / 0xc09b code).
	set_vmcs(vcpu, VMCS_GUEST_SS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0xc093);

	set_vmcs(vcpu, VMCS_GUEST_CS, 0x10);
	set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0xc09b);

	set_vmcs(vcpu, VMCS_GUEST_DS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0xc093);

	set_vmcs(vcpu, VMCS_GUEST_ES, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0xc093);

	set_vmcs(vcpu, VMCS_GUEST_FS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0xc093);

	set_vmcs(vcpu, VMCS_GUEST_GS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0xc093);

    // No descriptor tables.
    set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);

    set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);

    // RFLAGS bit 1 is reserved and must be 1.
    set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);

	// Trap every guest exception.
	set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);
}
526
/*
 * Program the VCPU to start in 64-bit long mode: CR0.PE+PG, CR4.PAE,
 * EFER.LME+LMA, IA-32e entry control, with the harness-managed PML4 as
 * the guest page table root (CR3). All exceptions are trapped.
 */
static void
setup_long_mode(hv_vcpuid_t vcpu)
{
    uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;

    // Capability MSRs define which control bits are forced/allowed;
    // canonicalize() applies them below.
    pin_cap = get_cap(HV_VMX_CAP_PINBASED);
    proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
    proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
    entry_cap = get_cap(HV_VMX_CAP_ENTRY);
    exit_cap = get_cap(HV_VMX_CAP_EXIT);

    set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
             canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
    // Enter directly in IA-32e mode.
    set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(VMENTRY_GUEST_IA32E, entry_cap));
	set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));

    // CR0 = PG|PE (plus 0x20); CR4 = 0x2020 includes PAE, required for
    // long mode. All CR0/CR4 writes trap.
    set_vmcs(vcpu, VMCS_GUEST_CR0, 0x80000021L);
	set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x80000021L);
	set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2020);
	set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
	set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x2020);

    // EFER = 0x500 (LME | LMA).
    set_vmcs(vcpu, VMCS_GUEST_IA32_EFER, 0x500);

    // Hypervisor fw wants this (for good, but unrelated reason).
    T_QUIET; T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, MSR_IA32_KERNEL_GS_BASE, true), HV_SUCCESS, "enable native GS_BASE");

    set_vmcs(vcpu, VMCS_GUEST_TR, 0);
    set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x8b);

	// LDTR unusable.
	set_vmcs(vcpu, VMCS_GUEST_LDTR, 0x0);
	set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);

	// Flat segments with 64-bit access rights (0xa093 data / 0xa09b code).
	set_vmcs(vcpu, VMCS_GUEST_SS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0xa093);

	set_vmcs(vcpu, VMCS_GUEST_CS, 0x10);
	set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0xa09b);

	set_vmcs(vcpu, VMCS_GUEST_DS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0xa093);

	set_vmcs(vcpu, VMCS_GUEST_ES, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0xa093);

	set_vmcs(vcpu, VMCS_GUEST_FS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0xa093);

	set_vmcs(vcpu, VMCS_GUEST_GS, 0x8);
	set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffffffff);
	set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0xa093);

    // RFLAGS bit 1 is reserved and must be 1.
    set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);

    // Trap every guest exception.
    set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);

    // Use the harness-managed long mode page tables.
    set_vmcs(vcpu, VMCS_GUEST_CR3, pml4_gpa);

    // No descriptor tables.
    set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);

    set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
	set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);
}
604
/*
 * pthread entry point for a monitor thread: creates the VCPU, points the
 * guest at its entry function (param in RDI) with a fresh stack, runs the
 * monitor, then tears the VCPU down. Owns and frees the test_vcpu struct.
 */
static void *
wrap_monitor(void *param)
{
	struct test_vcpu *config = (struct test_vcpu *)param;

	T_QUIET; T_ASSERT_EQ(hv_vcpu_create(&config->vcpu, HV_VCPU_DEFAULT), HV_SUCCESS,
	    "created vcpu");

	const size_t stack_size = 0x4000;
	void *stack = valloc(stack_size);
	T_QUIET; T_ASSERT_NOTNULL(stack, "allocate VCPU stack");

	// RSP starts at the top of the stack (stacks grow down).
	set_vmcs(config->vcpu, VMCS_GUEST_RIP, (uintptr_t)config->guest_func);
	set_vmcs(config->vcpu, VMCS_GUEST_RSP, (uintptr_t)stack + stack_size);
	set_reg(config->vcpu, HV_X86_RDI, config->guest_param);

	void *result = config->monitor_func(config->monitor_param, config->vcpu);

	T_QUIET; T_ASSERT_EQ(hv_vcpu_destroy(config->vcpu), HV_SUCCESS, "Destroyed vcpu");
	free(stack);
	free(config);
	return result;
}
629
/*
 * Spawn a monitor thread driving one VCPU: the guest runs guest_function
 * with guest_param in RDI; the host side runs monitor_func(monitor_param).
 * Returns the pthread so the caller can join it for the monitor's result.
 */
static pthread_t
create_vcpu_thread(
    vcpu_entry_function guest_function, uint64_t guest_param,
    vcpu_monitor_function monitor_func, void *monitor_param)
{
	struct test_vcpu *params = malloc(sizeof(*params));
	T_QUIET; T_ASSERT_NOTNULL(params, "malloc test params");

	params->guest_func = guest_function;
	params->guest_param = guest_param;
	params->monitor_func = monitor_func;
	params->monitor_param = monitor_param;

	pthread_t thread;
	T_ASSERT_POSIX_SUCCESS(pthread_create(&thread, NULL, wrap_monitor, params),
	    "create vcpu pthread");
	// wrap_monitor takes ownership of params and frees it.

	return thread;
}
650
/*
 * Common per-test setup: skip on machines without HV support, allocate
 * the page bookkeeping, create the VM, and build the long mode PML4 root
 * (pml4/pml4_gpa) that setup_long_mode() loads into guest CR3.
 */
static void
vm_setup()
{
	T_SETUPBEGIN;

	// hv_support() returns bool; test it for truth, matching the
	// "!hv_support()" checks elsewhere in this file.
	if (!hv_support()) {
		T_SKIP("Running on non-HV target, skipping...");
		return;
	}

	page_cache = [[NSMutableDictionary alloc] init];
	allocated_phys_pages = [[NSMutableSet alloc] init];

	create_vm(HV_VM_DEFAULT);

	// Set up root paging structures for long mode,
	// where paging is mandatory. pml4 is NULL here, so
	// map_guest_phys() allocates a fresh host page for it.
	pml4_gpa = map_guest_phys((void**)&pml4);
	memset(pml4, 0, vm_page_size);

	T_SETUPEND;
}
674
/*
 * Common per-test teardown: destroy the VM first (so nothing maps the
 * pages anymore), then free the backing pages and reset the long mode
 * paging root for the next vm_setup().
 */
static void
vm_cleanup()
{
	T_ASSERT_EQ(hv_vm_destroy(), HV_SUCCESS, "Destroyed vm");
	free_page_cache();

	pml4 = NULL;
	pml4_gpa = 0;
}
684
// Signaled (under vcpus_ready_lock) once vcpus_initializing drops to 0.
static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
// Protects vcpus_initializing and pairs with ready_cond.
static pthread_mutex_t vcpus_ready_lock = PTHREAD_MUTEX_INITIALIZER;
// Count of VCPU threads that have not finished hv_vcpu_create() yet.
static uint32_t vcpus_initializing;
// Deliberately never unlocked: all but one VCPU thread block here forever
// (see regression_55524541).
static pthread_mutex_t vcpus_hang_lock = PTHREAD_MUTEX_INITIALIZER;
689
/*
 * VCPU thread body for regression_55524541: create a VCPU (storing its id
 * through the caller-provided slot), report readiness, then block forever
 * so the thread dies while waiting for a lock when the process is killed.
 */
static void *
multikill_vcpu_thread_function(void *arg)
{
	// Fix: arg IS used (it points at this thread's hv_vcpuid_t slot),
	// so the previous __unused annotation was incorrect.
	hv_vcpuid_t *vcpu = (hv_vcpuid_t*)arg;

	T_QUIET; T_ASSERT_EQ(hv_vcpu_create(vcpu, HV_VCPU_DEFAULT), HV_SUCCESS,
	                     "created vcpu");

	// Report this VCPU as initialized; the last one wakes the parent.
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_ready_lock),
	    "acquire vcpus_ready_lock");
	T_QUIET; T_ASSERT_NE(vcpus_initializing, 0, "check for vcpus_ready underflow");
	vcpus_initializing--;
	if (vcpus_initializing == 0) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&ready_cond),
		    "signaling all VCPUs ready");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&vcpus_ready_lock),
	    "release vcpus_ready_lock");

	// To cause the VCPU pointer to be cleared from the wrong thread, we need
	// to get threads onto the thread deallocate queue. One way to accomplish
	// this is to die while waiting for a lock.
	T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_hang_lock),
	    "acquire vcpus_hang_lock");

	// Do not allow the thread to terminate. Exactly one thread will acquire
	// the above lock successfully.
	while (true) {
		pause();
	}

	return NULL;
}
723
T_DECL_HV(regression_55524541,
	"kill task with multiple VCPU threads waiting for lock")
{
	if (!hv_support()) {
		T_SKIP("no HV support");
	}

	int pipedesc[2];
	T_ASSERT_POSIX_SUCCESS(pipe(pipedesc), "create pipe");

	pid_t child = fork();
	if (child == 0) {
		const uint32_t vcpu_count = 8;
		pthread_t vcpu_threads[8];
		/*
		 * Fix: each thread stores its VCPU id through the pointer it
		 * receives. The previous code passed the address of a loop-local
		 * variable, so all threads raced on a single slot that went out
		 * of scope every iteration. Give each thread its own slot that
		 * stays alive for the life of the child process.
		 */
		static hv_vcpuid_t vcpus[8];
		create_vm(HV_VM_DEFAULT);
		vcpus_initializing = vcpu_count;
		for (uint32_t i = 0; i < vcpu_count; i++) {
			T_ASSERT_POSIX_SUCCESS(pthread_create(&vcpu_threads[i], NULL,
			    multikill_vcpu_thread_function, (void *)&vcpus[i]),
				"create vcpu_threads[%u]", i);
		}

		// Wait until every thread has created its VCPU.
		T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_ready_lock),
		    "acquire vcpus_ready_lock");
		while (vcpus_initializing != 0) {
			T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&ready_cond,
			    &vcpus_ready_lock), "wait for all threads ready");
		}
		T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&vcpus_ready_lock),
		    "release vcpus_ready_lock");

		// Indicate readiness to die, meditate peacefully.
		uint8_t byte = 0;
		T_ASSERT_EQ_LONG(write(pipedesc[1], &byte, 1), 1L, "notifying on pipe");
		while (true) {
			pause();
		}
	} else {
		T_ASSERT_GT(child, 0, "successful fork");
		// Wait for child to prepare.
		uint8_t byte;
		T_ASSERT_EQ_LONG(read(pipedesc[0], &byte, 1), 1L, "waiting on pipe");
		T_ASSERT_POSIX_SUCCESS(kill(child, SIGTERM), "kill child");
		// Hope for no panic...
		T_ASSERT_POSIX_SUCCESS(wait(NULL), "reap child");
	}
	T_ASSERT_POSIX_SUCCESS(close(pipedesc[0]), "close pipedesc[0]");
	T_ASSERT_POSIX_SUCCESS(close(pipedesc[1]), "close pipedesc[1]");
}
775
/*
 * Monitor for the simple long mode test: set up long mode and wait for
 * the guest to report success via VMCALL with RAX == 0x33456 (faulting
 * pages in on demand along the way).
 */
static void *
simple_long_mode_monitor(void *arg __unused, hv_vcpuid_t vcpu)
{
	setup_long_mode(vcpu);
	expect_vmcall_with_value(vcpu, 0x33456, true);
	return NULL;
}
785
T_DECL_HV(simple_long_mode_guest, "simple long mode guest")
{
	vm_setup();

	// Run one VCPU through the long mode guest stub and wait for it.
	pthread_t monitor_thread = create_vcpu_thread(simple_long_mode_vcpu_entry,
	    0x10000, simple_long_mode_monitor, 0);
	T_ASSERT_POSIX_SUCCESS(pthread_join(monitor_thread, NULL), "join vcpu");

	vm_cleanup();
}
795
/*
 * Monitor for the SMP test: run the guest until its VMCALL and hand the
 * RAX value (the counter value this VCPU observed) back to pthread_join.
 */
static void *
smp_test_monitor(void *arg __unused, hv_vcpuid_t vcpu)
{
	setup_long_mode(vcpu);

	uint64_t observed = expect_vmcall(vcpu, true);
	return (void *)(uintptr_t)observed;
}
804
T_DECL_HV(smp_sanity, "Multiple VCPUs in the same VM")
{
	vm_setup();

	// Use this region as shared memory between the VCPUs.
	void *shared = NULL;
	map_guest_phys((void**)&shared);

	atomic_uint *count_word = (atomic_uint *)shared;
	atomic_init(count_word, 0);

	// Both VCPUs run the same guest entry against the shared counter.
	pthread_t vcpu1_thread = create_vcpu_thread(smp_vcpu_entry,
	    (uintptr_t)count_word, smp_test_monitor, count_word);
	pthread_t vcpu2_thread = create_vcpu_thread(smp_vcpu_entry,
	    (uintptr_t)count_word, smp_test_monitor, count_word);

	void *r1, *r2;
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu1_thread, &r1), "join vcpu1");
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu2_thread, &r2), "join vcpu2");
	uint64_t v1 = (uint64_t)r1;
	uint64_t v2 = (uint64_t)r2;
	// Each VCPU reports the counter value it saw; in either join order,
	// one must have seen 0 and the other 1.
	if (v1 == 0) {
		T_ASSERT_EQ_ULLONG(v2, 1ULL, "check count");
	} else if (v1 == 1) {
		T_ASSERT_EQ_ULLONG(v2, 0ULL, "check count");
	} else {
		T_FAIL("unexpected count: %llu", v1);
	}

	vm_cleanup();
}
836
837
838extern void *hvtest_begin;
839extern void *hvtest_end;
840
/*
 * Monitor for the protected mode test: copy the guest code section into a
 * 1MB shadow buffer at offset 0x1000, map it read/exec at gpa 0x40000000,
 * and wait for the guest's success VMCALL (RAX == 0x23456).
 */
static void *
simple_protected_mode_test_monitor(void *arg __unused, hv_vcpuid_t vcpu)
{
    setup_protected_mode(vcpu);

    size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);

    const size_t mem_size = 1 * 1024 * 1024;
    uint8_t *guest_pages_shadow = valloc(mem_size);
    // Guard against allocation failure and against the guest image
    // outgrowing the shadow buffer (it is copied at offset 0x1000).
    T_QUIET; T_ASSERT_NOTNULL(guest_pages_shadow, "allocate guest shadow");
    T_QUIET; T_ASSERT_LE(guest_pages_size, mem_size - 0x1000, "guest image fits in shadow");

    bzero(guest_pages_shadow, mem_size);
    memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);

    T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x40000000, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
                HV_SUCCESS, "map guest memory");

    expect_vmcall_with_value(vcpu, 0x23456, false);

    free(guest_pages_shadow);

    return NULL;
}
863
T_DECL_HV(simple_protected_mode_guest, "simple protected mode guest")
{
    vm_setup();

    // The monitor copies the guest image to gpa 0x40000000 + 0x1000, so
    // rebase the entry point into that copy: keep its offset within the
    // page (Mach's PAGE_MASK is the low-bits mask) and add the copy's base.
    // NOTE(review): assumes the entry function lies on the first page of
    // the hvtest_begin..hvtest_end section — confirm if the stub grows.
    pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
                                               (((uintptr_t)simple_protected_mode_vcpu_entry & PAGE_MASK) +
                                                0x40000000 + 0x1000),
                                               0, simple_protected_mode_test_monitor, 0);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");

	vm_cleanup();
}
876
/*
 * Monitor for the real mode test: copy the guest code section into a 1MB
 * shadow buffer at offset 0x1000, map it read/exec at gpa 0 (real mode
 * addresses), and wait for the guest's success VMCALL (RAX == 0x23456).
 */
static void *
simple_real_mode_monitor(void *arg __unused, hv_vcpuid_t vcpu)
{
    setup_real_mode(vcpu);

    size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);

    const size_t mem_size = 1 * 1024 * 1024;
    uint8_t *guest_pages_shadow = valloc(mem_size);
    // Guard against allocation failure and against the guest image
    // outgrowing the shadow buffer (it is copied at offset 0x1000).
    T_QUIET; T_ASSERT_NOTNULL(guest_pages_shadow, "allocate guest shadow");
    T_QUIET; T_ASSERT_LE(guest_pages_size, mem_size - 0x1000, "guest image fits in shadow");

    bzero(guest_pages_shadow, mem_size);
    memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);

    T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x0, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC), HV_SUCCESS,
                "map guest memory");

    expect_vmcall_with_value(vcpu, 0x23456, false);

    free(guest_pages_shadow);

    return NULL;
}
899
T_DECL_HV(simple_real_mode_guest, "simple real mode guest")
{
    vm_setup();

    // The monitor copies the guest image to gpa 0x1000, so rebase the
    // entry point: keep its offset within the page (Mach's PAGE_MASK is
    // the low-bits mask) and add 0x1000.
    // NOTE(review): assumes the entry function lies on the first page of
    // the hvtest_begin..hvtest_end section — confirm if the stub grows.
    pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
                                               (((uintptr_t)simple_real_mode_vcpu_entry & PAGE_MASK) +
                                                0x1000),
                                               0, simple_real_mode_monitor, 0);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");

	vm_cleanup();
}
912
913static void *
914radar61961809_monitor(void *gpaddr, hv_vcpuid_t vcpu)
915{
916	uint32_t const gdt_template[] = {
917		0, 0,                         /* Empty */
918		0x0000ffff, 0x00cf9200,       /* 0x08 CPL0 4GB writable data, 32bit */
919		0x0000ffff, 0x00cf9a00,       /* 0x10 CPL0 4GB readable code, 32bit */
920		0x0000ffff, 0x00af9200,       /* 0x18 CPL0 4GB writable data, 64bit */
921		0x0000ffff, 0x00af9a00,       /* 0x20 CPL0 4GB readable code, 64bit */
922	};
923
924	// We start the test in protected mode.
925    setup_protected_mode(vcpu);
926
927	// SAVE_EFER makes untrapped CR0.PG work.
928    uint64_t exit_cap = get_cap(HV_VMX_CAP_EXIT);
929	set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(VMEXIT_SAVE_EFER, exit_cap));
930
931	// Start with CR0.PG disabled.
932	set_vmcs(vcpu, VMCS_GUEST_CR0, 0x00000021);
933	set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x00000021);
934	/*
935	 * Don't trap on modifying CR0.PG to reproduce the problem.
936	 * Otherwise, we'd have to handle the switch ourselves, and would
937	 * just do it right.
938	 */
939	set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0x80000000UL);
940
941	// PAE must be enabled for a switch into long mode to work.
942	set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2020);
943	set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
944	set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x2020);
945
946	// Will use the harness managed page tables in long mode.
947	set_vmcs(vcpu, VMCS_GUEST_CR3, pml4_gpa);
948
949	// Hypervisor fw wants this (for good, but unrelated reason).
950	T_QUIET; T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, MSR_IA32_KERNEL_GS_BASE, true), HV_SUCCESS, "enable native GS_BASE");
951
952	// Far pointer array for our far jumps.
953	uint32_t *far_ptr = NULL;
954	hv_gpaddr_t far_ptr_gpaddr = map_guest_phys((void**)&far_ptr);
955	map_page(far_ptr, (void*)far_ptr_gpaddr);
956
957	far_ptr[0] = (uint32_t)(((uintptr_t)&radar61961809_prepare - (uintptr_t)&hvtest_begin) + (uintptr_t)gpaddr);
958	far_ptr[1] = 0x0010; // 32bit CS
959	far_ptr[2] = (uint32_t)(((uintptr_t)&radar61961809_loop64 - (uintptr_t)&hvtest_begin) + (uintptr_t)gpaddr);
960	far_ptr[3] = 0x0020; // 64bit CS
961
962	set_reg(vcpu, HV_X86_RDI, far_ptr_gpaddr);
963
964	// Setup GDT.
965	uint32_t *gdt = valloc(vm_page_size);
966	hv_gpaddr_t gdt_gpaddr = 0x70000000;
967	map_page(gdt, (void*)gdt_gpaddr);
968	bzero(gdt, vm_page_size);
969	memcpy(gdt, gdt_template, sizeof(gdt_template));
970
971	set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, gdt_gpaddr);
972	set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, sizeof(gdt_template)+1);
973
974	// Map test code (because we start in protected mode without
975	// paging, we cannot use the harness's fault management yet.)
976	size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
977
978	const size_t mem_size = 1 * 1024 * 1024;
979	uint8_t *guest_pages_shadow = valloc(mem_size);
980
981	bzero(guest_pages_shadow, mem_size);
982	memcpy(guest_pages_shadow, &hvtest_begin, guest_pages_size);
983
984	T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, (hv_gpaddr_t)gpaddr, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
985		HV_SUCCESS, "map guest memory");
986
987	// Create entries in PML4.
988	uint8_t *host_va = guest_pages_shadow;
989	uint8_t *va = (uint8_t*)gpaddr;
990	for (unsigned long i = 0; i < guest_pages_size / vm_page_size; i++, va += vm_page_size, host_va += vm_page_size) {
991		map_page(host_va, va);
992	}
993
994	uint64_t reason = run_to_next_vm_fault(vcpu, false);
995	T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_RDMSR, "check for rdmsr");
996    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), 0xc0000080LL, "expected EFER rdmsr");
997
998	set_reg(vcpu, HV_X86_RDX, 0);
999	set_reg(vcpu, HV_X86_RAX, 0);
1000    set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
1001
1002	reason = run_to_next_vm_fault(vcpu, false);
1003	T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_WRMSR, "check for wrmsr");
1004	T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), 0xc0000080LL, "expected EFER wrmsr");
1005	T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), 0x0LL, "expected EFER wrmsr higher bits 0");
1006	T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), 0x100LL, "expected EFER wrmsr lower bits LME");
1007
1008	set_vmcs(vcpu, VMCS_GUEST_IA32_EFER, 0x100);
1009	set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
1010
1011	// See assembly part of the test for checkpoints.
1012	expect_vmcall_with_value(vcpu, 0x100, false /* PG disabled =>
1013												 * no PFs expected */);
1014	expect_vmcall_with_value(vcpu, 0x1111, true /* PG now enabled */);
1015	expect_vmcall_with_value(vcpu, 0x2222, true);
1016
1017	free(guest_pages_shadow);
1018	free(gdt);
1019
1020    return NULL;
1021}
1022
1023T_DECL_HV(radar61961809_guest,
1024	"rdar://61961809 (Unexpected guest faults with hv_vcpu_run_until, dropping out of long mode)")
1025{
1026    vm_setup();
1027
1028	hv_gpaddr_t gpaddr = 0x80000000;
1029    pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
1030		(((uintptr_t)radar61961809_entry & PAGE_MASK) +
1031			gpaddr),
1032		0, radar61961809_monitor, (void*)gpaddr);
1033	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1034
1035	vm_cleanup();
1036}
1037
1038static void *
1039superpage_2mb_backed_guest_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1040{
1041    setup_protected_mode(vcpu);
1042
1043    size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
1044
1045    const size_t mem_size = 2 * 1024 * 1024;
1046
1047    uint8_t *guest_pages_shadow = mmap(NULL, mem_size,
1048                                       PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
1049                                       VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
1050
1051    if (guest_pages_shadow == MAP_FAILED) {
1052        /* Getting a 2MB superpage is hard in practice, because memory gets fragmented
1053         * easily.
1054         * T_META_REQUIRES_REBOOT in the T_DECL helps a lot in actually getting a page,
1055         * but in the case that it still fails, we don't want the test to fail through
1056         * no fault of the hypervisor.
1057         */
1058        T_SKIP("Unable to attain a 2MB superpage. Skipping.");
1059    }
1060
1061    bzero(guest_pages_shadow, mem_size);
1062    memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
1063
1064    T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x40000000, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
1065                HV_SUCCESS, "map guest memory");
1066
1067    expect_vmcall_with_value(vcpu, 0x23456, false);
1068
1069    munmap(guest_pages_shadow, mem_size);
1070
1071    return NULL;
1072}
1073
1074T_DECL_HV(superpage_2mb_backed_guest, "guest backed by a 2MB superpage",
1075       T_META_REQUIRES_REBOOT(true)) // Helps actually getting a superpage
1076{
1077    vm_setup();
1078
1079    pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
1080                                               (((uintptr_t)simple_protected_mode_vcpu_entry & PAGE_MASK) +
1081                                                0x40000000 + 0x1000),
1082                                               0, superpage_2mb_backed_guest_monitor, 0);
1083	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1084
1085	vm_cleanup();
1086}
1087
1088static void *
1089save_restore_regs_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1090{
1091
1092    setup_long_mode(vcpu);
1093
1094    uint64_t rsp = get_reg(vcpu, HV_X86_RSP);
1095
1096    set_reg(vcpu, HV_X86_RAX, 0x0101010101010101);
1097    set_reg(vcpu, HV_X86_RBX, 0x0202020202020202);
1098    set_reg(vcpu, HV_X86_RCX, 0x0303030303030303);
1099    set_reg(vcpu, HV_X86_RDX, 0x0404040404040404);
1100    set_reg(vcpu, HV_X86_RSI, 0x0505050505050505);
1101    set_reg(vcpu, HV_X86_RDI, 0x0606060606060606);
1102
1103    set_reg(vcpu, HV_X86_RBP, 0x0707070707070707);
1104
1105    set_reg(vcpu, HV_X86_R8, 0x0808080808080808);
1106    set_reg(vcpu, HV_X86_R9, 0x0909090909090909);
1107    set_reg(vcpu, HV_X86_R10, 0x0a0a0a0a0a0a0a0a);
1108    set_reg(vcpu, HV_X86_R11, 0x0b0b0b0b0b0b0b0b);
1109    set_reg(vcpu, HV_X86_R12, 0x0c0c0c0c0c0c0c0c);
1110    set_reg(vcpu, HV_X86_R13, 0x0d0d0d0d0d0d0d0d);
1111    set_reg(vcpu, HV_X86_R14, 0x0e0e0e0e0e0e0e0e);
1112    set_reg(vcpu, HV_X86_R15, 0x0f0f0f0f0f0f0f0f);
1113
1114    // invalid selectors: ok as long as we don't try to use them
1115    set_reg(vcpu, HV_X86_DS, 0x1010);
1116    set_reg(vcpu, HV_X86_ES, 0x2020);
1117    set_reg(vcpu, HV_X86_FS, 0x3030);
1118    set_reg(vcpu, HV_X86_GS, 0x4040);
1119
1120    expect_vmcall_with_value(vcpu, (uint64_t)~0x0101010101010101LL, true);
1121
1122    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSP), rsp-8, "check if push happened");
1123
1124    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), (uint64_t)~0x0101010101010101LL, "check if RAX negated");
1125    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBX), (uint64_t)~0x0202020202020202LL, "check if RBX negated");
1126    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), (uint64_t)~0x0303030303030303LL, "check if RCX negated");
1127    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), (uint64_t)~0x0404040404040404LL, "check if RDX negated");
1128    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSI), (uint64_t)~0x0505050505050505LL, "check if RSI negated");
1129    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDI), (uint64_t)~0x0606060606060606LL, "check if RDI negated");
1130
1131    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBP), (uint64_t)~0x0707070707070707LL, "check if RBP negated");
1132
1133    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R8), (uint64_t)~0x0808080808080808LL, "check if R8 negated");
1134    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R9), (uint64_t)~0x0909090909090909LL, "check if R9 negated");
1135    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R10), (uint64_t)~0x0a0a0a0a0a0a0a0aLL, "check if R10 negated");
1136    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R11), (uint64_t)~0x0b0b0b0b0b0b0b0bLL, "check if R11 negated");
1137    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R12), (uint64_t)~0x0c0c0c0c0c0c0c0cLL, "check if R12 negated");
1138    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R13), (uint64_t)~0x0d0d0d0d0d0d0d0dLL, "check if R13 negated");
1139    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R14), (uint64_t)~0x0e0e0e0e0e0e0e0eLL, "check if R14 negated");
1140    T_ASSERT_EQ(get_reg(vcpu, HV_X86_R15), (uint64_t)~0x0f0f0f0f0f0f0f0fLL, "check if R15 negated");
1141
1142    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), (uint64_t)~0x0101010101010101LL, "check if RAX negated");
1143    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBX), (uint64_t)~0x0202020202020202LL, "check if RBX negated");
1144    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), (uint64_t)~0x0303030303030303LL, "check if RCX negated");
1145    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), (uint64_t)~0x0404040404040404LL, "check if RDX negated");
1146
1147    // Cannot set selector to arbitrary value from the VM, but we have the RPL field to play with
1148    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DS), 1ULL, "check if DS == 1");
1149    T_ASSERT_EQ(get_reg(vcpu, HV_X86_ES), 2ULL, "check if ES == 2");
1150    T_ASSERT_EQ(get_reg(vcpu, HV_X86_FS), 3ULL, "check if FS == 3");
1151    T_ASSERT_EQ(get_reg(vcpu, HV_X86_GS), 1ULL, "check if GS == 1");
1152
1153    expect_vmcall_with_value(vcpu, (uint64_t)~0x0101010101010101LL, true);
1154
1155    T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSP), rsp-16, "check if push happened again");
1156
1157    return NULL;
1158}
1159
1160T_DECL_HV(save_restore_regs, "check if general purpose and segment registers are properly saved and restored")
1161{
1162    vm_setup();
1163
1164    pthread_t vcpu_thread = create_vcpu_thread(save_restore_regs_entry, 0x10000, save_restore_regs_monitor, 0);
1165	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1166
1167	vm_cleanup();
1168}
1169
1170static void *
1171save_restore_debug_regs_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1172{
1173
1174    setup_long_mode(vcpu);
1175
1176    set_reg(vcpu, HV_X86_RAX, 0x0101010101010101);
1177
1178    set_reg(vcpu, HV_X86_DR0, 0x1111111111111111);
1179    set_reg(vcpu, HV_X86_DR1, 0x2222222222222222);
1180    set_reg(vcpu, HV_X86_DR2, 0x3333333333333333);
1181    set_reg(vcpu, HV_X86_DR3, 0x4444444444444444);
1182
1183    // debug status and control regs (some bits are reserved, one other bit would generate an exception)
1184    const uint64_t dr6_force_clear = 0xffffffff00001000ULL;
1185    const uint64_t dr6_force_set = 0xffff0ff0ULL;
1186    const uint64_t dr7_force_clear = 0xffffffff0000f000ULL;
1187    const uint64_t dr7_force_set = 0x0400ULL;
1188
1189    set_reg(vcpu, HV_X86_DR6, (0x5555555555555555ULL | dr6_force_set) & ~(dr6_force_clear));
1190    set_reg(vcpu, HV_X86_DR7, (0x5555555555555555ULL | dr7_force_set) & ~(dr7_force_clear));
1191
1192    expect_vmcall_with_value(vcpu, ~0x0101010101010101ULL, true);
1193
1194    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR0), (uint64_t)~0x1111111111111111LL, "check if DR0 negated");
1195    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR1), (uint64_t)~0x2222222222222222LL, "check if DR1 negated");
1196    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR2), (uint64_t)~0x3333333333333333LL, "check if DR2 negated");
1197    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR3), (uint64_t)~0x4444444444444444LL, "check if DR3 negated");
1198
1199    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR6), (0xaaaaaaaaaaaaaaaaULL | dr6_force_set) & ~(dr6_force_clear), "check if DR6 negated");
1200    T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR7), (0xaaaaaaaaaaaaaaaaULL | dr7_force_set) & ~(dr7_force_clear), "check if DR7 negated");
1201
1202    expect_vmcall_with_value(vcpu, 0x0101010101010101LL, true);
1203
1204    return NULL;
1205}
1206
1207T_DECL_HV(save_restore_debug_regs, "check if debug registers are properly saved and restored")
1208{
1209    vm_setup();
1210
1211    pthread_t vcpu_thread = create_vcpu_thread(save_restore_debug_regs_entry, 0x10000, save_restore_debug_regs_monitor, 0);
1212	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1213
1214	vm_cleanup();
1215}
1216
// NOTE(review): empty macro, apparently unused in this part of the file --
// confirm whether it is referenced elsewhere or can be removed.
#define T_NATIVE_MSR(msr)
1218
1219static void *
1220native_msr_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1221{
1222    const uint32_t msrs[] = {
1223        MSR_IA32_STAR,
1224        MSR_IA32_LSTAR,
1225        MSR_IA32_CSTAR,
1226        MSR_IA32_FMASK,
1227        MSR_IA32_KERNEL_GS_BASE,
1228        MSR_IA32_TSC,
1229        MSR_IA32_TSC_AUX,
1230
1231        MSR_IA32_SYSENTER_CS,
1232        MSR_IA32_SYSENTER_ESP,
1233        MSR_IA32_SYSENTER_EIP,
1234        MSR_IA32_FS_BASE,
1235        MSR_IA32_GS_BASE,
1236    };
1237    const int msr_count = sizeof(msrs)/sizeof(uint32_t);
1238
1239    setup_long_mode(vcpu);
1240
1241    for (int i = 0; i < msr_count; i++) {
1242        T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, msrs[i], true), HV_SUCCESS, "enable native MSR %x", msrs[i]);
1243    }
1244
1245    expect_vmcall_with_value(vcpu, 0x23456, true);
1246
1247    return NULL;
1248}
1249
1250T_DECL_HV(native_msr_clobber, "enable and clobber native MSRs in the guest")
1251{
1252    vm_setup();
1253
1254    pthread_t vcpu_thread = create_vcpu_thread(native_msr_vcpu_entry, 0x10000, native_msr_monitor, 0);
1255	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1256
1257	vm_cleanup();
1258}
1259
1260static void *
1261radar60691363_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1262{
1263    setup_long_mode(vcpu);
1264
1265    uint64_t proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
1266	set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(CPU_BASED2_VMCS_SHADOW, proc2_cap));
1267
1268	T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_GUEST_ES,
1269			HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1270		"enable VMCS_GUEST_ES shadow access");
1271	T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_GUEST_ES, 0x1234), HV_SUCCESS,
1272		"set VMCS_GUEST_ES in shadow");
1273
1274	T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_RO_EXIT_QUALIFIC,
1275			HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1276		"enable VMCS_RO_EXIT_QUALIFIC shadow access");
1277	T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC, 0x111), HV_SUCCESS,
1278		"set VMCS_RO_EXIT_QUALIFIC in shadow");
1279
1280	T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_RO_IO_RCX,
1281			HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1282		"enable VMCS_RO_IO_RCX shadow access");
1283	T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_RO_IO_RCX, 0x2323), HV_SUCCESS,
1284		"set VMCS_RO_IO_RCX in shadow");
1285
1286    expect_vmcall_with_value(vcpu, 0x1234, true);
1287	expect_vmcall_with_value(vcpu, 0x111, true);
1288	expect_vmcall_with_value(vcpu, 0x2323, true);
1289
1290	expect_vmcall_with_value(vcpu, 0x4567, true);
1291
1292	uint64_t value;
1293	T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_GUEST_ES, &value), HV_SUCCESS,
1294		"read updated VMCS_GUEST_ES in shadow");
1295	T_ASSERT_EQ(value, 0x9191LL, "VMCS_GUEST_ES value is updated");
1296	T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC, &value), HV_SUCCESS,
1297		"read updated VMCS_RO_EXIT_QUALIFIC in shadow");
1298	T_ASSERT_EQ(value, 0x9898LL, "VMCS_RO_EXIT_QUALIFIC value is updated");
1299	T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_RO_IO_RCX, &value), HV_SUCCESS,
1300		"read updated VMCS_RO_IO_RCX in shadow");
1301	T_ASSERT_EQ(value, 0x7979LL, "VMCS_RO_IO_RCX value is updated");
1302
1303	// This must not work.
1304	T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_CTRL_EPTP,
1305			HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1306		"enable VMCS_CTRL_EPTP shadow access");
1307	T_ASSERT_EQ(hv_vmx_vcpu_read_vmcs(vcpu, VMCS_CTRL_EPTP, &value), HV_BAD_ARGUMENT,
1308		"accessing EPTP in ordinary VMCS fails");
1309
1310    return NULL;
1311}
1312
1313T_DECL_HV(radar60691363, "rdar://60691363 (SEED: Web: Allow shadowing of read only VMCS fields)")
1314{
1315	vm_setup();
1316
1317	uint64_t proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
1318
1319	if (!(proc2_cap & ((uint64_t)CPU_BASED2_VMCS_SHADOW << 32))) {
1320		T_SKIP("Device does not support shadow VMCS, skipping.");
1321	}
1322
1323	pthread_t vcpu_thread = create_vcpu_thread(radar60691363_entry, 0x10000, radar60691363_monitor, 0);
1324	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1325
1326	vm_cleanup();
1327}
1328
1329T_DECL_HV(radar63641279, "rdar://63641279 (Evaluate \"no SMT\" scheduling option/sidechannel security mitigation for Hypervisor.framework VMs)",
1330    T_META_OWNER("mphalan"))
1331{
1332	const uint64_t ALL_MITIGATIONS =
1333	    HV_VM_MITIGATION_A_ENABLE |
1334	    HV_VM_MITIGATION_B_ENABLE |
1335	    HV_VM_MITIGATION_C_ENABLE |
1336	    HV_VM_MITIGATION_D_ENABLE |
1337	    HV_VM_MITIGATION_E_ENABLE; // NO_SMT
1338
1339	T_SETUPBEGIN;
1340
1341	if (hv_support() < 1) {
1342		T_SKIP("Running on non-HV target, skipping...");
1343		return;
1344	}
1345
1346	create_vm(HV_VM_SPECIFY_MITIGATIONS | ALL_MITIGATIONS);
1347
1348	T_SETUPEND;
1349
1350	pthread_t vcpu_thread = create_vcpu_thread(
1351	    (vcpu_entry_function) (((uintptr_t)simple_real_mode_vcpu_entry & PAGE_MASK) + 0x1000),
1352	    0, simple_real_mode_monitor, 0);
1353	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1354
1355	vm_cleanup();
1356}
1357
// Drain the given port and return the number of messages that were waiting,
// sanity-checking each notification's address, value and size on the way.
static int
get_count(mach_port_t port)
{
	int received = 0;

	for (;;) {
		hv_ion_message_t msg = {
			.header.msgh_size = sizeof (msg),
			.header.msgh_local_port = port,
		};

		// Non-blocking receive; stop once the queue is empty.
		if (mach_msg(&msg.header, MACH_RCV_MSG | MACH_RCV_TIMEOUT,
		    0, sizeof (msg), port, 0, MACH_PORT_NULL) != MACH_MSG_SUCCESS) {
			break;
		}

		T_QUIET; T_ASSERT_TRUE(msg.addr == 0xab || msg.addr == 0xcd || msg.addr == 0xef,
		    "address is 0xab, 0xcd or 0xef");
		T_QUIET; T_ASSERT_EQ(msg.value, 0xaaULL, "value written is 0xaa");
		T_QUIET; T_ASSERT_TRUE(msg.size == 1 || msg.size == 4, "size is 1 or 4");

		received++;
	}

	return received;
}
1388
1389static void *
1390pio_monitor(void *arg, hv_vcpuid_t vcpu)
1391{
1392
1393	size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
1394	const size_t mem_size = 1 * 1024 * 1024;
1395	uint8_t *guest_pages_shadow = valloc(mem_size);
1396	int handle_io_count = 0;
1397	uint64_t exit_reason = 0;
1398
1399	setup_real_mode(vcpu);
1400
1401	bzero(guest_pages_shadow, mem_size);
1402	memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
1403
1404	T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x0, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC), HV_SUCCESS,
1405	    "map guest memory");
1406
1407	while (true) {
1408		run_vcpu(vcpu);
1409		exit_reason = get_vmcs(vcpu, VMCS_RO_EXIT_REASON);
1410
1411		if (exit_reason == VMX_REASON_VMCALL) {
1412			break;
1413		}
1414
1415		if (exit_reason == VMX_REASON_IRQ) {
1416			continue;
1417		}
1418
1419		if (exit_reason == VMX_REASON_EPT_VIOLATION && !hv_use_run_until) {
1420			continue;
1421		}
1422
1423		T_QUIET; T_ASSERT_EQ(exit_reason, (uint64_t)VMX_REASON_IO, "exit reason is IO");
1424
1425		union {
1426			struct {
1427				uint64_t io_size:3;
1428				uint64_t io_dirn:1;
1429				uint64_t io_string:1;
1430				uint64_t io_rep:1;
1431				uint64_t io_encoding:1;
1432				uint64_t __io_resvd0:9;
1433				uint64_t io_port:16;
1434				uint64_t __io_resvd1:32;
1435			} io;
1436			uint64_t reg64;
1437		} info = {
1438			.reg64 = get_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC),
1439		};
1440
1441		T_QUIET; T_ASSERT_EQ(info.io.io_port, 0xefULL, "exit is a port IO on 0xef");
1442
1443		handle_io_count++;
1444
1445		set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP) + get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
1446	}
1447
1448	free(guest_pages_shadow);
1449
1450	*((int *)arg) = handle_io_count;
1451
1452	return NULL;
1453}
1454
T_DECL_HV(pio_notifier_arguments, "test adding and removing port IO notifiers", T_META_OWNER("mphalan"))
{
	mach_port_t notify_port = MACH_PORT_NULL;
	kern_return_t kr = KERN_FAILURE;
	hv_return_t ret = HV_ERROR;

	T_SETUPBEGIN;

	// Notification port with a send right for the notifiers to use.
	kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
	    &notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "allocate mach port");

	kr = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
	   MACH_MSG_TYPE_MAKE_SEND);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "insert send right");

	vm_setup();

	T_SETUPEND;

	// Invalid arguments must all be rejected.
	ret = hv_vm_add_pio_notifier(0xab, 7, 1, notify_port, HV_ION_NONE);
	T_ASSERT_NE(ret, HV_SUCCESS, "adding notifier with bad size");

	ret = hv_vm_add_pio_notifier(0xab, 1, UINT16_MAX, notify_port, HV_ION_NONE);
	T_ASSERT_NE(ret, HV_SUCCESS, "adding notifier with bad data");

	ret = hv_vm_add_pio_notifier(0xab, 1, UINT16_MAX, MACH_PORT_NULL, HV_ION_NONE);
	T_ASSERT_NE(ret, HV_SUCCESS, "adding notifier with bad port");

	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, 0xffff);
	T_ASSERT_NE(ret, HV_SUCCESS, "adding notifier with bad flags");

	// Removing something never added must fail.
	ret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_ASSERT_NE(ret, HV_SUCCESS, "removing a non-existent notifier");

	// Duplicates must be rejected.
	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "adding notifier");
	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_ASSERT_NE(ret, HV_SUCCESS, "adding duplicate notifier");
	ret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "removing notifier");

	// Plain add/remove round trip.
	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_ASSERT_EQ(ret, HV_SUCCESS, "adding notifier");
	ret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_ASSERT_EQ(ret, HV_SUCCESS, "removing notifier");

	// Two notifiers removed in the opposite order they were added.
	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "adding 1st notifier");
	ret = hv_vm_add_pio_notifier(0xab, 2, 1, notify_port, HV_ION_NONE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "adding 2nd notifier");
	ret = hv_vm_remove_pio_notifier(0xab, 2, 1, notify_port, HV_ION_NONE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "removing 2nd notifier");
	ret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
	T_ASSERT_EQ(ret, HV_SUCCESS, "removing notifier in reverse order");

	// Wildcard size.
	ret = hv_vm_add_pio_notifier(0xab, 0, 1, notify_port, HV_ION_ANY_SIZE);
	T_ASSERT_EQ(ret, HV_SUCCESS, "adding notifier with ANY_SIZE");
	ret = hv_vm_remove_pio_notifier(0xab, 0, 1, notify_port, HV_ION_ANY_SIZE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "removing notifier with ANY_SIZE");

	// Wildcard value.
	ret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_ANY_VALUE);
	T_ASSERT_EQ(ret, HV_SUCCESS, "adding notifier with ANY_VALUE");
	ret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_ANY_VALUE);
	T_QUIET; T_ASSERT_EQ(ret, HV_SUCCESS, "removing notifier with ANY_VALUE");

	vm_cleanup();

	mach_port_mod_refs(mach_task_self(), notify_port, MACH_PORT_RIGHT_RECEIVE, -1);
}
1537
/*
 * Verify that a port IO notifier whose mach port has become unusable
 * (destroyed, receive right dropped, or deallocated) silently drops
 * notifications rather than crashing or generating IO exits.
 */
T_DECL_HV(pio_notifier_bad_port, "test port IO notifiers when the port is destroyed/deallocated/has no receive right",
    T_META_OWNER("mphalan"))
{
	pthread_t vcpu_thread;
	mach_port_t notify_port = MACH_PORT_NULL;
	int handle_io_count = 0;
	kern_return_t kret = KERN_FAILURE;
	hv_return_t hret = HV_ERROR;

	/* Setup VM */
	vm_setup();

	/*
	 * Test that nothing bad happens when the notification port is
	 * added and mach_port_destroy() is called.
	 */

	/* Add a notification port. */
	kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
	    &notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");

	/* Insert send right. */
	kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
	   MACH_MSG_TYPE_MAKE_SEND);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");

	/* All port writes to 0xef. */
	hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
	    HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
	    "to port 0xef");

	/* After adding, destroy the port. */
	kret = mach_port_destroy(mach_task_self(), notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "destroying notify port");

	vcpu_thread = create_vcpu_thread((vcpu_entry_function)
	    (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
	    &handle_io_count);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");

	/* Expect the messages to be lost. */
	T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when port destroyed");

	hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");

	vm_cleanup();


	vm_setup();
	/*
	 * Test that nothing bad happens when the notification port is added and
	 * mach_port_mod_refs() is called.
	 */

	/* Add a notification port. */
	kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
	    &notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");

	/* Insert send right. */
	kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
	   MACH_MSG_TYPE_MAKE_SEND);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");

	/* All port writes to 0xef. */
	hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
	    HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
	    "to port 0xef");

	/*
	 * After adding, remove receive right.
	 * (The return value was previously discarded here, so the assert
	 * below checked the stale kret from mach_port_insert_right().)
	 */
	kret = mach_port_mod_refs(mach_task_self(), notify_port, MACH_PORT_RIGHT_RECEIVE, -1);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "removing receive right");

	vcpu_thread = create_vcpu_thread((vcpu_entry_function)
	    (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
	    &handle_io_count);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");

	/* Expect messages to be lost. */
	T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when receive right removed");

	hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");

	vm_cleanup();


	vm_setup();
	/*
	 * Test that nothing bad happens when the notification port is added and
	 * mach_port_deallocate() is called.
	 */

	/* Add a notification port. */
	kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
	    &notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");

	/* Insert send right. */
	kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
	   MACH_MSG_TYPE_MAKE_SEND);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");

	/* All port writes to 0xef. */
	hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
	    HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
	    "to port 0xef");

	/* After adding, call mach_port_deallocate(). */
	kret = mach_port_deallocate(mach_task_self(), notify_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "destroying notify port");

	vcpu_thread = create_vcpu_thread((vcpu_entry_function)
	    (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
	    &handle_io_count);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");

	/* Expect messages to be lost. */
	T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when port deallocated");

	hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");

	vm_cleanup();
}
1668
1669T_DECL_HV(pio_notifier, "test port IO notifiers", T_META_OWNER("mphalan"))
1670{
1671	#define MACH_PORT_COUNT 4
1672	mach_port_t notify_port[MACH_PORT_COUNT] = { MACH_PORT_NULL };
1673	int handle_io_count = 0;
1674	kern_return_t kret = KERN_FAILURE;
1675	hv_return_t hret = HV_ERROR;
1676
1677	T_SETUPBEGIN;
1678
1679	/* Setup notification ports. */
1680	for (int i = 0; i  < MACH_PORT_COUNT; i++) {
1681		kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
1682		    &notify_port[i]);
1683		T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
1684
1685		kret = mach_port_insert_right(mach_task_self(), notify_port[i], notify_port[i],
1686		   MACH_MSG_TYPE_MAKE_SEND);
1687		T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
1688	}
1689	/* Setup VM */
1690	vm_setup();
1691
1692	T_SETUPEND;
1693
1694	/* Test that messages are properly sent to mach port notifiers. */
1695
1696	/* One for all port writes to 0xab. */
1697	hret = hv_vm_add_pio_notifier(0xab, 0, 0, notify_port[0],
1698	    HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
1699	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
1700	    "to port 0xab");
1701
	/* One for 4 byte writes of 0xaa. */
1703	hret = hv_vm_add_pio_notifier(0xab, 4, 0xaa, notify_port[1], HV_ION_NONE);
1704	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for 4 byte writes "
1705	    "to port 0xab");
1706
1707	/* One for all writes to 0xcd (ignoring queue full errors). */
1708	hret = hv_vm_add_pio_notifier(0xcd, 0, 0, notify_port[2],
1709	    HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
1710	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
1711	    "to port 0xcd, ignoring if the queue fills");
1712
1713	/* One for writes to 0xef asking for exits when the queue is full. */
1714	hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port[3],
1715	    HV_ION_ANY_SIZE | HV_ION_ANY_VALUE | HV_ION_EXIT_FULL);
1716	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
1717	    "to port 0xef, not ignoring if the queue fills");
1718
1719	pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
1720	    (((uintptr_t)pio_entry & PAGE_MASK) + 0x1000), 0, pio_monitor,
1721	    &handle_io_count);
1722	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1723
1724	/* Expect messages to be waiting. */
1725	T_ASSERT_EQ(4, get_count(notify_port[0]), "expected 4 messages");
1726	T_ASSERT_EQ(1, get_count(notify_port[1]), "expected 1 messages");
1727	T_ASSERT_EQ(10, get_count(notify_port[2]) + handle_io_count, "expected IO exits");
1728	T_ASSERT_EQ(5, get_count(notify_port[3]), "expected 5 messages");
1729
1730	hret = hv_vm_remove_pio_notifier(0xab, 0, 0, notify_port[0], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
1731	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xab");
1732
1733	hret = hv_vm_remove_pio_notifier(0xab, 4, 0xaa, notify_port[1], HV_ION_NONE);
1734	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for 4 byte writes "
1735	    "to port 0xab");
1736
1737	hret = hv_vm_remove_pio_notifier(0xcd, 0, 0, notify_port[2], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
1738	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes "
1739	    "to port 0xcd, ignoring if the queue fills");
1740
1741	hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port[3], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE | HV_ION_EXIT_FULL);
1742	T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes "
1743	    "to port 0xef, not ignoring if the queue fills");
1744
1745	vm_cleanup();
1746
1747	for (int i = 0; i < MACH_PORT_COUNT; i++) {
1748		mach_port_mod_refs(mach_task_self(), notify_port[i], MACH_PORT_RIGHT_RECEIVE, -1);
1749	}
1750}
1751