/* xref: /xnu-10002.61.3/tests/avx.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34) */
1*0f4c859eSApple OSS Distributions #ifdef T_NAMESPACE
2*0f4c859eSApple OSS Distributions #undef T_NAMESPACE
3*0f4c859eSApple OSS Distributions #endif
4*0f4c859eSApple OSS Distributions 
5*0f4c859eSApple OSS Distributions #include <darwintest.h>
6*0f4c859eSApple OSS Distributions #include <unistd.h>
7*0f4c859eSApple OSS Distributions #include <signal.h>
8*0f4c859eSApple OSS Distributions #include <sys/time.h>
9*0f4c859eSApple OSS Distributions #include <sys/mman.h>
10*0f4c859eSApple OSS Distributions #include <immintrin.h>
11*0f4c859eSApple OSS Distributions #include <mach/mach.h>
12*0f4c859eSApple OSS Distributions #include <stdio.h>
13*0f4c859eSApple OSS Distributions #include <string.h>
14*0f4c859eSApple OSS Distributions #include <err.h>
15*0f4c859eSApple OSS Distributions #include <i386/cpu_capabilities.h>
16*0f4c859eSApple OSS Distributions 
17*0f4c859eSApple OSS Distributions T_GLOBAL_META(
18*0f4c859eSApple OSS Distributions 	T_META_NAMESPACE("xnu.intel"),
19*0f4c859eSApple OSS Distributions 	T_META_CHECK_LEAKS(false),
20*0f4c859eSApple OSS Distributions 	T_META_RADAR_COMPONENT_NAME("xnu"),
21*0f4c859eSApple OSS Distributions 	T_META_RADAR_COMPONENT_VERSION("intel"),
22*0f4c859eSApple OSS Distributions 	T_META_OWNER("seth_goldberg"),
23*0f4c859eSApple OSS Distributions 	T_META_RUN_CONCURRENTLY(true)
24*0f4c859eSApple OSS Distributions 	);
25*0f4c859eSApple OSS Distributions 
26*0f4c859eSApple OSS Distributions #define QUICK_RUN_TIME   (2)
27*0f4c859eSApple OSS Distributions #define NORMAL_RUN_TIME  (10)
28*0f4c859eSApple OSS Distributions #define LONG_RUN_TIME    (10*60)
29*0f4c859eSApple OSS Distributions #define TIMEOUT_OVERHEAD (10)
30*0f4c859eSApple OSS Distributions 
31*0f4c859eSApple OSS Distributions volatile boolean_t checking = true;
32*0f4c859eSApple OSS Distributions char vec_str_buf[8196];
33*0f4c859eSApple OSS Distributions char karray_str_buf[1024];
34*0f4c859eSApple OSS Distributions 
/*
 * ymm defines/globals/prototypes
 *
 * The X86_AVX_* and MCONTEXT_SIZE_256 names select the 64-bit or 32-bit
 * thread-state flavor depending on the target architecture.
 */

/* Value the SIGALRM handler plants in the saved ymm7 to stop the check loop. */
#define STOP_COOKIE_256 0x01234567

#if defined(__x86_64__)
#define YMM_MAX                 16
#define X86_AVX_STATE_T         x86_avx_state64_t
#define X86_AVX_STATE_COUNT     x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR    x86_AVX_STATE64
#define MCONTEXT_SIZE_256       sizeof(struct __darwin_mcontext_avx64)
#else
#define YMM_MAX                 8
#define X86_AVX_STATE_T         x86_avx_state32_t
#define X86_AVX_STATE_COUNT     x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR    x86_AVX_STATE32
#define MCONTEXT_SIZE_256       sizeof(struct __darwin_mcontext_avx32)
#endif

#define VECTOR256 __m256
#define VEC256ALIGN __attribute__ ((aligned(32)))

static inline void populate_ymm(void);
static inline void check_ymm(void);

/*
 * Register snapshots, one slot per ymm register:
 *   vec256array0 - reference values written by populate_ymm()
 *   vec256array1 - scratch for live-register / thread-state comparisons
 *   vec256array2 - live registers captured right after thread_get_state()
 *   vec256array3 - state extracted from the signal context
 */
VECTOR256       vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array3[YMM_MAX] VEC256ALIGN;
60*0f4c859eSApple OSS Distributions 
61*0f4c859eSApple OSS Distributions /*
62*0f4c859eSApple OSS Distributions  * zmm defines/globals/prototypes
63*0f4c859eSApple OSS Distributions  */
64*0f4c859eSApple OSS Distributions #define STOP_COOKIE_512 0x0123456789abcdefULL
65*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
66*0f4c859eSApple OSS Distributions #define ZMM_MAX                 32
67*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_T      x86_avx512_state64_t
68*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_COUNT  x86_AVX512_STATE64_COUNT
69*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
70*0f4c859eSApple OSS Distributions #define MCONTEXT_SIZE_512       sizeof(struct __darwin_mcontext_avx512_64)
71*0f4c859eSApple OSS Distributions #else
72*0f4c859eSApple OSS Distributions #define ZMM_MAX                 8
73*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_T      x86_avx512_state32_t
74*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_COUNT  x86_AVX512_STATE32_COUNT
75*0f4c859eSApple OSS Distributions #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
76*0f4c859eSApple OSS Distributions #define MCONTEXT_SIZE_512       sizeof(struct __darwin_mcontext_avx512_32)
77*0f4c859eSApple OSS Distributions #endif
78*0f4c859eSApple OSS Distributions #define VECTOR512 __m512
79*0f4c859eSApple OSS Distributions #define VEC512ALIGN __attribute ((aligned(64)))
80*0f4c859eSApple OSS Distributions #define OPMASK uint64_t
81*0f4c859eSApple OSS Distributions #define KARRAY_MAX              8
82*0f4c859eSApple OSS Distributions static inline void zero_zmm(void);
83*0f4c859eSApple OSS Distributions static inline void zero_opmask(void);
84*0f4c859eSApple OSS Distributions static inline void populate_zmm(void);
85*0f4c859eSApple OSS Distributions static inline void populate_opmask(void);
86*0f4c859eSApple OSS Distributions static inline void check_zmm(boolean_t check_cookie);
87*0f4c859eSApple OSS Distributions VECTOR512       vec512array0[ZMM_MAX] VEC512ALIGN;
88*0f4c859eSApple OSS Distributions VECTOR512       vec512array1[ZMM_MAX] VEC512ALIGN;
89*0f4c859eSApple OSS Distributions VECTOR512       vec512array2[ZMM_MAX] VEC512ALIGN;
90*0f4c859eSApple OSS Distributions VECTOR512       vec512array3[ZMM_MAX] VEC512ALIGN;
91*0f4c859eSApple OSS Distributions OPMASK karray0[8];
92*0f4c859eSApple OSS Distributions OPMASK karray1[8];
93*0f4c859eSApple OSS Distributions OPMASK karray2[8];
94*0f4c859eSApple OSS Distributions OPMASK karray3[8];
95*0f4c859eSApple OSS Distributions 
96*0f4c859eSApple OSS Distributions kern_return_t _thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
97*0f4c859eSApple OSS Distributions     mach_msg_type_number_t *state_count);
98*0f4c859eSApple OSS Distributions kern_return_t _thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
99*0f4c859eSApple OSS Distributions     mach_msg_type_number_t *state_count);
100*0f4c859eSApple OSS Distributions 
101*0f4c859eSApple OSS Distributions /*
102*0f4c859eSApple OSS Distributions  * Common functions
103*0f4c859eSApple OSS Distributions  */
104*0f4c859eSApple OSS Distributions 
/*
 * Compare two n-byte buffers one byte at a time.
 *
 * Returns 0 when the buffers are identical (or n is 0); otherwise returns the
 * difference between the first pair of mismatching bytes, each read as an
 * unsigned char.
 * NOTE(review): presumably hand-rolled so the comparison does not go through
 * an optimized libc memcmp() while register contents are under test --
 * confirm.
 */
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;
	size_t off;

	for (off = 0; off < n; off++) {
		if (lhs[off] != rhs[off]) {
			return lhs[off] - rhs[off];
		}
	}
	return 0;
}
118*0f4c859eSApple OSS Distributions 
119*0f4c859eSApple OSS Distributions void
start_timer(int seconds,void (* handler)(int,siginfo_t *,void *))120*0f4c859eSApple OSS Distributions start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
121*0f4c859eSApple OSS Distributions {
122*0f4c859eSApple OSS Distributions 	struct sigaction sigalrm_action = {
123*0f4c859eSApple OSS Distributions 		.sa_sigaction = handler,
124*0f4c859eSApple OSS Distributions 		.sa_flags = SA_RESTART,
125*0f4c859eSApple OSS Distributions 		.sa_mask = 0
126*0f4c859eSApple OSS Distributions 	};
127*0f4c859eSApple OSS Distributions 	struct itimerval timer = {
128*0f4c859eSApple OSS Distributions 		.it_value.tv_sec = seconds,
129*0f4c859eSApple OSS Distributions 		.it_value.tv_usec = 0,
130*0f4c859eSApple OSS Distributions 		.it_interval.tv_sec = 0,
131*0f4c859eSApple OSS Distributions 		.it_interval.tv_usec = 0
132*0f4c859eSApple OSS Distributions 	};
133*0f4c859eSApple OSS Distributions 	T_QUIET; T_WITH_ERRNO;
134*0f4c859eSApple OSS Distributions 	T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
135*0f4c859eSApple OSS Distributions 	T_QUIET; T_WITH_ERRNO;
136*0f4c859eSApple OSS Distributions 	T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
137*0f4c859eSApple OSS Distributions }
138*0f4c859eSApple OSS Distributions 
139*0f4c859eSApple OSS Distributions void
require_avx(void)140*0f4c859eSApple OSS Distributions require_avx(void)
141*0f4c859eSApple OSS Distributions {
142*0f4c859eSApple OSS Distributions 	if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
143*0f4c859eSApple OSS Distributions 		T_SKIP("AVX not supported on this system");
144*0f4c859eSApple OSS Distributions 	}
145*0f4c859eSApple OSS Distributions }
146*0f4c859eSApple OSS Distributions 
147*0f4c859eSApple OSS Distributions void
require_avx512(void)148*0f4c859eSApple OSS Distributions require_avx512(void)
149*0f4c859eSApple OSS Distributions {
150*0f4c859eSApple OSS Distributions 	if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
151*0f4c859eSApple OSS Distributions 		T_SKIP("AVX-512 not supported on this system");
152*0f4c859eSApple OSS Distributions 	}
153*0f4c859eSApple OSS Distributions }
154*0f4c859eSApple OSS Distributions 
155*0f4c859eSApple OSS Distributions /*
156*0f4c859eSApple OSS Distributions  * ymm functions
157*0f4c859eSApple OSS Distributions  */
158*0f4c859eSApple OSS Distributions 
159*0f4c859eSApple OSS Distributions static inline void
store_ymm(VECTOR256 * vec256array)160*0f4c859eSApple OSS Distributions store_ymm(VECTOR256 *vec256array)
161*0f4c859eSApple OSS Distributions {
162*0f4c859eSApple OSS Distributions 	int i = 0;
163*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %%ymm0, %0" :"=m" (vec256array[i]));
164*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm1, %0" :"=m" (vec256array[i]));
165*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm2, %0" :"=m" (vec256array[i]));
166*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm3, %0" :"=m" (vec256array[i]));
167*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm4, %0" :"=m" (vec256array[i]));
168*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm5, %0" :"=m" (vec256array[i]));
169*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm6, %0" :"=m" (vec256array[i]));
170*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm7, %0" :"=m" (vec256array[i]));
171*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
172*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm8, %0" :"=m" (vec256array[i]));
173*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm9, %0" :"=m" (vec256array[i]));
174*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm10, %0" :"=m" (vec256array[i]));
175*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm11, %0" :"=m" (vec256array[i]));
176*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm12, %0" :"=m" (vec256array[i]));
177*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm13, %0" :"=m" (vec256array[i]));
178*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm14, %0" :"=m" (vec256array[i]));
179*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%ymm15, %0" :"=m" (vec256array[i]));
180*0f4c859eSApple OSS Distributions #endif
181*0f4c859eSApple OSS Distributions }
182*0f4c859eSApple OSS Distributions 
183*0f4c859eSApple OSS Distributions static inline void
restore_ymm(VECTOR256 * vec256array)184*0f4c859eSApple OSS Distributions restore_ymm(VECTOR256 *vec256array)
185*0f4c859eSApple OSS Distributions {
186*0f4c859eSApple OSS Distributions 	VECTOR256 *p = vec256array;
187*0f4c859eSApple OSS Distributions 
188*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0"); p++;
189*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1"); p++;
190*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2"); p++;
191*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3"); p++;
192*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4"); p++;
193*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5"); p++;
194*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6"); p++;
195*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");
196*0f4c859eSApple OSS Distributions 
197*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
198*0f4c859eSApple OSS Distributions 	++p; __asm__ volatile ("vmovaps  %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8"); p++;
199*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9"); p++;
200*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10"); p++;
201*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11"); p++;
202*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12"); p++;
203*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13"); p++;
204*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14"); p++;
205*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
206*0f4c859eSApple OSS Distributions #endif
207*0f4c859eSApple OSS Distributions }
208*0f4c859eSApple OSS Distributions 
209*0f4c859eSApple OSS Distributions static inline void
populate_ymm(void)210*0f4c859eSApple OSS Distributions populate_ymm(void)
211*0f4c859eSApple OSS Distributions {
212*0f4c859eSApple OSS Distributions 	int j;
213*0f4c859eSApple OSS Distributions 	uint32_t p[8] VEC256ALIGN;
214*0f4c859eSApple OSS Distributions 
215*0f4c859eSApple OSS Distributions 	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
216*0f4c859eSApple OSS Distributions 		p[j] = getpid();
217*0f4c859eSApple OSS Distributions 	}
218*0f4c859eSApple OSS Distributions 
219*0f4c859eSApple OSS Distributions 	p[0] = 0x22222222;
220*0f4c859eSApple OSS Distributions 	p[7] = 0x77777777;
221*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
222*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
223*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
224*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");
225*0f4c859eSApple OSS Distributions 
226*0f4c859eSApple OSS Distributions 	p[0] = 0x44444444;
227*0f4c859eSApple OSS Distributions 	p[7] = 0xEEEEEEEE;
228*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
229*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
230*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
231*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");
232*0f4c859eSApple OSS Distributions 
233*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
234*0f4c859eSApple OSS Distributions 	p[0] = 0x88888888;
235*0f4c859eSApple OSS Distributions 	p[7] = 0xAAAAAAAA;
236*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
237*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
238*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
239*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");
240*0f4c859eSApple OSS Distributions 
241*0f4c859eSApple OSS Distributions 	p[0] = 0xBBBBBBBB;
242*0f4c859eSApple OSS Distributions 	p[7] = 0xCCCCCCCC;
243*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
244*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
245*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
246*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
247*0f4c859eSApple OSS Distributions #endif
248*0f4c859eSApple OSS Distributions 
249*0f4c859eSApple OSS Distributions 	store_ymm(vec256array0);
250*0f4c859eSApple OSS Distributions }
251*0f4c859eSApple OSS Distributions 
252*0f4c859eSApple OSS Distributions void
vec256_to_string(VECTOR256 * vec,char * buf)253*0f4c859eSApple OSS Distributions vec256_to_string(VECTOR256 *vec, char *buf)
254*0f4c859eSApple OSS Distributions {
255*0f4c859eSApple OSS Distributions 	unsigned int vec_idx = 0;
256*0f4c859eSApple OSS Distributions 	unsigned int buf_idx = 0;
257*0f4c859eSApple OSS Distributions 	int ret = 0;
258*0f4c859eSApple OSS Distributions 
259*0f4c859eSApple OSS Distributions 	for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
260*0f4c859eSApple OSS Distributions 		uint64_t a[4];
261*0f4c859eSApple OSS Distributions 		bcopy(&vec[vec_idx], &a[0], sizeof(a));
262*0f4c859eSApple OSS Distributions 		ret = sprintf(
263*0f4c859eSApple OSS Distributions 			buf + buf_idx,
264*0f4c859eSApple OSS Distributions 			"0x%016llx:%016llx:%016llx:%016llx\n",
265*0f4c859eSApple OSS Distributions 			a[0], a[1], a[2], a[3]
266*0f4c859eSApple OSS Distributions 			);
267*0f4c859eSApple OSS Distributions 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
268*0f4c859eSApple OSS Distributions 		buf_idx += ret;
269*0f4c859eSApple OSS Distributions 	}
270*0f4c859eSApple OSS Distributions }
271*0f4c859eSApple OSS Distributions 
272*0f4c859eSApple OSS Distributions void
assert_ymm_eq(void * a,void * b,int c)273*0f4c859eSApple OSS Distributions assert_ymm_eq(void *a, void *b, int c)
274*0f4c859eSApple OSS Distributions {
275*0f4c859eSApple OSS Distributions 	if (memcmp_unoptimized(a, b, c)) {
276*0f4c859eSApple OSS Distributions 		vec256_to_string(a, vec_str_buf);
277*0f4c859eSApple OSS Distributions 		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
278*0f4c859eSApple OSS Distributions 		vec256_to_string(b, vec_str_buf);
279*0f4c859eSApple OSS Distributions 		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
280*0f4c859eSApple OSS Distributions 		T_ASSERT_FAIL("vectors not equal");
281*0f4c859eSApple OSS Distributions 	}
282*0f4c859eSApple OSS Distributions }
283*0f4c859eSApple OSS Distributions 
284*0f4c859eSApple OSS Distributions void
check_ymm(void)285*0f4c859eSApple OSS Distributions check_ymm(void)
286*0f4c859eSApple OSS Distributions {
287*0f4c859eSApple OSS Distributions 	uint32_t *p = (uint32_t *) &vec256array1[7];
288*0f4c859eSApple OSS Distributions 	store_ymm(vec256array1);
289*0f4c859eSApple OSS Distributions 	if (p[0] == STOP_COOKIE_256) {
290*0f4c859eSApple OSS Distributions 		return;
291*0f4c859eSApple OSS Distributions 	}
292*0f4c859eSApple OSS Distributions 	assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
293*0f4c859eSApple OSS Distributions }
294*0f4c859eSApple OSS Distributions 
295*0f4c859eSApple OSS Distributions static void
copy_ymm_state_to_vector(X86_AVX_STATE_T * sp,VECTOR256 * vp)296*0f4c859eSApple OSS Distributions copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
297*0f4c859eSApple OSS Distributions {
298*0f4c859eSApple OSS Distributions 	int     i;
299*0f4c859eSApple OSS Distributions 	struct  __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
300*0f4c859eSApple OSS Distributions 	struct  __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
301*0f4c859eSApple OSS Distributions 
302*0f4c859eSApple OSS Distributions 	for (i = 0; i < YMM_MAX; i++) {
303*0f4c859eSApple OSS Distributions 		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
304*0f4c859eSApple OSS Distributions 		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
305*0f4c859eSApple OSS Distributions 	}
306*0f4c859eSApple OSS Distributions }
307*0f4c859eSApple OSS Distributions 
308*0f4c859eSApple OSS Distributions static void
ymm_sigalrm_handler(int signum __unused,siginfo_t * info __unused,void * ctx)309*0f4c859eSApple OSS Distributions ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
310*0f4c859eSApple OSS Distributions {
311*0f4c859eSApple OSS Distributions 	ucontext_t *contextp = (ucontext_t *) ctx;
312*0f4c859eSApple OSS Distributions 	mcontext_t mcontext = contextp->uc_mcontext;
313*0f4c859eSApple OSS Distributions 	X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
314*0f4c859eSApple OSS Distributions 	uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
315*0f4c859eSApple OSS Distributions 	uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;
316*0f4c859eSApple OSS Distributions 
317*0f4c859eSApple OSS Distributions 	T_LOG("Got SIGALRM");
318*0f4c859eSApple OSS Distributions 
319*0f4c859eSApple OSS Distributions 	/* Check for AVX state */
320*0f4c859eSApple OSS Distributions 	T_QUIET;
321*0f4c859eSApple OSS Distributions 	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");
322*0f4c859eSApple OSS Distributions 
323*0f4c859eSApple OSS Distributions 	/* Check that the state in the context is what's set and expected */
324*0f4c859eSApple OSS Distributions 	copy_ymm_state_to_vector(avx_state, vec256array3);
325*0f4c859eSApple OSS Distributions 	assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));
326*0f4c859eSApple OSS Distributions 
327*0f4c859eSApple OSS Distributions 	/* Change the context and break the main loop */
328*0f4c859eSApple OSS Distributions 	xp[0] = STOP_COOKIE_256;
329*0f4c859eSApple OSS Distributions 	yp[0] = STOP_COOKIE_256;
330*0f4c859eSApple OSS Distributions 	checking = FALSE;
331*0f4c859eSApple OSS Distributions }
332*0f4c859eSApple OSS Distributions 
333*0f4c859eSApple OSS Distributions kern_return_t
_thread_get_state_avx(thread_t thread,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)334*0f4c859eSApple OSS Distributions _thread_get_state_avx(
335*0f4c859eSApple OSS Distributions 	thread_t                thread,
336*0f4c859eSApple OSS Distributions 	int                     flavor,
337*0f4c859eSApple OSS Distributions 	thread_state_t          state,          /* pointer to OUT array */
338*0f4c859eSApple OSS Distributions 	mach_msg_type_number_t  *state_count)   /*IN/OUT*/
339*0f4c859eSApple OSS Distributions {
340*0f4c859eSApple OSS Distributions 	kern_return_t rv;
341*0f4c859eSApple OSS Distributions 	VECTOR256 ymms[YMM_MAX];
342*0f4c859eSApple OSS Distributions 
343*0f4c859eSApple OSS Distributions 	/*
344*0f4c859eSApple OSS Distributions 	 * We must save and restore the YMMs across thread_get_state() because
345*0f4c859eSApple OSS Distributions 	 * code in thread_get_state changes at least one xmm register AFTER the
346*0f4c859eSApple OSS Distributions 	 * thread_get_state has saved the state in userspace.  While it's still
347*0f4c859eSApple OSS Distributions 	 * possible for something to muck with %xmms BEFORE making the mach
348*0f4c859eSApple OSS Distributions 	 * system call (and rendering this save/restore useless), that does not
349*0f4c859eSApple OSS Distributions 	 * currently occur, and since we depend on the avx state saved in the
350*0f4c859eSApple OSS Distributions 	 * thread_get_state to be the same as that manually copied from YMMs after
351*0f4c859eSApple OSS Distributions 	 * thread_get_state returns, we have to go through these machinations.
352*0f4c859eSApple OSS Distributions 	 */
353*0f4c859eSApple OSS Distributions 	store_ymm(ymms);
354*0f4c859eSApple OSS Distributions 
355*0f4c859eSApple OSS Distributions 	rv = thread_get_state(thread, flavor, state, state_count);
356*0f4c859eSApple OSS Distributions 
357*0f4c859eSApple OSS Distributions 	restore_ymm(ymms);
358*0f4c859eSApple OSS Distributions 
359*0f4c859eSApple OSS Distributions 	return rv;
360*0f4c859eSApple OSS Distributions }
361*0f4c859eSApple OSS Distributions 
362*0f4c859eSApple OSS Distributions void
ymm_integrity(int time)363*0f4c859eSApple OSS Distributions ymm_integrity(int time)
364*0f4c859eSApple OSS Distributions {
365*0f4c859eSApple OSS Distributions 	mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
366*0f4c859eSApple OSS Distributions 	kern_return_t kret;
367*0f4c859eSApple OSS Distributions 	X86_AVX_STATE_T avx_state, avx_state2;
368*0f4c859eSApple OSS Distributions 	mach_port_t ts = mach_thread_self();
369*0f4c859eSApple OSS Distributions 
370*0f4c859eSApple OSS Distributions 	bzero(&avx_state, sizeof(avx_state));
371*0f4c859eSApple OSS Distributions 	bzero(&avx_state2, sizeof(avx_state));
372*0f4c859eSApple OSS Distributions 
373*0f4c859eSApple OSS Distributions 	kret = _thread_get_state_avx(
374*0f4c859eSApple OSS Distributions 		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
375*0f4c859eSApple OSS Distributions 		);
376*0f4c859eSApple OSS Distributions 
377*0f4c859eSApple OSS Distributions 	store_ymm(vec256array2);
378*0f4c859eSApple OSS Distributions 
379*0f4c859eSApple OSS Distributions 	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
380*0f4c859eSApple OSS Distributions 	vec256_to_string(vec256array2, vec_str_buf);
381*0f4c859eSApple OSS Distributions 	T_LOG("Initial state:\n%s", vec_str_buf);
382*0f4c859eSApple OSS Distributions 
383*0f4c859eSApple OSS Distributions 	copy_ymm_state_to_vector(&avx_state, vec256array1);
384*0f4c859eSApple OSS Distributions 	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));
385*0f4c859eSApple OSS Distributions 
386*0f4c859eSApple OSS Distributions 	populate_ymm();
387*0f4c859eSApple OSS Distributions 
388*0f4c859eSApple OSS Distributions 	kret = _thread_get_state_avx(
389*0f4c859eSApple OSS Distributions 		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
390*0f4c859eSApple OSS Distributions 		);
391*0f4c859eSApple OSS Distributions 
392*0f4c859eSApple OSS Distributions 	store_ymm(vec256array2);
393*0f4c859eSApple OSS Distributions 
394*0f4c859eSApple OSS Distributions 	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
395*0f4c859eSApple OSS Distributions 	vec256_to_string(vec256array2, vec_str_buf);
396*0f4c859eSApple OSS Distributions 	T_LOG("Populated state:\n%s", vec_str_buf);
397*0f4c859eSApple OSS Distributions 
398*0f4c859eSApple OSS Distributions 	copy_ymm_state_to_vector(&avx_state2, vec256array1);
399*0f4c859eSApple OSS Distributions 	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));
400*0f4c859eSApple OSS Distributions 
401*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
402*0f4c859eSApple OSS Distributions 	start_timer(time, ymm_sigalrm_handler);
403*0f4c859eSApple OSS Distributions 
404*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
405*0f4c859eSApple OSS Distributions 	populate_ymm();
406*0f4c859eSApple OSS Distributions 
407*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
408*0f4c859eSApple OSS Distributions 	while (checking) {
409*0f4c859eSApple OSS Distributions 		check_ymm();
410*0f4c859eSApple OSS Distributions 	}
411*0f4c859eSApple OSS Distributions 
412*0f4c859eSApple OSS Distributions 	/* Check that the sig handler changed out AVX state */
413*0f4c859eSApple OSS Distributions 	store_ymm(vec256array1);
414*0f4c859eSApple OSS Distributions 
415*0f4c859eSApple OSS Distributions 	uint32_t *p = (uint32_t *) &vec256array1[7];
416*0f4c859eSApple OSS Distributions 	if (p[0] != STOP_COOKIE_256 ||
417*0f4c859eSApple OSS Distributions 	    p[4] != STOP_COOKIE_256) {
418*0f4c859eSApple OSS Distributions 		vec256_to_string(vec256array1, vec_str_buf);
419*0f4c859eSApple OSS Distributions 		T_ASSERT_FAIL("sigreturn failed to stick");
420*0f4c859eSApple OSS Distributions 		T_LOG("State:\n%s", vec_str_buf);
421*0f4c859eSApple OSS Distributions 	}
422*0f4c859eSApple OSS Distributions 
423*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
424*0f4c859eSApple OSS Distributions 	T_PASS("No ymm register corruption occurred");
425*0f4c859eSApple OSS Distributions }
426*0f4c859eSApple OSS Distributions 
427*0f4c859eSApple OSS Distributions /*
428*0f4c859eSApple OSS Distributions  * zmm functions
429*0f4c859eSApple OSS Distributions  */
430*0f4c859eSApple OSS Distributions 
431*0f4c859eSApple OSS Distributions static inline void
store_opmask(OPMASK k[])432*0f4c859eSApple OSS Distributions store_opmask(OPMASK k[])
433*0f4c859eSApple OSS Distributions {
434*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
435*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
436*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
437*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
438*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
439*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
440*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
441*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
442*0f4c859eSApple OSS Distributions }
443*0f4c859eSApple OSS Distributions 
444*0f4c859eSApple OSS Distributions static inline void
store_zmm(VECTOR512 * vecarray)445*0f4c859eSApple OSS Distributions store_zmm(VECTOR512 *vecarray)
446*0f4c859eSApple OSS Distributions {
447*0f4c859eSApple OSS Distributions 	int i = 0;
448*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %%zmm0, %0" :"=m" (vecarray[i]));
449*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm1, %0" :"=m" (vecarray[i]));
450*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm2, %0" :"=m" (vecarray[i]));
451*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm3, %0" :"=m" (vecarray[i]));
452*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm4, %0" :"=m" (vecarray[i]));
453*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm5, %0" :"=m" (vecarray[i]));
454*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm6, %0" :"=m" (vecarray[i]));
455*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm7, %0" :"=m" (vecarray[i]));
456*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
457*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm8, %0" :"=m" (vecarray[i]));
458*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm9, %0" :"=m" (vecarray[i]));
459*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm10, %0" :"=m" (vecarray[i]));
460*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm11, %0" :"=m" (vecarray[i]));
461*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm12, %0" :"=m" (vecarray[i]));
462*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm13, %0" :"=m" (vecarray[i]));
463*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm14, %0" :"=m" (vecarray[i]));
464*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm15, %0" :"=m" (vecarray[i]));
465*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm16, %0" :"=m" (vecarray[i]));
466*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm17, %0" :"=m" (vecarray[i]));
467*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm18, %0" :"=m" (vecarray[i]));
468*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm19, %0" :"=m" (vecarray[i]));
469*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm20, %0" :"=m" (vecarray[i]));
470*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm21, %0" :"=m" (vecarray[i]));
471*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm22, %0" :"=m" (vecarray[i]));
472*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm23, %0" :"=m" (vecarray[i]));
473*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm24, %0" :"=m" (vecarray[i]));
474*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm25, %0" :"=m" (vecarray[i]));
475*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm26, %0" :"=m" (vecarray[i]));
476*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm27, %0" :"=m" (vecarray[i]));
477*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm28, %0" :"=m" (vecarray[i]));
478*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm29, %0" :"=m" (vecarray[i]));
479*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm30, %0" :"=m" (vecarray[i]));
480*0f4c859eSApple OSS Distributions 	i++; __asm__ volatile ("vmovaps  %%zmm31, %0" :"=m" (vecarray[i]));
481*0f4c859eSApple OSS Distributions #endif
482*0f4c859eSApple OSS Distributions }
483*0f4c859eSApple OSS Distributions 
484*0f4c859eSApple OSS Distributions static inline void
restore_zmm(VECTOR512 * vecarray)485*0f4c859eSApple OSS Distributions restore_zmm(VECTOR512 *vecarray)
486*0f4c859eSApple OSS Distributions {
487*0f4c859eSApple OSS Distributions 	VECTOR512 *p = vecarray;
488*0f4c859eSApple OSS Distributions 
489*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0"); p++;
490*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1"); p++;
491*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2"); p++;
492*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3"); p++;
493*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4"); p++;
494*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5"); p++;
495*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6"); p++;
496*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");
497*0f4c859eSApple OSS Distributions 
498*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
499*0f4c859eSApple OSS Distributions 	++p; __asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8"); p++;
500*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9"); p++;
501*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10"); p++;
502*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11"); p++;
503*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12"); p++;
504*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13"); p++;
505*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14"); p++;
506*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15"); p++;
507*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16"); p++;
508*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17"); p++;
509*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18"); p++;
510*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19"); p++;
511*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20"); p++;
512*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21"); p++;
513*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22"); p++;
514*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23"); p++;
515*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24"); p++;
516*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25"); p++;
517*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26"); p++;
518*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27"); p++;
519*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28"); p++;
520*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29"); p++;
521*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30"); p++;
522*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
523*0f4c859eSApple OSS Distributions #endif
524*0f4c859eSApple OSS Distributions }
525*0f4c859eSApple OSS Distributions 
526*0f4c859eSApple OSS Distributions static inline void
zero_opmask(void)527*0f4c859eSApple OSS Distributions zero_opmask(void)
528*0f4c859eSApple OSS Distributions {
529*0f4c859eSApple OSS Distributions 	uint64_t zero = 0x0000000000000000ULL;
530*0f4c859eSApple OSS Distributions 
531*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k0" : :"m" (zero) : "k0");
532*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k1" : :"m" (zero) : "k1");
533*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k2" : :"m" (zero) : "k2");
534*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k3" : :"m" (zero) : "k3");
535*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k4" : :"m" (zero) : "k4");
536*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k5" : :"m" (zero) : "k5");
537*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k6" : :"m" (zero) : "k6");
538*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k7" : :"m" (zero) : "k7");
539*0f4c859eSApple OSS Distributions 	store_opmask(karray0);
540*0f4c859eSApple OSS Distributions }
541*0f4c859eSApple OSS Distributions 
542*0f4c859eSApple OSS Distributions static inline void
populate_opmask(void)543*0f4c859eSApple OSS Distributions populate_opmask(void)
544*0f4c859eSApple OSS Distributions {
545*0f4c859eSApple OSS Distributions 	uint64_t k[8];
546*0f4c859eSApple OSS Distributions 
547*0f4c859eSApple OSS Distributions 	for (int j = 0; j < 8; j++) {
548*0f4c859eSApple OSS Distributions 		k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);
549*0f4c859eSApple OSS Distributions 	}
550*0f4c859eSApple OSS Distributions 
551*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]) : "k0");
552*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]) : "k1");
553*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]) : "k2");
554*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]) : "k3");
555*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]) : "k4");
556*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]) : "k5");
557*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]) : "k6");
558*0f4c859eSApple OSS Distributions 	__asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]) : "k7");
559*0f4c859eSApple OSS Distributions 
560*0f4c859eSApple OSS Distributions 	store_opmask(karray0);
561*0f4c859eSApple OSS Distributions }
562*0f4c859eSApple OSS Distributions 
563*0f4c859eSApple OSS Distributions kern_return_t
_thread_get_state_avx512(thread_t thread,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)564*0f4c859eSApple OSS Distributions _thread_get_state_avx512(
565*0f4c859eSApple OSS Distributions 	thread_t                thread,
566*0f4c859eSApple OSS Distributions 	int                     flavor,
567*0f4c859eSApple OSS Distributions 	thread_state_t          state,          /* pointer to OUT array */
568*0f4c859eSApple OSS Distributions 	mach_msg_type_number_t  *state_count)   /*IN/OUT*/
569*0f4c859eSApple OSS Distributions {
570*0f4c859eSApple OSS Distributions 	kern_return_t rv;
571*0f4c859eSApple OSS Distributions 	VECTOR512 zmms[ZMM_MAX];
572*0f4c859eSApple OSS Distributions 
573*0f4c859eSApple OSS Distributions 	/*
574*0f4c859eSApple OSS Distributions 	 * We must save and restore the ZMMs across thread_get_state() because
575*0f4c859eSApple OSS Distributions 	 * code in thread_get_state changes at least one xmm register AFTER the
576*0f4c859eSApple OSS Distributions 	 * thread_get_state has saved the state in userspace.  While it's still
577*0f4c859eSApple OSS Distributions 	 * possible for something to muck with %XMMs BEFORE making the mach
578*0f4c859eSApple OSS Distributions 	 * system call (and rendering this save/restore useless), that does not
579*0f4c859eSApple OSS Distributions 	 * currently occur, and since we depend on the avx512 state saved in the
580*0f4c859eSApple OSS Distributions 	 * thread_get_state to be the same as that manually copied from ZMMs after
581*0f4c859eSApple OSS Distributions 	 * thread_get_state returns, we have to go through these machinations.
582*0f4c859eSApple OSS Distributions 	 */
583*0f4c859eSApple OSS Distributions 	store_zmm(zmms);
584*0f4c859eSApple OSS Distributions 
585*0f4c859eSApple OSS Distributions 	rv = thread_get_state(thread, flavor, state, state_count);
586*0f4c859eSApple OSS Distributions 
587*0f4c859eSApple OSS Distributions 	restore_zmm(zmms);
588*0f4c859eSApple OSS Distributions 
589*0f4c859eSApple OSS Distributions 	return rv;
590*0f4c859eSApple OSS Distributions }
591*0f4c859eSApple OSS Distributions 
592*0f4c859eSApple OSS Distributions static inline void
zero_zmm(void)593*0f4c859eSApple OSS Distributions zero_zmm(void)
594*0f4c859eSApple OSS Distributions {
595*0f4c859eSApple OSS Distributions 	uint64_t zero[8] VEC512ALIGN = {0};
596*0f4c859eSApple OSS Distributions 
597*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (zero) : "zmm0");
598*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (zero) : "zmm1");
599*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (zero) : "zmm2");
600*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (zero) : "zmm3");
601*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (zero) : "zmm4");
602*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (zero) : "zmm5");
603*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (zero) : "zmm6");
604*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (zero) : "zmm7");
605*0f4c859eSApple OSS Distributions 
606*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
607*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (zero) : "zmm8");
608*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (zero) : "zmm9");
609*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (zero) : "zmm10");
610*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (zero) : "zmm11");
611*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (zero) : "zmm12");
612*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (zero) : "zmm13");
613*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (zero) : "zmm14");
614*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (zero) : "zmm15");
615*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (zero) : "zmm16");
616*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (zero) : "zmm17");
617*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (zero) : "zmm18");
618*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (zero) : "zmm19");
619*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (zero) : "zmm20");
620*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (zero) : "zmm21");
621*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (zero) : "zmm22");
622*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (zero) : "zmm23");
623*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (zero) : "zmm24");
624*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (zero) : "zmm25");
625*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (zero) : "zmm26");
626*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (zero) : "zmm27");
627*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (zero) : "zmm28");
628*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (zero) : "zmm29");
629*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (zero) : "zmm30");
630*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (zero) : "zmm31");
631*0f4c859eSApple OSS Distributions #endif
632*0f4c859eSApple OSS Distributions 
633*0f4c859eSApple OSS Distributions 	store_zmm(vec512array0);
634*0f4c859eSApple OSS Distributions }
635*0f4c859eSApple OSS Distributions 
636*0f4c859eSApple OSS Distributions static inline void
populate_zmm(void)637*0f4c859eSApple OSS Distributions populate_zmm(void)
638*0f4c859eSApple OSS Distributions {
639*0f4c859eSApple OSS Distributions 	int j;
640*0f4c859eSApple OSS Distributions 	uint64_t p[8] VEC512ALIGN;
641*0f4c859eSApple OSS Distributions 
642*0f4c859eSApple OSS Distributions 	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
643*0f4c859eSApple OSS Distributions 		p[j] = ((uint64_t) getpid() << 32) + getpid();
644*0f4c859eSApple OSS Distributions 	}
645*0f4c859eSApple OSS Distributions 
646*0f4c859eSApple OSS Distributions 	p[0] = 0x0000000000000000ULL;
647*0f4c859eSApple OSS Distributions 	p[2] = 0x4444444444444444ULL;
648*0f4c859eSApple OSS Distributions 	p[4] = 0x8888888888888888ULL;
649*0f4c859eSApple OSS Distributions 	p[7] = 0xCCCCCCCCCCCCCCCCULL;
650*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0");
651*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1");
652*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2");
653*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3");
654*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4");
655*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5");
656*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6");
657*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");
658*0f4c859eSApple OSS Distributions 
659*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
660*0f4c859eSApple OSS Distributions 	p[0] = 0x1111111111111111ULL;
661*0f4c859eSApple OSS Distributions 	p[2] = 0x5555555555555555ULL;
662*0f4c859eSApple OSS Distributions 	p[4] = 0x9999999999999999ULL;
663*0f4c859eSApple OSS Distributions 	p[7] = 0xDDDDDDDDDDDDDDDDULL;
664*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8");
665*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9");
666*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10");
667*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11");
668*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12");
669*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13");
670*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14");
671*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15");
672*0f4c859eSApple OSS Distributions 
673*0f4c859eSApple OSS Distributions 	p[0] = 0x2222222222222222ULL;
674*0f4c859eSApple OSS Distributions 	p[2] = 0x6666666666666666ULL;
675*0f4c859eSApple OSS Distributions 	p[4] = 0xAAAAAAAAAAAAAAAAULL;
676*0f4c859eSApple OSS Distributions 	p[7] = 0xEEEEEEEEEEEEEEEEULL;
677*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16");
678*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17");
679*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18");
680*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19");
681*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20");
682*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21");
683*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22");
684*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23");
685*0f4c859eSApple OSS Distributions 
686*0f4c859eSApple OSS Distributions 	p[0] = 0x3333333333333333ULL;
687*0f4c859eSApple OSS Distributions 	p[2] = 0x7777777777777777ULL;
688*0f4c859eSApple OSS Distributions 	p[4] = 0xBBBBBBBBBBBBBBBBULL;
689*0f4c859eSApple OSS Distributions 	p[7] = 0xFFFFFFFFFFFFFFFFULL;
690*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24");
691*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25");
692*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26");
693*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27");
694*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28");
695*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29");
696*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30");
697*0f4c859eSApple OSS Distributions 	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
698*0f4c859eSApple OSS Distributions #endif
699*0f4c859eSApple OSS Distributions 
700*0f4c859eSApple OSS Distributions 	store_zmm(vec512array0);
701*0f4c859eSApple OSS Distributions }
702*0f4c859eSApple OSS Distributions 
703*0f4c859eSApple OSS Distributions void
vec512_to_string(VECTOR512 * vec,char * buf)704*0f4c859eSApple OSS Distributions vec512_to_string(VECTOR512 *vec, char *buf)
705*0f4c859eSApple OSS Distributions {
706*0f4c859eSApple OSS Distributions 	unsigned int vec_idx = 0;
707*0f4c859eSApple OSS Distributions 	unsigned int buf_idx = 0;
708*0f4c859eSApple OSS Distributions 	int ret = 0;
709*0f4c859eSApple OSS Distributions 
710*0f4c859eSApple OSS Distributions 	for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
711*0f4c859eSApple OSS Distributions 		uint64_t a[8];
712*0f4c859eSApple OSS Distributions 		bcopy(&vec[vec_idx], &a[0], sizeof(a));
713*0f4c859eSApple OSS Distributions 		ret = sprintf(
714*0f4c859eSApple OSS Distributions 			buf + buf_idx,
715*0f4c859eSApple OSS Distributions 			"0x%016llx:%016llx:%016llx:%016llx:"
716*0f4c859eSApple OSS Distributions 			"%016llx:%016llx:%016llx:%016llx%s",
717*0f4c859eSApple OSS Distributions 			a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
718*0f4c859eSApple OSS Distributions 			vec_idx < ZMM_MAX - 1 ? "\n" : ""
719*0f4c859eSApple OSS Distributions 			);
720*0f4c859eSApple OSS Distributions 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
721*0f4c859eSApple OSS Distributions 		buf_idx += ret;
722*0f4c859eSApple OSS Distributions 	}
723*0f4c859eSApple OSS Distributions }
724*0f4c859eSApple OSS Distributions 
725*0f4c859eSApple OSS Distributions void
opmask_to_string(OPMASK * karray,char * buf)726*0f4c859eSApple OSS Distributions opmask_to_string(OPMASK *karray, char *buf)
727*0f4c859eSApple OSS Distributions {
728*0f4c859eSApple OSS Distributions 	unsigned int karray_idx = 0;
729*0f4c859eSApple OSS Distributions 	unsigned int buf_idx = 0;
730*0f4c859eSApple OSS Distributions 	int ret = 0;
731*0f4c859eSApple OSS Distributions 
732*0f4c859eSApple OSS Distributions 	for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
733*0f4c859eSApple OSS Distributions 		ret = sprintf(
734*0f4c859eSApple OSS Distributions 			buf + buf_idx,
735*0f4c859eSApple OSS Distributions 			"k%d: 0x%016llx%s",
736*0f4c859eSApple OSS Distributions 			karray_idx, karray[karray_idx],
737*0f4c859eSApple OSS Distributions 			karray_idx < KARRAY_MAX ? "\n" : ""
738*0f4c859eSApple OSS Distributions 			);
739*0f4c859eSApple OSS Distributions 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
740*0f4c859eSApple OSS Distributions 		buf_idx += ret;
741*0f4c859eSApple OSS Distributions 	}
742*0f4c859eSApple OSS Distributions }
743*0f4c859eSApple OSS Distributions 
744*0f4c859eSApple OSS Distributions static void
assert_zmm_eq(void * a,void * b,int c)745*0f4c859eSApple OSS Distributions assert_zmm_eq(void *a, void *b, int c)
746*0f4c859eSApple OSS Distributions {
747*0f4c859eSApple OSS Distributions 	if (memcmp_unoptimized(a, b, c)) {
748*0f4c859eSApple OSS Distributions 		vec512_to_string(a, vec_str_buf);
749*0f4c859eSApple OSS Distributions 		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
750*0f4c859eSApple OSS Distributions 		vec512_to_string(b, vec_str_buf);
751*0f4c859eSApple OSS Distributions 		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
752*0f4c859eSApple OSS Distributions 		T_ASSERT_FAIL("Vectors not equal");
753*0f4c859eSApple OSS Distributions 	}
754*0f4c859eSApple OSS Distributions }
755*0f4c859eSApple OSS Distributions 
756*0f4c859eSApple OSS Distributions static void
assert_opmask_eq(OPMASK * a,OPMASK * b)757*0f4c859eSApple OSS Distributions assert_opmask_eq(OPMASK *a, OPMASK *b)
758*0f4c859eSApple OSS Distributions {
759*0f4c859eSApple OSS Distributions 	for (int i = 0; i < KARRAY_MAX; i++) {
760*0f4c859eSApple OSS Distributions 		if (a[i] != b[i]) {
761*0f4c859eSApple OSS Distributions 			opmask_to_string(a, karray_str_buf);
762*0f4c859eSApple OSS Distributions 			T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
763*0f4c859eSApple OSS Distributions 			opmask_to_string(b, karray_str_buf);
764*0f4c859eSApple OSS Distributions 			T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
765*0f4c859eSApple OSS Distributions 			T_ASSERT_FAIL("opmasks not equal");
766*0f4c859eSApple OSS Distributions 		}
767*0f4c859eSApple OSS Distributions 	}
768*0f4c859eSApple OSS Distributions }
769*0f4c859eSApple OSS Distributions 
770*0f4c859eSApple OSS Distributions void
check_zmm(boolean_t check_cookie)771*0f4c859eSApple OSS Distributions check_zmm(boolean_t check_cookie)
772*0f4c859eSApple OSS Distributions {
773*0f4c859eSApple OSS Distributions 	uint64_t *p = (uint64_t *) &vec512array1[7];
774*0f4c859eSApple OSS Distributions 	store_opmask(karray1);
775*0f4c859eSApple OSS Distributions 	store_zmm(vec512array1);
776*0f4c859eSApple OSS Distributions 	if (check_cookie && p[0] == STOP_COOKIE_512) {
777*0f4c859eSApple OSS Distributions 		return;
778*0f4c859eSApple OSS Distributions 	}
779*0f4c859eSApple OSS Distributions 
780*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
781*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray0, karray1);
782*0f4c859eSApple OSS Distributions }
783*0f4c859eSApple OSS Distributions 
784*0f4c859eSApple OSS Distributions static void
copy_state_to_opmask(X86_AVX512_STATE_T * sp,OPMASK * op)785*0f4c859eSApple OSS Distributions copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
786*0f4c859eSApple OSS Distributions {
787*0f4c859eSApple OSS Distributions 	OPMASK *k = (OPMASK *) &sp->__fpu_k0;
788*0f4c859eSApple OSS Distributions 	for (int i = 0; i < KARRAY_MAX; i++) {
789*0f4c859eSApple OSS Distributions 		bcopy(&k[i], &op[i], sizeof(*op));
790*0f4c859eSApple OSS Distributions 	}
791*0f4c859eSApple OSS Distributions }
792*0f4c859eSApple OSS Distributions 
793*0f4c859eSApple OSS Distributions static void
copy_zmm_state_to_vector(X86_AVX512_STATE_T * sp,VECTOR512 * vp)794*0f4c859eSApple OSS Distributions copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
795*0f4c859eSApple OSS Distributions {
796*0f4c859eSApple OSS Distributions 	int     i;
797*0f4c859eSApple OSS Distributions 	struct  __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
798*0f4c859eSApple OSS Distributions 	struct  __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
799*0f4c859eSApple OSS Distributions 	struct  __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
800*0f4c859eSApple OSS Distributions #if defined(__x86_64__)
801*0f4c859eSApple OSS Distributions 	struct  __darwin_zmm_reg *zmm  = &sp->__fpu_zmm16;
802*0f4c859eSApple OSS Distributions 
803*0f4c859eSApple OSS Distributions 	for (i = 0; i < ZMM_MAX / 2; i++) {
804*0f4c859eSApple OSS Distributions 		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
805*0f4c859eSApple OSS Distributions 		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
806*0f4c859eSApple OSS Distributions 		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
807*0f4c859eSApple OSS Distributions 		bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
808*0f4c859eSApple OSS Distributions 	}
809*0f4c859eSApple OSS Distributions #else
810*0f4c859eSApple OSS Distributions 	for (i = 0; i < ZMM_MAX; i++) {
811*0f4c859eSApple OSS Distributions 		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
812*0f4c859eSApple OSS Distributions 		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
813*0f4c859eSApple OSS Distributions 		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
814*0f4c859eSApple OSS Distributions 	}
815*0f4c859eSApple OSS Distributions #endif
816*0f4c859eSApple OSS Distributions }
817*0f4c859eSApple OSS Distributions 
818*0f4c859eSApple OSS Distributions static void
zmm_sigalrm_handler(int signum __unused,siginfo_t * info __unused,void * ctx)819*0f4c859eSApple OSS Distributions zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
820*0f4c859eSApple OSS Distributions {
821*0f4c859eSApple OSS Distributions 	ucontext_t *contextp = (ucontext_t *) ctx;
822*0f4c859eSApple OSS Distributions 	mcontext_t mcontext = contextp->uc_mcontext;
823*0f4c859eSApple OSS Distributions 	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
824*0f4c859eSApple OSS Distributions 	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
825*0f4c859eSApple OSS Distributions 	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
826*0f4c859eSApple OSS Distributions 	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
827*0f4c859eSApple OSS Distributions 	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;
828*0f4c859eSApple OSS Distributions 
829*0f4c859eSApple OSS Distributions 	/* Check for AVX512 state */
830*0f4c859eSApple OSS Distributions 	T_QUIET;
831*0f4c859eSApple OSS Distributions 	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");
832*0f4c859eSApple OSS Distributions 
833*0f4c859eSApple OSS Distributions 	/* Check that the state in the context is what's set and expected */
834*0f4c859eSApple OSS Distributions 	copy_zmm_state_to_vector(avx_state, vec512array3);
835*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
836*0f4c859eSApple OSS Distributions 	copy_state_to_opmask(avx_state, karray3);
837*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray3, karray0);
838*0f4c859eSApple OSS Distributions 
839*0f4c859eSApple OSS Distributions 	/* Change the context and break the main loop */
840*0f4c859eSApple OSS Distributions 	xp[0] = STOP_COOKIE_512;
841*0f4c859eSApple OSS Distributions 	yp[0] = STOP_COOKIE_512;
842*0f4c859eSApple OSS Distributions 	zp[0] = STOP_COOKIE_512;
843*0f4c859eSApple OSS Distributions 	kp[7] = STOP_COOKIE_512;
844*0f4c859eSApple OSS Distributions 	checking = FALSE;
845*0f4c859eSApple OSS Distributions }
846*0f4c859eSApple OSS Distributions 
847*0f4c859eSApple OSS Distributions static void
zmm_sigalrm_handler_no_mod(int signum __unused,siginfo_t * info __unused,void * ctx)848*0f4c859eSApple OSS Distributions zmm_sigalrm_handler_no_mod(int signum __unused, siginfo_t *info __unused, void *ctx)
849*0f4c859eSApple OSS Distributions {
850*0f4c859eSApple OSS Distributions 	ucontext_t *contextp = (ucontext_t *) ctx;
851*0f4c859eSApple OSS Distributions 	mcontext_t mcontext = contextp->uc_mcontext;
852*0f4c859eSApple OSS Distributions 	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
853*0f4c859eSApple OSS Distributions 	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
854*0f4c859eSApple OSS Distributions 	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
855*0f4c859eSApple OSS Distributions 	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
856*0f4c859eSApple OSS Distributions 	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;
857*0f4c859eSApple OSS Distributions 
858*0f4c859eSApple OSS Distributions 	/* Check for AVX512 state */
859*0f4c859eSApple OSS Distributions 	T_QUIET;
860*0f4c859eSApple OSS Distributions 	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");
861*0f4c859eSApple OSS Distributions 
862*0f4c859eSApple OSS Distributions 	/* Check that the state in the context is what's set and expected */
863*0f4c859eSApple OSS Distributions 	copy_zmm_state_to_vector(avx_state, vec512array3);
864*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
865*0f4c859eSApple OSS Distributions 	copy_state_to_opmask(avx_state, karray3);
866*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray3, karray0);
867*0f4c859eSApple OSS Distributions 
868*0f4c859eSApple OSS Distributions 	/* Change the context and break the main loop */
869*0f4c859eSApple OSS Distributions 	checking = FALSE;
870*0f4c859eSApple OSS Distributions }
871*0f4c859eSApple OSS Distributions 
872*0f4c859eSApple OSS Distributions 
873*0f4c859eSApple OSS Distributions void
zmm_integrity(int time)874*0f4c859eSApple OSS Distributions zmm_integrity(int time)
875*0f4c859eSApple OSS Distributions {
876*0f4c859eSApple OSS Distributions 	mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
877*0f4c859eSApple OSS Distributions 	kern_return_t kret;
878*0f4c859eSApple OSS Distributions 	X86_AVX512_STATE_T avx_state, avx_state2;
879*0f4c859eSApple OSS Distributions 	mach_port_t ts = mach_thread_self();
880*0f4c859eSApple OSS Distributions 
881*0f4c859eSApple OSS Distributions 	bzero(&avx_state, sizeof(avx_state));
882*0f4c859eSApple OSS Distributions 	bzero(&avx_state2, sizeof(avx_state));
883*0f4c859eSApple OSS Distributions 
884*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
885*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
886*0f4c859eSApple OSS Distributions 
887*0f4c859eSApple OSS Distributions 	kret = _thread_get_state_avx512(
888*0f4c859eSApple OSS Distributions 		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
889*0f4c859eSApple OSS Distributions 		);
890*0f4c859eSApple OSS Distributions 
891*0f4c859eSApple OSS Distributions 	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
892*0f4c859eSApple OSS Distributions 	vec512_to_string(vec512array2, vec_str_buf);
893*0f4c859eSApple OSS Distributions 	opmask_to_string(karray2, karray_str_buf);
894*0f4c859eSApple OSS Distributions 	T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);
895*0f4c859eSApple OSS Distributions 
896*0f4c859eSApple OSS Distributions 	copy_zmm_state_to_vector(&avx_state, vec512array1);
897*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
898*0f4c859eSApple OSS Distributions 	copy_state_to_opmask(&avx_state, karray1);
899*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray2, karray1);
900*0f4c859eSApple OSS Distributions 
901*0f4c859eSApple OSS Distributions 	populate_zmm();
902*0f4c859eSApple OSS Distributions 	populate_opmask();
903*0f4c859eSApple OSS Distributions 
904*0f4c859eSApple OSS Distributions 	kret = _thread_get_state_avx512(
905*0f4c859eSApple OSS Distributions 		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
906*0f4c859eSApple OSS Distributions 		);
907*0f4c859eSApple OSS Distributions 
908*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
909*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
910*0f4c859eSApple OSS Distributions 
911*0f4c859eSApple OSS Distributions 	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
912*0f4c859eSApple OSS Distributions 	vec512_to_string(vec512array2, vec_str_buf);
913*0f4c859eSApple OSS Distributions 	opmask_to_string(karray2, karray_str_buf);
914*0f4c859eSApple OSS Distributions 	T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);
915*0f4c859eSApple OSS Distributions 
916*0f4c859eSApple OSS Distributions 	copy_zmm_state_to_vector(&avx_state2, vec512array1);
917*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
918*0f4c859eSApple OSS Distributions 	copy_state_to_opmask(&avx_state2, karray1);
919*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray2, karray1);
920*0f4c859eSApple OSS Distributions 
921*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
922*0f4c859eSApple OSS Distributions 	start_timer(time, zmm_sigalrm_handler);
923*0f4c859eSApple OSS Distributions 
924*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
925*0f4c859eSApple OSS Distributions 	populate_zmm();
926*0f4c859eSApple OSS Distributions 	populate_opmask();
927*0f4c859eSApple OSS Distributions 
928*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
929*0f4c859eSApple OSS Distributions 	while (checking) {
930*0f4c859eSApple OSS Distributions 		check_zmm(TRUE);
931*0f4c859eSApple OSS Distributions 	}
932*0f4c859eSApple OSS Distributions 
933*0f4c859eSApple OSS Distributions 	/* Check that the sig handler changed our AVX state */
934*0f4c859eSApple OSS Distributions 	store_zmm(vec512array1);
935*0f4c859eSApple OSS Distributions 	store_opmask(karray1);
936*0f4c859eSApple OSS Distributions 
937*0f4c859eSApple OSS Distributions 	uint64_t *p = (uint64_t *) &vec512array1[7];
938*0f4c859eSApple OSS Distributions 	if (p[0] != STOP_COOKIE_512 ||
939*0f4c859eSApple OSS Distributions 	    p[2] != STOP_COOKIE_512 ||
940*0f4c859eSApple OSS Distributions 	    p[4] != STOP_COOKIE_512 ||
941*0f4c859eSApple OSS Distributions 	    karray1[7] != STOP_COOKIE_512) {
942*0f4c859eSApple OSS Distributions 		vec512_to_string(vec512array1, vec_str_buf);
943*0f4c859eSApple OSS Distributions 		opmask_to_string(karray1, karray_str_buf);
944*0f4c859eSApple OSS Distributions 		T_ASSERT_FAIL("sigreturn failed to stick");
945*0f4c859eSApple OSS Distributions 		T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
946*0f4c859eSApple OSS Distributions 	}
947*0f4c859eSApple OSS Distributions 
948*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
949*0f4c859eSApple OSS Distributions 	T_PASS("No zmm register corruption occurred");
950*0f4c859eSApple OSS Distributions }
951*0f4c859eSApple OSS Distributions 
952*0f4c859eSApple OSS Distributions void
zmm_zeroing_optimization_integrity(int time)953*0f4c859eSApple OSS Distributions zmm_zeroing_optimization_integrity(int time)
954*0f4c859eSApple OSS Distributions {
955*0f4c859eSApple OSS Distributions 	/*
956*0f4c859eSApple OSS Distributions 	 * Check ZMM zero and OpMask zero
957*0f4c859eSApple OSS Distributions 	 */
958*0f4c859eSApple OSS Distributions 	T_LOG("Checking ZMM zero and OpMask zero");
959*0f4c859eSApple OSS Distributions 	checking = true;
960*0f4c859eSApple OSS Distributions 	zero_zmm();
961*0f4c859eSApple OSS Distributions 	zero_opmask();
962*0f4c859eSApple OSS Distributions 
963*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
964*0f4c859eSApple OSS Distributions 	start_timer(time, zmm_sigalrm_handler_no_mod);
965*0f4c859eSApple OSS Distributions 
966*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
967*0f4c859eSApple OSS Distributions 	zero_zmm();
968*0f4c859eSApple OSS Distributions 	zero_opmask();
969*0f4c859eSApple OSS Distributions 
970*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
971*0f4c859eSApple OSS Distributions 	while (checking) {
972*0f4c859eSApple OSS Distributions 		check_zmm(FALSE);
973*0f4c859eSApple OSS Distributions 	}
974*0f4c859eSApple OSS Distributions 
975*0f4c859eSApple OSS Distributions 	/* Check that sig handler did not changed our AVX state */
976*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
977*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
978*0f4c859eSApple OSS Distributions 
979*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
980*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray0, karray2);
981*0f4c859eSApple OSS Distributions 
982*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
983*0f4c859eSApple OSS Distributions 	T_PASS("ZMM zero and OpMask zero");
984*0f4c859eSApple OSS Distributions 
985*0f4c859eSApple OSS Distributions 
986*0f4c859eSApple OSS Distributions 	/*
987*0f4c859eSApple OSS Distributions 	 * Check ZMM zero and OpMask non-zero
988*0f4c859eSApple OSS Distributions 	 */
989*0f4c859eSApple OSS Distributions 	T_LOG("Checking ZMM zero and OpMask non-zero");
990*0f4c859eSApple OSS Distributions 	checking = true;
991*0f4c859eSApple OSS Distributions 	zero_zmm();
992*0f4c859eSApple OSS Distributions 	populate_opmask();
993*0f4c859eSApple OSS Distributions 
994*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
995*0f4c859eSApple OSS Distributions 	start_timer(time, zmm_sigalrm_handler_no_mod);
996*0f4c859eSApple OSS Distributions 
997*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
998*0f4c859eSApple OSS Distributions 	zero_zmm();
999*0f4c859eSApple OSS Distributions 	populate_opmask();
1000*0f4c859eSApple OSS Distributions 
1001*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
1002*0f4c859eSApple OSS Distributions 	while (checking) {
1003*0f4c859eSApple OSS Distributions 		check_zmm(FALSE);
1004*0f4c859eSApple OSS Distributions 	}
1005*0f4c859eSApple OSS Distributions 
1006*0f4c859eSApple OSS Distributions 	/* Check that sig handler did not changed our AVX state */
1007*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
1008*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
1009*0f4c859eSApple OSS Distributions 
1010*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1011*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray0, karray2);
1012*0f4c859eSApple OSS Distributions 
1013*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
1014*0f4c859eSApple OSS Distributions 	T_PASS("ZMM zero and OpMask non-zero");
1015*0f4c859eSApple OSS Distributions 
1016*0f4c859eSApple OSS Distributions 
1017*0f4c859eSApple OSS Distributions 	/*
1018*0f4c859eSApple OSS Distributions 	 * Check ZMM non-zero and OpMask zero
1019*0f4c859eSApple OSS Distributions 	 */
1020*0f4c859eSApple OSS Distributions 	T_LOG("Checking ZMM non-zero and OpMask zero");
1021*0f4c859eSApple OSS Distributions 	checking = true;
1022*0f4c859eSApple OSS Distributions 	populate_zmm();
1023*0f4c859eSApple OSS Distributions 	zero_opmask();
1024*0f4c859eSApple OSS Distributions 
1025*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
1026*0f4c859eSApple OSS Distributions 	start_timer(time, zmm_sigalrm_handler_no_mod);
1027*0f4c859eSApple OSS Distributions 
1028*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
1029*0f4c859eSApple OSS Distributions 	populate_zmm();
1030*0f4c859eSApple OSS Distributions 	zero_opmask();
1031*0f4c859eSApple OSS Distributions 
1032*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
1033*0f4c859eSApple OSS Distributions 	while (checking) {
1034*0f4c859eSApple OSS Distributions 		check_zmm(FALSE);
1035*0f4c859eSApple OSS Distributions 	}
1036*0f4c859eSApple OSS Distributions 
1037*0f4c859eSApple OSS Distributions 	/* Check that sig handler did not changed our AVX state */
1038*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
1039*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
1040*0f4c859eSApple OSS Distributions 
1041*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1042*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray0, karray2);
1043*0f4c859eSApple OSS Distributions 
1044*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
1045*0f4c859eSApple OSS Distributions 	T_PASS("ZMM non-zero and OpMask zero");
1046*0f4c859eSApple OSS Distributions 
1047*0f4c859eSApple OSS Distributions 
1048*0f4c859eSApple OSS Distributions 	/*
1049*0f4c859eSApple OSS Distributions 	 * Check ZMM non-zero and OpMask non-zero
1050*0f4c859eSApple OSS Distributions 	 */
1051*0f4c859eSApple OSS Distributions 	T_LOG("Checking ZMM non-zero and OpMask non-zero");
1052*0f4c859eSApple OSS Distributions 	checking = true;
1053*0f4c859eSApple OSS Distributions 	populate_zmm();
1054*0f4c859eSApple OSS Distributions 	populate_opmask();
1055*0f4c859eSApple OSS Distributions 
1056*0f4c859eSApple OSS Distributions 	T_LOG("Running for %ds…", time);
1057*0f4c859eSApple OSS Distributions 	start_timer(time, zmm_sigalrm_handler_no_mod);
1058*0f4c859eSApple OSS Distributions 
1059*0f4c859eSApple OSS Distributions 	/* re-populate because printing mucks up XMMs */
1060*0f4c859eSApple OSS Distributions 	populate_zmm();
1061*0f4c859eSApple OSS Distributions 	populate_opmask();
1062*0f4c859eSApple OSS Distributions 
1063*0f4c859eSApple OSS Distributions 	/* Check state until timer fires */
1064*0f4c859eSApple OSS Distributions 	while (checking) {
1065*0f4c859eSApple OSS Distributions 		check_zmm(FALSE);
1066*0f4c859eSApple OSS Distributions 	}
1067*0f4c859eSApple OSS Distributions 
1068*0f4c859eSApple OSS Distributions 	/* Check that sig handler did not changed our AVX state */
1069*0f4c859eSApple OSS Distributions 	store_zmm(vec512array2);
1070*0f4c859eSApple OSS Distributions 	store_opmask(karray2);
1071*0f4c859eSApple OSS Distributions 
1072*0f4c859eSApple OSS Distributions 	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1073*0f4c859eSApple OSS Distributions 	assert_opmask_eq(karray0, karray2);
1074*0f4c859eSApple OSS Distributions 
1075*0f4c859eSApple OSS Distributions 	T_LOG("Ran for %ds", time);
1076*0f4c859eSApple OSS Distributions 	T_PASS("ZMM non-zero and OpMask non-zero");
1077*0f4c859eSApple OSS Distributions }
1078*0f4c859eSApple OSS Distributions 
1079*0f4c859eSApple OSS Distributions /*
1080*0f4c859eSApple OSS Distributions  * Main test declarations
1081*0f4c859eSApple OSS Distributions  */
1082*0f4c859eSApple OSS Distributions T_DECL(ymm_integrity,
1083*0f4c859eSApple OSS Distributions     "Quick soak test to verify that AVX "
1084*0f4c859eSApple OSS Distributions     "register state is maintained correctly",
1085*0f4c859eSApple OSS Distributions     T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
1086*0f4c859eSApple OSS Distributions 	require_avx();
1087*0f4c859eSApple OSS Distributions 	ymm_integrity(NORMAL_RUN_TIME);
1088*0f4c859eSApple OSS Distributions }
1089*0f4c859eSApple OSS Distributions 
1090*0f4c859eSApple OSS Distributions T_DECL(ymm_integrity_stress,
1091*0f4c859eSApple OSS Distributions     "Extended soak test to verify that AVX "
1092*0f4c859eSApple OSS Distributions     "register state is maintained correctly",
1093*0f4c859eSApple OSS Distributions     T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
1094*0f4c859eSApple OSS Distributions     T_META_ENABLED(false)) {
1095*0f4c859eSApple OSS Distributions 	require_avx();
1096*0f4c859eSApple OSS Distributions 	ymm_integrity(LONG_RUN_TIME);
1097*0f4c859eSApple OSS Distributions }
1098*0f4c859eSApple OSS Distributions 
1099*0f4c859eSApple OSS Distributions T_DECL(zmm_integrity,
1100*0f4c859eSApple OSS Distributions     "Quick soak test to verify that AVX-512 "
1101*0f4c859eSApple OSS Distributions     "register state is maintained correctly",
1102*0f4c859eSApple OSS Distributions     T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
1103*0f4c859eSApple OSS Distributions 	require_avx512();
1104*0f4c859eSApple OSS Distributions 	zmm_integrity(NORMAL_RUN_TIME);
1105*0f4c859eSApple OSS Distributions }
1106*0f4c859eSApple OSS Distributions 
1107*0f4c859eSApple OSS Distributions T_DECL(zmm_integrity_stress,
1108*0f4c859eSApple OSS Distributions     "Extended soak test to verify that AVX-512 "
1109*0f4c859eSApple OSS Distributions     "register state is maintained correctly",
1110*0f4c859eSApple OSS Distributions     T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
1111*0f4c859eSApple OSS Distributions     T_META_ENABLED(false)) {
1112*0f4c859eSApple OSS Distributions 	require_avx512();
1113*0f4c859eSApple OSS Distributions 	zmm_integrity(LONG_RUN_TIME);
1114*0f4c859eSApple OSS Distributions }
1115*0f4c859eSApple OSS Distributions 
1116*0f4c859eSApple OSS Distributions T_DECL(zmm_zeroing_optimization_integrity,
1117*0f4c859eSApple OSS Distributions     "Quick soak test to verify AVX-512 "
1118*0f4c859eSApple OSS Distributions     "register state is maintained with "
1119*0f4c859eSApple OSS Distributions     "zeroing optimizations enabled",
1120*0f4c859eSApple OSS Distributions     T_META_TIMEOUT(QUICK_RUN_TIME + TIMEOUT_OVERHEAD)) {
1121*0f4c859eSApple OSS Distributions 	require_avx512();
1122*0f4c859eSApple OSS Distributions 	zmm_zeroing_optimization_integrity(QUICK_RUN_TIME);
1123*0f4c859eSApple OSS Distributions }
1124