xref: /xnu-11215.81.4/tests/avx.c (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
#ifdef T_NAMESPACE
#undef T_NAMESPACE
#endif

#include <darwintest.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <i386/cpu_capabilities.h>

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.intel"),
	T_META_CHECK_LEAKS(false),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("intel"),
	T_META_OWNER("seth_goldberg"),
	T_META_RUN_CONCURRENTLY(true)
	);

#define QUICK_RUN_TIME   (2)
#define NORMAL_RUN_TIME  (10)
#define LONG_RUN_TIME    (10*60)
#define TIMEOUT_OVERHEAD (10)

volatile boolean_t checking = true;
char vec_str_buf[8192];
char karray_str_buf[1024];

/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define YMM_MAX                 16
#define X86_AVX_STATE_T         x86_avx_state64_t
#define X86_AVX_STATE_COUNT     x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR    x86_AVX_STATE64
#define MCONTEXT_SIZE_256       sizeof(struct __darwin_mcontext_avx64)
#else
#define YMM_MAX                 8
#define X86_AVX_STATE_T         x86_avx_state32_t
#define X86_AVX_STATE_COUNT     x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR    x86_AVX_STATE32
#define MCONTEXT_SIZE_256       sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute__((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256       vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256       vec256array3[YMM_MAX] VEC256ALIGN;

/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define ZMM_MAX                 32
#define X86_AVX512_STATE_T      x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT  x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512       sizeof(struct __darwin_mcontext_avx512_64)
#else
#define ZMM_MAX                 8
#define X86_AVX512_STATE_T      x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT  x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512       sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute__((aligned(64)))
#define OPMASK uint64_t
#define KARRAY_MAX              8
static inline void zero_zmm(void);
static inline void zero_opmask(void);
static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(boolean_t check_cookie);
VECTOR512       vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512       vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512       vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512       vec512array3[ZMM_MAX] VEC512ALIGN;
OPMASK karray0[KARRAY_MAX];
OPMASK karray1[KARRAY_MAX];
OPMASK karray2[KARRAY_MAX];
OPMASK karray3[KARRAY_MAX];

kern_return_t _thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);
kern_return_t _thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);

/*
 * Common functions
 */

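/*
 * Byte-wise memcmp, deliberately unoptimized: the libc memcmp may itself be
 * vectorized, and calling it here could clobber the very register state this
 * test is trying to validate.
 */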
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
{
	if (n != 0) {
		const unsigned char *p1 = s1, *p2 = s2;
		do {
			if (*p1++ != *p2++) {
				return *--p1 - *--p2;
			}
		} while (--n != 0);
	}
	return 0;
}

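/*
 * Arm a one-shot real-time timer so that `handler` runs (as a SIGALRM
 * sigaction) once `seconds` have elapsed.
 */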
void
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
{
	struct sigaction sigalrm_action = {
		.sa_sigaction = handler,
		.sa_flags = SA_RESTART,
		.sa_mask = 0
	};
	struct itimerval timer = {
		.it_value.tv_sec = seconds,
		.it_value.tv_usec = 0,
		.it_interval.tv_sec = 0,
		.it_interval.tv_usec = 0
	};
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
}

void
require_avx(void)
{
	if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
		T_SKIP("AVX not supported on this system");
	}
}

void
require_avx512(void)
{
	if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
		T_SKIP("AVX-512 not supported on this system");
	}
}

/*
 * ymm functions
 */

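/*
 * store_ymm()/restore_ymm() dump and reload ymm0..ymm7 (plus ymm8..ymm15 on
 * x86_64) to/from a 32-byte-aligned array, one vmovaps per register.
 */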
static inline void
store_ymm(VECTOR256 *vec256array)
{
	int i = 0;
	__asm__ volatile ("vmovaps  %%ymm0, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm1, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm2, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm3, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm4, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm5, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm6, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
	i++; __asm__ volatile ("vmovaps  %%ymm8, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm9, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm10, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm11, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm12, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm13, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm14, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps  %%ymm15, %0" :"=m" (vec256array[i]));
#endif
}

static inline void
restore_ymm(VECTOR256 *vec256array)
{
	VECTOR256 *p = vec256array;

	__asm__ volatile ("vmovaps  %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	++p; __asm__ volatile ("vmovaps  %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14"); p++;
	__asm__ volatile ("vmovaps  %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif
}

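/*
 * Fill every ymm register with a recognizable pattern: the pid replicated
 * across all lanes, with distinct sentinel words in lanes 0 and 7 for each
 * bank of four registers, then snapshot the result into vec256array0 as the
 * reference copy.
 */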
static inline void
populate_ymm(void)
{
	int j;
	uint32_t p[8] VEC256ALIGN;

	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
		p[j] = getpid();
	}

	p[0] = 0x22222222;
	p[7] = 0x77777777;
	__asm__ volatile ("vmovaps  %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
	__asm__ volatile ("vmovaps  %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
	__asm__ volatile ("vmovaps  %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
	__asm__ volatile ("vmovaps  %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

	p[0] = 0x44444444;
	p[7] = 0xEEEEEEEE;
	__asm__ volatile ("vmovaps  %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
	__asm__ volatile ("vmovaps  %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
	__asm__ volatile ("vmovaps  %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
	__asm__ volatile ("vmovaps  %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	p[0] = 0x88888888;
	p[7] = 0xAAAAAAAA;
	__asm__ volatile ("vmovaps  %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
	__asm__ volatile ("vmovaps  %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
	__asm__ volatile ("vmovaps  %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
	__asm__ volatile ("vmovaps  %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

	p[0] = 0xBBBBBBBB;
	p[7] = 0xCCCCCCCC;
	__asm__ volatile ("vmovaps  %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
	__asm__ volatile ("vmovaps  %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
	__asm__ volatile ("vmovaps  %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
	__asm__ volatile ("vmovaps  %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif

	store_ymm(vec256array0);
}

void
vec256_to_string(VECTOR256 *vec, char *buf)
{
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
		uint64_t a[4];
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
		ret = sprintf(
			buf + buf_idx,
			"0x%016llx:%016llx:%016llx:%016llx\n",
			a[0], a[1], a[2], a[3]
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

void
assert_ymm_eq(void *a, void *b, int c)
{
	if (memcmp_unoptimized(a, b, c)) {
		vec256_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec256_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("vectors not equal");
	}
}

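/*
 * Re-read the live ymm registers and compare against the reference snapshot.
 * If the signal handler has already planted STOP_COOKIE_256 in ymm7 (via the
 * modified mcontext), the loop is winding down and a mismatch is expected,
 * so skip the comparison.
 */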
void
check_ymm(void)
{
	uint32_t *p = (uint32_t *) &vec256array1[7];
	store_ymm(vec256array1);
	if (p[0] == STOP_COOKIE_256) {
		return;
	}
	assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
}

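/*
 * The Mach AVX thread state splits each 256-bit register into a low xmm half
 * and a high ymmh half; reassemble them into whole 256-bit vectors.
 */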
static void
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
{
	int     i;
	struct  __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
	struct  __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

	for (i = 0; i < YMM_MAX; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
	}
}

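/*
 * SIGALRM handler: verify that the AVX state embedded in the interrupted
 * context matches what populate_ymm() set, then overwrite xmm7/ymmh7 in the
 * context with STOP_COOKIE_256 so the main loop can observe sigreturn's
 * state restore.
 */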
static void
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
	uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
	uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

	T_LOG("Got SIGALRM");

	/* Check for AVX state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_ymm_state_to_vector(avx_state, vec256array3);
	assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_256;
	yp[0] = STOP_COOKIE_256;
	checking = FALSE;
}

kern_return_t
_thread_get_state_avx(
	thread_t                thread,
	int                     flavor,
	thread_state_t          state,          /* pointer to OUT array */
	mach_msg_type_number_t  *state_count)   /* IN/OUT */
{
	kern_return_t rv;
	VECTOR256 ymms[YMM_MAX];

	/*
	 * We must save and restore the YMMs across thread_get_state() because
	 * code in thread_get_state changes at least one xmm register AFTER
	 * thread_get_state has saved the state in userspace.  While it's still
	 * possible for something to muck with %xmms BEFORE making the mach
	 * system call (and rendering this save/restore useless), that does not
	 * currently occur, and since we depend on the avx state saved by
	 * thread_get_state being the same as that manually copied from the
	 * YMMs after thread_get_state returns, we have to go through these
	 * machinations.
	 */
	store_ymm(ymms);

	rv = thread_get_state(thread, flavor, state, state_count);

	restore_ymm(ymms);

	return rv;
}

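/*
 * Core AVX soak test: verify that the live ymm contents, the state reported
 * by thread_get_state(), and the state delivered to a SIGALRM handler all
 * agree, spinning on check_ymm() until the timer fires.  Invoked from the
 * T_DECLs below, e.g. ymm_integrity(NORMAL_RUN_TIME).
 */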
void
ymm_integrity(int time)
{
	mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
	kern_return_t kret;
	X86_AVX_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state2));

	kret = _thread_get_state_avx(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
		);

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Initial state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

	populate_ymm();

	kret = _thread_get_state_avx(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
		);

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Populated state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state2, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

	T_LOG("Running for %ds…", time);
	start_timer(time, ymm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */
	populate_ymm();

	/* Check state until timer fires */
	while (checking) {
		check_ymm();
	}
	/* Check that the sig handler changed our AVX state */
	store_ymm(vec256array1);

	uint32_t *p = (uint32_t *) &vec256array1[7];
	if (p[0] != STOP_COOKIE_256 ||
	    p[4] != STOP_COOKIE_256) {
		vec256_to_string(vec256array1, vec_str_buf);
		T_LOG("State:\n%s", vec_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");
	}

	T_LOG("Ran for %ds", time);
	T_PASS("No ymm register corruption occurred");
}

/*
 * zmm functions
 */

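/*
 * AVX-512 equivalents of the ymm helpers: the k0..k7 opmask registers are
 * spilled with kmovq, and zmm0..zmm7 (plus zmm8..zmm31 on x86_64) with
 * vmovaps, mirroring store_ymm()/restore_ymm() above.
 */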
static inline void
store_opmask(OPMASK k[])
{
	__asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
	__asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
	__asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
	__asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
	__asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
	__asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
	__asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
	__asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
}

static inline void
store_zmm(VECTOR512 *vecarray)
{
	int i = 0;
	__asm__ volatile ("vmovaps  %%zmm0, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm1, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm2, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm3, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm4, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm5, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm6, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
	i++; __asm__ volatile ("vmovaps  %%zmm8, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm9, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm10, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm11, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm12, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm13, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm14, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm15, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm16, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm17, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm18, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm19, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm20, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm21, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm22, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm23, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm24, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm25, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm26, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm27, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm28, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm29, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm30, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps  %%zmm31, %0" :"=m" (vecarray[i]));
#endif
}

static inline void
restore_zmm(VECTOR512 *vecarray)
{
	VECTOR512 *p = vecarray;

	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");

#if defined(__x86_64__)
	++p; __asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30"); p++;
	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
#endif
}

static inline void
zero_opmask(void)
{
	uint64_t zero = 0x0000000000000000ULL;

	__asm__ volatile ("kmovq %0, %%k0" : :"m" (zero) : "k0");
	__asm__ volatile ("kmovq %0, %%k1" : :"m" (zero) : "k1");
	__asm__ volatile ("kmovq %0, %%k2" : :"m" (zero) : "k2");
	__asm__ volatile ("kmovq %0, %%k3" : :"m" (zero) : "k3");
	__asm__ volatile ("kmovq %0, %%k4" : :"m" (zero) : "k4");
	__asm__ volatile ("kmovq %0, %%k5" : :"m" (zero) : "k5");
	__asm__ volatile ("kmovq %0, %%k6" : :"m" (zero) : "k6");
	__asm__ volatile ("kmovq %0, %%k7" : :"m" (zero) : "k7");
	store_opmask(karray0);
}

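/*
 * Load each opmask register with a value derived from the pid in the high
 * 32 bits and a per-register multiple of 0x11111111 in the low 32 bits,
 * then snapshot the result into karray0 as the reference copy.
 */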
static inline void
populate_opmask(void)
{
	uint64_t k[8];

	for (int j = 0; j < 8; j++) {
		k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);
	}

	__asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]) : "k0");
	__asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]) : "k1");
	__asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]) : "k2");
	__asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]) : "k3");
	__asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]) : "k4");
	__asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]) : "k5");
	__asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]) : "k6");
	__asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]) : "k7");

	store_opmask(karray0);
}

kern_return_t
_thread_get_state_avx512(
	thread_t                thread,
	int                     flavor,
	thread_state_t          state,          /* pointer to OUT array */
	mach_msg_type_number_t  *state_count)   /* IN/OUT */
{
	kern_return_t rv;
	VECTOR512 zmms[ZMM_MAX];

	/*
	 * We must save and restore the ZMMs across thread_get_state() because
	 * code in thread_get_state changes at least one xmm register AFTER
	 * thread_get_state has saved the state in userspace.  While it's still
	 * possible for something to muck with %xmms BEFORE making the mach
	 * system call (and rendering this save/restore useless), that does not
	 * currently occur, and since we depend on the avx512 state saved by
	 * thread_get_state being the same as that manually copied from the
	 * ZMMs after thread_get_state returns, we have to go through these
	 * machinations.
	 */
	store_zmm(zmms);

	rv = thread_get_state(thread, flavor, state, state_count);

	restore_zmm(zmms);

	return rv;
}

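/*
 * Zero all zmm registers. With the vector and opmask state zeroed, the
 * kernel can presumably track the thread's AVX-512 state in XSAVE's
 * init-optimized form; the zeroing tests below verify that state reporting
 * and signal delivery still behave correctly in that case.
 */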
static inline void
zero_zmm(void)
{
	uint64_t zero[8] VEC512ALIGN = {0};

	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (zero) : "zmm0");
	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (zero) : "zmm1");
	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (zero) : "zmm2");
	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (zero) : "zmm3");
	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (zero) : "zmm4");
	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (zero) : "zmm5");
	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (zero) : "zmm6");
	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (zero) : "zmm7");

#if defined(__x86_64__)
	__asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (zero) : "zmm8");
	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (zero) : "zmm9");
	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (zero) : "zmm10");
	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (zero) : "zmm11");
	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (zero) : "zmm12");
	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (zero) : "zmm13");
	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (zero) : "zmm14");
	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (zero) : "zmm15");
	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (zero) : "zmm16");
	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (zero) : "zmm17");
	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (zero) : "zmm18");
	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (zero) : "zmm19");
	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (zero) : "zmm20");
	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (zero) : "zmm21");
	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (zero) : "zmm22");
	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (zero) : "zmm23");
	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (zero) : "zmm24");
	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (zero) : "zmm25");
	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (zero) : "zmm26");
	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (zero) : "zmm27");
	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (zero) : "zmm28");
	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (zero) : "zmm29");
	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (zero) : "zmm30");
	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (zero) : "zmm31");
#endif

	store_zmm(vec512array0);
}

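/*
 * As populate_ymm(), but 512 bits wide: pid-derived lanes with distinct
 * sentinel words at indices 0, 2, 4, and 7 for each bank of eight
 * registers, snapshotted into vec512array0 as the reference copy.
 */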
static inline void
populate_zmm(void)
{
	int j;
	uint64_t p[8] VEC512ALIGN;

	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
		p[j] = ((uint64_t) getpid() << 32) + getpid();
	}

	p[0] = 0x0000000000000000ULL;
	p[2] = 0x4444444444444444ULL;
	p[4] = 0x8888888888888888ULL;
	p[7] = 0xCCCCCCCCCCCCCCCCULL;
	__asm__ volatile ("vmovaps  %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0");
	__asm__ volatile ("vmovaps  %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1");
	__asm__ volatile ("vmovaps  %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2");
	__asm__ volatile ("vmovaps  %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3");
	__asm__ volatile ("vmovaps  %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4");
	__asm__ volatile ("vmovaps  %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5");
	__asm__ volatile ("vmovaps  %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6");
	__asm__ volatile ("vmovaps  %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");

#if defined(__x86_64__)
	p[0] = 0x1111111111111111ULL;
	p[2] = 0x5555555555555555ULL;
	p[4] = 0x9999999999999999ULL;
	p[7] = 0xDDDDDDDDDDDDDDDDULL;
	__asm__ volatile ("vmovaps  %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8");
	__asm__ volatile ("vmovaps  %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9");
	__asm__ volatile ("vmovaps  %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10");
	__asm__ volatile ("vmovaps  %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11");
	__asm__ volatile ("vmovaps  %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12");
	__asm__ volatile ("vmovaps  %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13");
	__asm__ volatile ("vmovaps  %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14");
	__asm__ volatile ("vmovaps  %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15");

	p[0] = 0x2222222222222222ULL;
	p[2] = 0x6666666666666666ULL;
	p[4] = 0xAAAAAAAAAAAAAAAAULL;
	p[7] = 0xEEEEEEEEEEEEEEEEULL;
	__asm__ volatile ("vmovaps  %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16");
	__asm__ volatile ("vmovaps  %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17");
	__asm__ volatile ("vmovaps  %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18");
	__asm__ volatile ("vmovaps  %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19");
	__asm__ volatile ("vmovaps  %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20");
	__asm__ volatile ("vmovaps  %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21");
	__asm__ volatile ("vmovaps  %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22");
	__asm__ volatile ("vmovaps  %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23");

	p[0] = 0x3333333333333333ULL;
	p[2] = 0x7777777777777777ULL;
	p[4] = 0xBBBBBBBBBBBBBBBBULL;
	p[7] = 0xFFFFFFFFFFFFFFFFULL;
	__asm__ volatile ("vmovaps  %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24");
	__asm__ volatile ("vmovaps  %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25");
	__asm__ volatile ("vmovaps  %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26");
	__asm__ volatile ("vmovaps  %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27");
	__asm__ volatile ("vmovaps  %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28");
	__asm__ volatile ("vmovaps  %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29");
	__asm__ volatile ("vmovaps  %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30");
	__asm__ volatile ("vmovaps  %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
#endif

	store_zmm(vec512array0);
}

void
vec512_to_string(VECTOR512 *vec, char *buf)
{
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
		uint64_t a[8];
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
		ret = sprintf(
			buf + buf_idx,
			"0x%016llx:%016llx:%016llx:%016llx:"
			"%016llx:%016llx:%016llx:%016llx%s",
			a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
			vec_idx < ZMM_MAX - 1 ? "\n" : ""
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

void
opmask_to_string(OPMASK *karray, char *buf)
{
	unsigned int karray_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
		ret = sprintf(
			buf + buf_idx,
			"k%u: 0x%016llx%s",
			karray_idx, karray[karray_idx],
			karray_idx < KARRAY_MAX - 1 ? "\n" : ""
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

static void
assert_zmm_eq(void *a, void *b, int c)
{
	if (memcmp_unoptimized(a, b, c)) {
		vec512_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec512_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("Vectors not equal");
	}
}

static void
assert_opmask_eq(OPMASK *a, OPMASK *b)
{
	for (int i = 0; i < KARRAY_MAX; i++) {
		if (a[i] != b[i]) {
			opmask_to_string(a, karray_str_buf);
			T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
			opmask_to_string(b, karray_str_buf);
			T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
			T_ASSERT_FAIL("opmasks not equal");
		}
	}
}

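/*
 * Re-read the live zmm and opmask registers and compare against the
 * reference snapshots; when check_cookie is set, a STOP_COOKIE_512 in zmm7
 * means the handler has fired and a mismatch is expected.
 */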
void
check_zmm(boolean_t check_cookie)
{
	uint64_t *p = (uint64_t *) &vec512array1[7];
	store_opmask(karray1);
	store_zmm(vec512array1);
	if (check_cookie && p[0] == STOP_COOKIE_512) {
		return;
	}

	assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
	assert_opmask_eq(karray0, karray1);
}

static void
copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
{
	OPMASK *k = (OPMASK *) &sp->__fpu_k0;
	for (int i = 0; i < KARRAY_MAX; i++) {
		bcopy(&k[i], &op[i], sizeof(*op));
	}
}

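/*
 * The Mach AVX-512 thread state stores zmm0..zmm15 split across xmm, ymmh,
 * and zmmh fields, and (on x86_64) zmm16..zmm31 as whole 512-bit registers;
 * reassemble everything into whole 512-bit vectors.
 */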
static void
copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
{
	int     i;
	struct  __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
	struct  __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
	struct  __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
	struct  __darwin_zmm_reg *zmm  = &sp->__fpu_zmm16;

	for (i = 0; i < ZMM_MAX / 2; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
		bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
	}
#else
	for (i = 0; i < ZMM_MAX; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
	}
#endif
}

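/*
 * Two SIGALRM handlers: zmm_sigalrm_handler() validates the AVX-512 state in
 * the interrupted context and then plants STOP_COOKIE_512 in
 * xmm7/ymmh7/zmmh7/k7, while the _no_mod variant validates the state but
 * leaves it untouched, for the zeroing-optimization tests.
 */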
static void
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

	/* Check for AVX-512 state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_zmm_state_to_vector(avx_state, vec512array3);
	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
	copy_state_to_opmask(avx_state, karray3);
	assert_opmask_eq(karray3, karray0);

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_512;
	yp[0] = STOP_COOKIE_512;
	zp[0] = STOP_COOKIE_512;
	kp[7] = STOP_COOKIE_512;
	checking = FALSE;
}

static void
zmm_sigalrm_handler_no_mod(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;

	/* Check for AVX-512 state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_zmm_state_to_vector(avx_state, vec512array3);
	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
	copy_state_to_opmask(avx_state, karray3);
	assert_opmask_eq(karray3, karray0);

	/* Break the main loop without modifying the context */
	checking = FALSE;
}

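/*
 * AVX-512 counterpart of ymm_integrity(): additionally cross-checks the
 * opmask registers at every step.
 */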
void
zmm_integrity(int time)
{
	mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
	kern_return_t kret;
	X86_AVX512_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state2));

	store_zmm(vec512array2);
	store_opmask(karray2);

	kret = _thread_get_state_avx512(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
		);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state, karray1);
	assert_opmask_eq(karray2, karray1);

	populate_zmm();
	populate_opmask();

	kret = _thread_get_state_avx512(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
		);

	store_zmm(vec512array2);
	store_opmask(karray2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state2, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state2, karray1);
	assert_opmask_eq(karray2, karray1);

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */
	populate_zmm();
	populate_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm(TRUE);
	}

	/* Check that the sig handler changed our AVX state */
	store_zmm(vec512array1);
	store_opmask(karray1);

	uint64_t *p = (uint64_t *) &vec512array1[7];
	if (p[0] != STOP_COOKIE_512 ||
	    p[2] != STOP_COOKIE_512 ||
	    p[4] != STOP_COOKIE_512 ||
	    karray1[7] != STOP_COOKIE_512) {
		vec512_to_string(vec512array1, vec_str_buf);
		opmask_to_string(karray1, karray_str_buf);
		T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");
	}

	T_LOG("Ran for %ds", time);
	T_PASS("No zmm register corruption occurred");
}

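/*
 * Exercise all four combinations of zeroed/populated zmm and opmask state.
 * Zeroed components can presumably be tracked by the kernel in XSAVE's
 * init-optimized form, so this checks that state reporting and signal
 * delivery do not resurrect stale register contents in any combination.
 */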
void
zmm_zeroing_optimization_integrity(int time)
{
	/*
	 * Check ZMM zero and OpMask zero
	 */
	T_LOG("Checking ZMM zero and OpMask zero");
	checking = true;
	zero_zmm();
	zero_opmask();

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler_no_mod);

	/* re-establish state because printing mucks up XMMs */
	zero_zmm();
	zero_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm(FALSE);
	}

	/* Check that the sig handler did not change our AVX state */
	store_zmm(vec512array2);
	store_opmask(karray2);

	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
	assert_opmask_eq(karray0, karray2);

	T_LOG("Ran for %ds", time);
	T_PASS("ZMM zero and OpMask zero");


	/*
	 * Check ZMM zero and OpMask non-zero
	 */
	T_LOG("Checking ZMM zero and OpMask non-zero");
	checking = true;
	zero_zmm();
	populate_opmask();

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler_no_mod);

	/* re-establish state because printing mucks up XMMs */
	zero_zmm();
	populate_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm(FALSE);
	}

	/* Check that the sig handler did not change our AVX state */
	store_zmm(vec512array2);
	store_opmask(karray2);

	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
	assert_opmask_eq(karray0, karray2);

	T_LOG("Ran for %ds", time);
	T_PASS("ZMM zero and OpMask non-zero");


	/*
	 * Check ZMM non-zero and OpMask zero
	 */
	T_LOG("Checking ZMM non-zero and OpMask zero");
	checking = true;
	populate_zmm();
	zero_opmask();

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler_no_mod);

	/* re-establish state because printing mucks up XMMs */
	populate_zmm();
	zero_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm(FALSE);
	}

	/* Check that the sig handler did not change our AVX state */
	store_zmm(vec512array2);
	store_opmask(karray2);

	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
	assert_opmask_eq(karray0, karray2);

	T_LOG("Ran for %ds", time);
	T_PASS("ZMM non-zero and OpMask zero");


	/*
	 * Check ZMM non-zero and OpMask non-zero
	 */
	T_LOG("Checking ZMM non-zero and OpMask non-zero");
	checking = true;
	populate_zmm();
	populate_opmask();

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler_no_mod);

	/* re-establish state because printing mucks up XMMs */
	populate_zmm();
	populate_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm(FALSE);
	}

	/* Check that the sig handler did not change our AVX state */
	store_zmm(vec512array2);
	store_opmask(karray2);

	assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
	assert_opmask_eq(karray0, karray2);

	T_LOG("Ran for %ds", time);
	T_PASS("ZMM non-zero and OpMask non-zero");
}

/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx();
	ymm_integrity(NORMAL_RUN_TIME);
}

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	require_avx();
	ymm_integrity(LONG_RUN_TIME);
}

T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx512();
	zmm_integrity(NORMAL_RUN_TIME);
}

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	require_avx512();
	zmm_integrity(LONG_RUN_TIME);
}

T_DECL(zmm_zeroing_optimization_integrity,
    "Quick soak test to verify AVX-512 "
    "register state is maintained with "
    "zeroing optimizations enabled",
    T_META_TIMEOUT(QUICK_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx512();
	zmm_zeroing_optimization_integrity(QUICK_RUN_TIME);
}