1*4d495c6eSApple OSS Distributions #ifdef T_NAMESPACE
2*4d495c6eSApple OSS Distributions #undef T_NAMESPACE
3*4d495c6eSApple OSS Distributions #endif
4*4d495c6eSApple OSS Distributions
5*4d495c6eSApple OSS Distributions #include <darwintest.h>
6*4d495c6eSApple OSS Distributions #include <unistd.h>
7*4d495c6eSApple OSS Distributions #include <signal.h>
8*4d495c6eSApple OSS Distributions #include <sys/time.h>
9*4d495c6eSApple OSS Distributions #include <sys/mman.h>
10*4d495c6eSApple OSS Distributions #include <immintrin.h>
11*4d495c6eSApple OSS Distributions #include <mach/mach.h>
12*4d495c6eSApple OSS Distributions #include <stdio.h>
13*4d495c6eSApple OSS Distributions #include <string.h>
14*4d495c6eSApple OSS Distributions #include <err.h>
15*4d495c6eSApple OSS Distributions #include <i386/cpu_capabilities.h>
16*4d495c6eSApple OSS Distributions
17*4d495c6eSApple OSS Distributions T_GLOBAL_META(
18*4d495c6eSApple OSS Distributions T_META_NAMESPACE("xnu.intel"),
19*4d495c6eSApple OSS Distributions T_META_CHECK_LEAKS(false),
20*4d495c6eSApple OSS Distributions T_META_RADAR_COMPONENT_NAME("xnu"),
21*4d495c6eSApple OSS Distributions T_META_RADAR_COMPONENT_VERSION("intel"),
22*4d495c6eSApple OSS Distributions T_META_OWNER("seth_goldberg"),
23*4d495c6eSApple OSS Distributions T_META_RUN_CONCURRENTLY(true)
24*4d495c6eSApple OSS Distributions );
25*4d495c6eSApple OSS Distributions
26*4d495c6eSApple OSS Distributions #define QUICK_RUN_TIME (2)
27*4d495c6eSApple OSS Distributions #define NORMAL_RUN_TIME (10)
28*4d495c6eSApple OSS Distributions #define LONG_RUN_TIME (10*60)
29*4d495c6eSApple OSS Distributions #define TIMEOUT_OVERHEAD (10)
30*4d495c6eSApple OSS Distributions
31*4d495c6eSApple OSS Distributions volatile boolean_t checking = true;
32*4d495c6eSApple OSS Distributions char vec_str_buf[8196];
33*4d495c6eSApple OSS Distributions char karray_str_buf[1024];
34*4d495c6eSApple OSS Distributions
35*4d495c6eSApple OSS Distributions /*
36*4d495c6eSApple OSS Distributions * ymm defines/globals/prototypes
37*4d495c6eSApple OSS Distributions */
38*4d495c6eSApple OSS Distributions #define STOP_COOKIE_256 0x01234567
39*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
40*4d495c6eSApple OSS Distributions #define YMM_MAX 16
41*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_T x86_avx_state64_t
42*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
43*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
44*4d495c6eSApple OSS Distributions #define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
45*4d495c6eSApple OSS Distributions #else
46*4d495c6eSApple OSS Distributions #define YMM_MAX 8
47*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_T x86_avx_state32_t
48*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
49*4d495c6eSApple OSS Distributions #define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
50*4d495c6eSApple OSS Distributions #define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
51*4d495c6eSApple OSS Distributions #endif
52*4d495c6eSApple OSS Distributions #define VECTOR256 __m256
53*4d495c6eSApple OSS Distributions #define VEC256ALIGN __attribute ((aligned(32)))
54*4d495c6eSApple OSS Distributions static inline void populate_ymm(void);
55*4d495c6eSApple OSS Distributions static inline void check_ymm(void);
56*4d495c6eSApple OSS Distributions VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
57*4d495c6eSApple OSS Distributions VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
58*4d495c6eSApple OSS Distributions VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
59*4d495c6eSApple OSS Distributions VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;
60*4d495c6eSApple OSS Distributions
61*4d495c6eSApple OSS Distributions /*
62*4d495c6eSApple OSS Distributions * zmm defines/globals/prototypes
63*4d495c6eSApple OSS Distributions */
64*4d495c6eSApple OSS Distributions #define STOP_COOKIE_512 0x0123456789abcdefULL
65*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
66*4d495c6eSApple OSS Distributions #define ZMM_MAX 32
67*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_T x86_avx512_state64_t
68*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
69*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
70*4d495c6eSApple OSS Distributions #define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
71*4d495c6eSApple OSS Distributions #else
72*4d495c6eSApple OSS Distributions #define ZMM_MAX 8
73*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_T x86_avx512_state32_t
74*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
75*4d495c6eSApple OSS Distributions #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
76*4d495c6eSApple OSS Distributions #define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
77*4d495c6eSApple OSS Distributions #endif
78*4d495c6eSApple OSS Distributions #define VECTOR512 __m512
79*4d495c6eSApple OSS Distributions #define VEC512ALIGN __attribute ((aligned(64)))
80*4d495c6eSApple OSS Distributions #define OPMASK uint64_t
81*4d495c6eSApple OSS Distributions #define KARRAY_MAX 8
82*4d495c6eSApple OSS Distributions static inline void zero_zmm(void);
83*4d495c6eSApple OSS Distributions static inline void zero_opmask(void);
84*4d495c6eSApple OSS Distributions static inline void populate_zmm(void);
85*4d495c6eSApple OSS Distributions static inline void populate_opmask(void);
86*4d495c6eSApple OSS Distributions static inline void check_zmm(boolean_t check_cookie);
87*4d495c6eSApple OSS Distributions VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
88*4d495c6eSApple OSS Distributions VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
89*4d495c6eSApple OSS Distributions VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
90*4d495c6eSApple OSS Distributions VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;
91*4d495c6eSApple OSS Distributions OPMASK karray0[8];
92*4d495c6eSApple OSS Distributions OPMASK karray1[8];
93*4d495c6eSApple OSS Distributions OPMASK karray2[8];
94*4d495c6eSApple OSS Distributions OPMASK karray3[8];
95*4d495c6eSApple OSS Distributions
96*4d495c6eSApple OSS Distributions kern_return_t _thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
97*4d495c6eSApple OSS Distributions mach_msg_type_number_t *state_count);
98*4d495c6eSApple OSS Distributions kern_return_t _thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
99*4d495c6eSApple OSS Distributions mach_msg_type_number_t *state_count);
100*4d495c6eSApple OSS Distributions
101*4d495c6eSApple OSS Distributions /*
102*4d495c6eSApple OSS Distributions * Common functions
103*4d495c6eSApple OSS Distributions */
104*4d495c6eSApple OSS Distributions
/*
 * Byte-wise comparison of two buffers, written as a plain scalar loop
 * rather than a call into the C library's memcmp().
 *
 * Returns 0 when the first n bytes are identical (or n is 0); otherwise
 * returns the difference between the first pair of mismatching bytes, each
 * read as an unsigned char.
 */
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;

	for (; n != 0; n--, lhs++, rhs++) {
		if (*lhs != *rhs) {
			return *lhs - *rhs;
		}
	}
	return 0;
}
118*4d495c6eSApple OSS Distributions
119*4d495c6eSApple OSS Distributions void
start_timer(int seconds,void (* handler)(int,siginfo_t *,void *))120*4d495c6eSApple OSS Distributions start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
121*4d495c6eSApple OSS Distributions {
122*4d495c6eSApple OSS Distributions struct sigaction sigalrm_action = {
123*4d495c6eSApple OSS Distributions .sa_sigaction = handler,
124*4d495c6eSApple OSS Distributions .sa_flags = SA_RESTART,
125*4d495c6eSApple OSS Distributions .sa_mask = 0
126*4d495c6eSApple OSS Distributions };
127*4d495c6eSApple OSS Distributions struct itimerval timer = {
128*4d495c6eSApple OSS Distributions .it_value.tv_sec = seconds,
129*4d495c6eSApple OSS Distributions .it_value.tv_usec = 0,
130*4d495c6eSApple OSS Distributions .it_interval.tv_sec = 0,
131*4d495c6eSApple OSS Distributions .it_interval.tv_usec = 0
132*4d495c6eSApple OSS Distributions };
133*4d495c6eSApple OSS Distributions T_QUIET; T_WITH_ERRNO;
134*4d495c6eSApple OSS Distributions T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
135*4d495c6eSApple OSS Distributions T_QUIET; T_WITH_ERRNO;
136*4d495c6eSApple OSS Distributions T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
137*4d495c6eSApple OSS Distributions }
138*4d495c6eSApple OSS Distributions
139*4d495c6eSApple OSS Distributions void
require_avx(void)140*4d495c6eSApple OSS Distributions require_avx(void)
141*4d495c6eSApple OSS Distributions {
142*4d495c6eSApple OSS Distributions if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
143*4d495c6eSApple OSS Distributions T_SKIP("AVX not supported on this system");
144*4d495c6eSApple OSS Distributions }
145*4d495c6eSApple OSS Distributions }
146*4d495c6eSApple OSS Distributions
147*4d495c6eSApple OSS Distributions void
require_avx512(void)148*4d495c6eSApple OSS Distributions require_avx512(void)
149*4d495c6eSApple OSS Distributions {
150*4d495c6eSApple OSS Distributions if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
151*4d495c6eSApple OSS Distributions T_SKIP("AVX-512 not supported on this system");
152*4d495c6eSApple OSS Distributions }
153*4d495c6eSApple OSS Distributions }
154*4d495c6eSApple OSS Distributions
155*4d495c6eSApple OSS Distributions /*
156*4d495c6eSApple OSS Distributions * ymm functions
157*4d495c6eSApple OSS Distributions */
158*4d495c6eSApple OSS Distributions
159*4d495c6eSApple OSS Distributions static inline void
store_ymm(VECTOR256 * vec256array)160*4d495c6eSApple OSS Distributions store_ymm(VECTOR256 *vec256array)
161*4d495c6eSApple OSS Distributions {
162*4d495c6eSApple OSS Distributions int i = 0;
163*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
164*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
165*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
166*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
167*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
168*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
169*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
170*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
171*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
172*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
173*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
174*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
175*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
176*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
177*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
178*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
179*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
180*4d495c6eSApple OSS Distributions #endif
181*4d495c6eSApple OSS Distributions }
182*4d495c6eSApple OSS Distributions
183*4d495c6eSApple OSS Distributions static inline void
restore_ymm(VECTOR256 * vec256array)184*4d495c6eSApple OSS Distributions restore_ymm(VECTOR256 *vec256array)
185*4d495c6eSApple OSS Distributions {
186*4d495c6eSApple OSS Distributions VECTOR256 *p = vec256array;
187*4d495c6eSApple OSS Distributions
188*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0"); p++;
189*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1"); p++;
190*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2"); p++;
191*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3"); p++;
192*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4"); p++;
193*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5"); p++;
194*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6"); p++;
195*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");
196*4d495c6eSApple OSS Distributions
197*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
198*4d495c6eSApple OSS Distributions ++p; __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8"); p++;
199*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9"); p++;
200*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10"); p++;
201*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11"); p++;
202*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12"); p++;
203*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13"); p++;
204*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14"); p++;
205*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
206*4d495c6eSApple OSS Distributions #endif
207*4d495c6eSApple OSS Distributions }
208*4d495c6eSApple OSS Distributions
209*4d495c6eSApple OSS Distributions static inline void
populate_ymm(void)210*4d495c6eSApple OSS Distributions populate_ymm(void)
211*4d495c6eSApple OSS Distributions {
212*4d495c6eSApple OSS Distributions int j;
213*4d495c6eSApple OSS Distributions uint32_t p[8] VEC256ALIGN;
214*4d495c6eSApple OSS Distributions
215*4d495c6eSApple OSS Distributions for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
216*4d495c6eSApple OSS Distributions p[j] = getpid();
217*4d495c6eSApple OSS Distributions }
218*4d495c6eSApple OSS Distributions
219*4d495c6eSApple OSS Distributions p[0] = 0x22222222;
220*4d495c6eSApple OSS Distributions p[7] = 0x77777777;
221*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
222*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
223*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
224*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");
225*4d495c6eSApple OSS Distributions
226*4d495c6eSApple OSS Distributions p[0] = 0x44444444;
227*4d495c6eSApple OSS Distributions p[7] = 0xEEEEEEEE;
228*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
229*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
230*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
231*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");
232*4d495c6eSApple OSS Distributions
233*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
234*4d495c6eSApple OSS Distributions p[0] = 0x88888888;
235*4d495c6eSApple OSS Distributions p[7] = 0xAAAAAAAA;
236*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
237*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
238*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
239*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");
240*4d495c6eSApple OSS Distributions
241*4d495c6eSApple OSS Distributions p[0] = 0xBBBBBBBB;
242*4d495c6eSApple OSS Distributions p[7] = 0xCCCCCCCC;
243*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
244*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
245*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
246*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
247*4d495c6eSApple OSS Distributions #endif
248*4d495c6eSApple OSS Distributions
249*4d495c6eSApple OSS Distributions store_ymm(vec256array0);
250*4d495c6eSApple OSS Distributions }
251*4d495c6eSApple OSS Distributions
252*4d495c6eSApple OSS Distributions void
vec256_to_string(VECTOR256 * vec,char * buf)253*4d495c6eSApple OSS Distributions vec256_to_string(VECTOR256 *vec, char *buf)
254*4d495c6eSApple OSS Distributions {
255*4d495c6eSApple OSS Distributions unsigned int vec_idx = 0;
256*4d495c6eSApple OSS Distributions unsigned int buf_idx = 0;
257*4d495c6eSApple OSS Distributions int ret = 0;
258*4d495c6eSApple OSS Distributions
259*4d495c6eSApple OSS Distributions for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
260*4d495c6eSApple OSS Distributions uint64_t a[4];
261*4d495c6eSApple OSS Distributions bcopy(&vec[vec_idx], &a[0], sizeof(a));
262*4d495c6eSApple OSS Distributions ret = sprintf(
263*4d495c6eSApple OSS Distributions buf + buf_idx,
264*4d495c6eSApple OSS Distributions "0x%016llx:%016llx:%016llx:%016llx\n",
265*4d495c6eSApple OSS Distributions a[0], a[1], a[2], a[3]
266*4d495c6eSApple OSS Distributions );
267*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
268*4d495c6eSApple OSS Distributions buf_idx += ret;
269*4d495c6eSApple OSS Distributions }
270*4d495c6eSApple OSS Distributions }
271*4d495c6eSApple OSS Distributions
272*4d495c6eSApple OSS Distributions void
assert_ymm_eq(void * a,void * b,int c)273*4d495c6eSApple OSS Distributions assert_ymm_eq(void *a, void *b, int c)
274*4d495c6eSApple OSS Distributions {
275*4d495c6eSApple OSS Distributions if (memcmp_unoptimized(a, b, c)) {
276*4d495c6eSApple OSS Distributions vec256_to_string(a, vec_str_buf);
277*4d495c6eSApple OSS Distributions T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
278*4d495c6eSApple OSS Distributions vec256_to_string(b, vec_str_buf);
279*4d495c6eSApple OSS Distributions T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
280*4d495c6eSApple OSS Distributions T_ASSERT_FAIL("vectors not equal");
281*4d495c6eSApple OSS Distributions }
282*4d495c6eSApple OSS Distributions }
283*4d495c6eSApple OSS Distributions
284*4d495c6eSApple OSS Distributions void
check_ymm(void)285*4d495c6eSApple OSS Distributions check_ymm(void)
286*4d495c6eSApple OSS Distributions {
287*4d495c6eSApple OSS Distributions uint32_t *p = (uint32_t *) &vec256array1[7];
288*4d495c6eSApple OSS Distributions store_ymm(vec256array1);
289*4d495c6eSApple OSS Distributions if (p[0] == STOP_COOKIE_256) {
290*4d495c6eSApple OSS Distributions return;
291*4d495c6eSApple OSS Distributions }
292*4d495c6eSApple OSS Distributions assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
293*4d495c6eSApple OSS Distributions }
294*4d495c6eSApple OSS Distributions
295*4d495c6eSApple OSS Distributions static void
copy_ymm_state_to_vector(X86_AVX_STATE_T * sp,VECTOR256 * vp)296*4d495c6eSApple OSS Distributions copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
297*4d495c6eSApple OSS Distributions {
298*4d495c6eSApple OSS Distributions int i;
299*4d495c6eSApple OSS Distributions struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
300*4d495c6eSApple OSS Distributions struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
301*4d495c6eSApple OSS Distributions
302*4d495c6eSApple OSS Distributions for (i = 0; i < YMM_MAX; i++) {
303*4d495c6eSApple OSS Distributions bcopy(&xmm[i], &vp[i], sizeof(*xmm));
304*4d495c6eSApple OSS Distributions bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
305*4d495c6eSApple OSS Distributions }
306*4d495c6eSApple OSS Distributions }
307*4d495c6eSApple OSS Distributions
308*4d495c6eSApple OSS Distributions static void
ymm_sigalrm_handler(int signum __unused,siginfo_t * info __unused,void * ctx)309*4d495c6eSApple OSS Distributions ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
310*4d495c6eSApple OSS Distributions {
311*4d495c6eSApple OSS Distributions ucontext_t *contextp = (ucontext_t *) ctx;
312*4d495c6eSApple OSS Distributions mcontext_t mcontext = contextp->uc_mcontext;
313*4d495c6eSApple OSS Distributions X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
314*4d495c6eSApple OSS Distributions uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
315*4d495c6eSApple OSS Distributions uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;
316*4d495c6eSApple OSS Distributions
317*4d495c6eSApple OSS Distributions T_LOG("Got SIGALRM");
318*4d495c6eSApple OSS Distributions
319*4d495c6eSApple OSS Distributions /* Check for AVX state */
320*4d495c6eSApple OSS Distributions T_QUIET;
321*4d495c6eSApple OSS Distributions T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");
322*4d495c6eSApple OSS Distributions
323*4d495c6eSApple OSS Distributions /* Check that the state in the context is what's set and expected */
324*4d495c6eSApple OSS Distributions copy_ymm_state_to_vector(avx_state, vec256array3);
325*4d495c6eSApple OSS Distributions assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));
326*4d495c6eSApple OSS Distributions
327*4d495c6eSApple OSS Distributions /* Change the context and break the main loop */
328*4d495c6eSApple OSS Distributions xp[0] = STOP_COOKIE_256;
329*4d495c6eSApple OSS Distributions yp[0] = STOP_COOKIE_256;
330*4d495c6eSApple OSS Distributions checking = FALSE;
331*4d495c6eSApple OSS Distributions }
332*4d495c6eSApple OSS Distributions
333*4d495c6eSApple OSS Distributions kern_return_t
_thread_get_state_avx(thread_t thread,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)334*4d495c6eSApple OSS Distributions _thread_get_state_avx(
335*4d495c6eSApple OSS Distributions thread_t thread,
336*4d495c6eSApple OSS Distributions int flavor,
337*4d495c6eSApple OSS Distributions thread_state_t state, /* pointer to OUT array */
338*4d495c6eSApple OSS Distributions mach_msg_type_number_t *state_count) /*IN/OUT*/
339*4d495c6eSApple OSS Distributions {
340*4d495c6eSApple OSS Distributions kern_return_t rv;
341*4d495c6eSApple OSS Distributions VECTOR256 ymms[YMM_MAX];
342*4d495c6eSApple OSS Distributions
343*4d495c6eSApple OSS Distributions /*
344*4d495c6eSApple OSS Distributions * We must save and restore the YMMs across thread_get_state() because
345*4d495c6eSApple OSS Distributions * code in thread_get_state changes at least one xmm register AFTER the
346*4d495c6eSApple OSS Distributions * thread_get_state has saved the state in userspace. While it's still
347*4d495c6eSApple OSS Distributions * possible for something to muck with %xmms BEFORE making the mach
348*4d495c6eSApple OSS Distributions * system call (and rendering this save/restore useless), that does not
349*4d495c6eSApple OSS Distributions * currently occur, and since we depend on the avx state saved in the
350*4d495c6eSApple OSS Distributions * thread_get_state to be the same as that manually copied from YMMs after
351*4d495c6eSApple OSS Distributions * thread_get_state returns, we have to go through these machinations.
352*4d495c6eSApple OSS Distributions */
353*4d495c6eSApple OSS Distributions store_ymm(ymms);
354*4d495c6eSApple OSS Distributions
355*4d495c6eSApple OSS Distributions rv = thread_get_state(thread, flavor, state, state_count);
356*4d495c6eSApple OSS Distributions
357*4d495c6eSApple OSS Distributions restore_ymm(ymms);
358*4d495c6eSApple OSS Distributions
359*4d495c6eSApple OSS Distributions return rv;
360*4d495c6eSApple OSS Distributions }
361*4d495c6eSApple OSS Distributions
362*4d495c6eSApple OSS Distributions void
ymm_integrity(int time)363*4d495c6eSApple OSS Distributions ymm_integrity(int time)
364*4d495c6eSApple OSS Distributions {
365*4d495c6eSApple OSS Distributions mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
366*4d495c6eSApple OSS Distributions kern_return_t kret;
367*4d495c6eSApple OSS Distributions X86_AVX_STATE_T avx_state, avx_state2;
368*4d495c6eSApple OSS Distributions mach_port_t ts = mach_thread_self();
369*4d495c6eSApple OSS Distributions
370*4d495c6eSApple OSS Distributions bzero(&avx_state, sizeof(avx_state));
371*4d495c6eSApple OSS Distributions bzero(&avx_state2, sizeof(avx_state));
372*4d495c6eSApple OSS Distributions
373*4d495c6eSApple OSS Distributions kret = _thread_get_state_avx(
374*4d495c6eSApple OSS Distributions ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
375*4d495c6eSApple OSS Distributions );
376*4d495c6eSApple OSS Distributions
377*4d495c6eSApple OSS Distributions store_ymm(vec256array2);
378*4d495c6eSApple OSS Distributions
379*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
380*4d495c6eSApple OSS Distributions vec256_to_string(vec256array2, vec_str_buf);
381*4d495c6eSApple OSS Distributions T_LOG("Initial state:\n%s", vec_str_buf);
382*4d495c6eSApple OSS Distributions
383*4d495c6eSApple OSS Distributions copy_ymm_state_to_vector(&avx_state, vec256array1);
384*4d495c6eSApple OSS Distributions assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));
385*4d495c6eSApple OSS Distributions
386*4d495c6eSApple OSS Distributions populate_ymm();
387*4d495c6eSApple OSS Distributions
388*4d495c6eSApple OSS Distributions kret = _thread_get_state_avx(
389*4d495c6eSApple OSS Distributions ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
390*4d495c6eSApple OSS Distributions );
391*4d495c6eSApple OSS Distributions
392*4d495c6eSApple OSS Distributions store_ymm(vec256array2);
393*4d495c6eSApple OSS Distributions
394*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
395*4d495c6eSApple OSS Distributions vec256_to_string(vec256array2, vec_str_buf);
396*4d495c6eSApple OSS Distributions T_LOG("Populated state:\n%s", vec_str_buf);
397*4d495c6eSApple OSS Distributions
398*4d495c6eSApple OSS Distributions copy_ymm_state_to_vector(&avx_state2, vec256array1);
399*4d495c6eSApple OSS Distributions assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));
400*4d495c6eSApple OSS Distributions
401*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
402*4d495c6eSApple OSS Distributions start_timer(time, ymm_sigalrm_handler);
403*4d495c6eSApple OSS Distributions
404*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
405*4d495c6eSApple OSS Distributions populate_ymm();
406*4d495c6eSApple OSS Distributions
407*4d495c6eSApple OSS Distributions /* Check state until timer fires */
408*4d495c6eSApple OSS Distributions while (checking) {
409*4d495c6eSApple OSS Distributions check_ymm();
410*4d495c6eSApple OSS Distributions }
411*4d495c6eSApple OSS Distributions
412*4d495c6eSApple OSS Distributions /* Check that the sig handler changed out AVX state */
413*4d495c6eSApple OSS Distributions store_ymm(vec256array1);
414*4d495c6eSApple OSS Distributions
415*4d495c6eSApple OSS Distributions uint32_t *p = (uint32_t *) &vec256array1[7];
416*4d495c6eSApple OSS Distributions if (p[0] != STOP_COOKIE_256 ||
417*4d495c6eSApple OSS Distributions p[4] != STOP_COOKIE_256) {
418*4d495c6eSApple OSS Distributions vec256_to_string(vec256array1, vec_str_buf);
419*4d495c6eSApple OSS Distributions T_ASSERT_FAIL("sigreturn failed to stick");
420*4d495c6eSApple OSS Distributions T_LOG("State:\n%s", vec_str_buf);
421*4d495c6eSApple OSS Distributions }
422*4d495c6eSApple OSS Distributions
423*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
424*4d495c6eSApple OSS Distributions T_PASS("No ymm register corruption occurred");
425*4d495c6eSApple OSS Distributions }
426*4d495c6eSApple OSS Distributions
427*4d495c6eSApple OSS Distributions /*
428*4d495c6eSApple OSS Distributions * zmm functions
429*4d495c6eSApple OSS Distributions */
430*4d495c6eSApple OSS Distributions
431*4d495c6eSApple OSS Distributions static inline void
store_opmask(OPMASK k[])432*4d495c6eSApple OSS Distributions store_opmask(OPMASK k[])
433*4d495c6eSApple OSS Distributions {
434*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
435*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
436*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
437*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
438*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
439*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
440*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
441*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
442*4d495c6eSApple OSS Distributions }
443*4d495c6eSApple OSS Distributions
444*4d495c6eSApple OSS Distributions static inline void
store_zmm(VECTOR512 * vecarray)445*4d495c6eSApple OSS Distributions store_zmm(VECTOR512 *vecarray)
446*4d495c6eSApple OSS Distributions {
447*4d495c6eSApple OSS Distributions int i = 0;
448*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
449*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
450*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
451*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
452*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
453*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
454*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
455*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
456*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
457*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
458*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
459*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
460*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
461*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
462*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
463*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
464*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
465*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
466*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
467*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
468*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
469*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
470*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
471*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
472*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
473*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
474*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
475*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
476*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
477*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
478*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
479*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
480*4d495c6eSApple OSS Distributions i++; __asm__ volatile ("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
481*4d495c6eSApple OSS Distributions #endif
482*4d495c6eSApple OSS Distributions }
483*4d495c6eSApple OSS Distributions
484*4d495c6eSApple OSS Distributions static inline void
restore_zmm(VECTOR512 * vecarray)485*4d495c6eSApple OSS Distributions restore_zmm(VECTOR512 *vecarray)
486*4d495c6eSApple OSS Distributions {
487*4d495c6eSApple OSS Distributions VECTOR512 *p = vecarray;
488*4d495c6eSApple OSS Distributions
489*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0"); p++;
490*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1"); p++;
491*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2"); p++;
492*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3"); p++;
493*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4"); p++;
494*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5"); p++;
495*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6"); p++;
496*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");
497*4d495c6eSApple OSS Distributions
498*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
499*4d495c6eSApple OSS Distributions ++p; __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8"); p++;
500*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9"); p++;
501*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10"); p++;
502*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11"); p++;
503*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12"); p++;
504*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13"); p++;
505*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14"); p++;
506*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15"); p++;
507*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16"); p++;
508*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17"); p++;
509*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18"); p++;
510*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19"); p++;
511*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20"); p++;
512*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21"); p++;
513*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22"); p++;
514*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23"); p++;
515*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24"); p++;
516*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25"); p++;
517*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26"); p++;
518*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27"); p++;
519*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28"); p++;
520*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29"); p++;
521*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30"); p++;
522*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
523*4d495c6eSApple OSS Distributions #endif
524*4d495c6eSApple OSS Distributions }
525*4d495c6eSApple OSS Distributions
526*4d495c6eSApple OSS Distributions static inline void
zero_opmask(void)527*4d495c6eSApple OSS Distributions zero_opmask(void)
528*4d495c6eSApple OSS Distributions {
529*4d495c6eSApple OSS Distributions uint64_t zero = 0x0000000000000000ULL;
530*4d495c6eSApple OSS Distributions
531*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k0" : :"m" (zero) : "k0");
532*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k1" : :"m" (zero) : "k1");
533*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k2" : :"m" (zero) : "k2");
534*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k3" : :"m" (zero) : "k3");
535*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k4" : :"m" (zero) : "k4");
536*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k5" : :"m" (zero) : "k5");
537*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k6" : :"m" (zero) : "k6");
538*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k7" : :"m" (zero) : "k7");
539*4d495c6eSApple OSS Distributions store_opmask(karray0);
540*4d495c6eSApple OSS Distributions }
541*4d495c6eSApple OSS Distributions
542*4d495c6eSApple OSS Distributions static inline void
populate_opmask(void)543*4d495c6eSApple OSS Distributions populate_opmask(void)
544*4d495c6eSApple OSS Distributions {
545*4d495c6eSApple OSS Distributions uint64_t k[8];
546*4d495c6eSApple OSS Distributions
547*4d495c6eSApple OSS Distributions for (int j = 0; j < 8; j++) {
548*4d495c6eSApple OSS Distributions k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);
549*4d495c6eSApple OSS Distributions }
550*4d495c6eSApple OSS Distributions
551*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]) : "k0");
552*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]) : "k1");
553*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]) : "k2");
554*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]) : "k3");
555*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]) : "k4");
556*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]) : "k5");
557*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]) : "k6");
558*4d495c6eSApple OSS Distributions __asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]) : "k7");
559*4d495c6eSApple OSS Distributions
560*4d495c6eSApple OSS Distributions store_opmask(karray0);
561*4d495c6eSApple OSS Distributions }
562*4d495c6eSApple OSS Distributions
563*4d495c6eSApple OSS Distributions kern_return_t
_thread_get_state_avx512(thread_t thread,int flavor,thread_state_t state,mach_msg_type_number_t * state_count)564*4d495c6eSApple OSS Distributions _thread_get_state_avx512(
565*4d495c6eSApple OSS Distributions thread_t thread,
566*4d495c6eSApple OSS Distributions int flavor,
567*4d495c6eSApple OSS Distributions thread_state_t state, /* pointer to OUT array */
568*4d495c6eSApple OSS Distributions mach_msg_type_number_t *state_count) /*IN/OUT*/
569*4d495c6eSApple OSS Distributions {
570*4d495c6eSApple OSS Distributions kern_return_t rv;
571*4d495c6eSApple OSS Distributions VECTOR512 zmms[ZMM_MAX];
572*4d495c6eSApple OSS Distributions
573*4d495c6eSApple OSS Distributions /*
574*4d495c6eSApple OSS Distributions * We must save and restore the ZMMs across thread_get_state() because
575*4d495c6eSApple OSS Distributions * code in thread_get_state changes at least one xmm register AFTER the
576*4d495c6eSApple OSS Distributions * thread_get_state has saved the state in userspace. While it's still
577*4d495c6eSApple OSS Distributions * possible for something to muck with %XMMs BEFORE making the mach
578*4d495c6eSApple OSS Distributions * system call (and rendering this save/restore useless), that does not
579*4d495c6eSApple OSS Distributions * currently occur, and since we depend on the avx512 state saved in the
580*4d495c6eSApple OSS Distributions * thread_get_state to be the same as that manually copied from ZMMs after
581*4d495c6eSApple OSS Distributions * thread_get_state returns, we have to go through these machinations.
582*4d495c6eSApple OSS Distributions */
583*4d495c6eSApple OSS Distributions store_zmm(zmms);
584*4d495c6eSApple OSS Distributions
585*4d495c6eSApple OSS Distributions rv = thread_get_state(thread, flavor, state, state_count);
586*4d495c6eSApple OSS Distributions
587*4d495c6eSApple OSS Distributions restore_zmm(zmms);
588*4d495c6eSApple OSS Distributions
589*4d495c6eSApple OSS Distributions return rv;
590*4d495c6eSApple OSS Distributions }
591*4d495c6eSApple OSS Distributions
592*4d495c6eSApple OSS Distributions static inline void
zero_zmm(void)593*4d495c6eSApple OSS Distributions zero_zmm(void)
594*4d495c6eSApple OSS Distributions {
595*4d495c6eSApple OSS Distributions uint64_t zero[8] VEC512ALIGN = {0};
596*4d495c6eSApple OSS Distributions
597*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (zero) : "zmm0");
598*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (zero) : "zmm1");
599*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (zero) : "zmm2");
600*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (zero) : "zmm3");
601*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (zero) : "zmm4");
602*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (zero) : "zmm5");
603*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (zero) : "zmm6");
604*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (zero) : "zmm7");
605*4d495c6eSApple OSS Distributions
606*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
607*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (zero) : "zmm8");
608*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (zero) : "zmm9");
609*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (zero) : "zmm10");
610*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (zero) : "zmm11");
611*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (zero) : "zmm12");
612*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (zero) : "zmm13");
613*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (zero) : "zmm14");
614*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (zero) : "zmm15");
615*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (zero) : "zmm16");
616*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (zero) : "zmm17");
617*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (zero) : "zmm18");
618*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (zero) : "zmm19");
619*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (zero) : "zmm20");
620*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (zero) : "zmm21");
621*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (zero) : "zmm22");
622*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (zero) : "zmm23");
623*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (zero) : "zmm24");
624*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (zero) : "zmm25");
625*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (zero) : "zmm26");
626*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (zero) : "zmm27");
627*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (zero) : "zmm28");
628*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (zero) : "zmm29");
629*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (zero) : "zmm30");
630*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (zero) : "zmm31");
631*4d495c6eSApple OSS Distributions #endif
632*4d495c6eSApple OSS Distributions
633*4d495c6eSApple OSS Distributions store_zmm(vec512array0);
634*4d495c6eSApple OSS Distributions }
635*4d495c6eSApple OSS Distributions
636*4d495c6eSApple OSS Distributions static inline void
populate_zmm(void)637*4d495c6eSApple OSS Distributions populate_zmm(void)
638*4d495c6eSApple OSS Distributions {
639*4d495c6eSApple OSS Distributions int j;
640*4d495c6eSApple OSS Distributions uint64_t p[8] VEC512ALIGN;
641*4d495c6eSApple OSS Distributions
642*4d495c6eSApple OSS Distributions for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
643*4d495c6eSApple OSS Distributions p[j] = ((uint64_t) getpid() << 32) + getpid();
644*4d495c6eSApple OSS Distributions }
645*4d495c6eSApple OSS Distributions
646*4d495c6eSApple OSS Distributions p[0] = 0x0000000000000000ULL;
647*4d495c6eSApple OSS Distributions p[2] = 0x4444444444444444ULL;
648*4d495c6eSApple OSS Distributions p[4] = 0x8888888888888888ULL;
649*4d495c6eSApple OSS Distributions p[7] = 0xCCCCCCCCCCCCCCCCULL;
650*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0");
651*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1");
652*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2");
653*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3");
654*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4");
655*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5");
656*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6");
657*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");
658*4d495c6eSApple OSS Distributions
659*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
660*4d495c6eSApple OSS Distributions p[0] = 0x1111111111111111ULL;
661*4d495c6eSApple OSS Distributions p[2] = 0x5555555555555555ULL;
662*4d495c6eSApple OSS Distributions p[4] = 0x9999999999999999ULL;
663*4d495c6eSApple OSS Distributions p[7] = 0xDDDDDDDDDDDDDDDDULL;
664*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8");
665*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9");
666*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10");
667*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11");
668*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12");
669*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13");
670*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14");
671*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15");
672*4d495c6eSApple OSS Distributions
673*4d495c6eSApple OSS Distributions p[0] = 0x2222222222222222ULL;
674*4d495c6eSApple OSS Distributions p[2] = 0x6666666666666666ULL;
675*4d495c6eSApple OSS Distributions p[4] = 0xAAAAAAAAAAAAAAAAULL;
676*4d495c6eSApple OSS Distributions p[7] = 0xEEEEEEEEEEEEEEEEULL;
677*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16");
678*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17");
679*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18");
680*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19");
681*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20");
682*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21");
683*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22");
684*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23");
685*4d495c6eSApple OSS Distributions
686*4d495c6eSApple OSS Distributions p[0] = 0x3333333333333333ULL;
687*4d495c6eSApple OSS Distributions p[2] = 0x7777777777777777ULL;
688*4d495c6eSApple OSS Distributions p[4] = 0xBBBBBBBBBBBBBBBBULL;
689*4d495c6eSApple OSS Distributions p[7] = 0xFFFFFFFFFFFFFFFFULL;
690*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24");
691*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25");
692*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26");
693*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27");
694*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28");
695*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29");
696*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30");
697*4d495c6eSApple OSS Distributions __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
698*4d495c6eSApple OSS Distributions #endif
699*4d495c6eSApple OSS Distributions
700*4d495c6eSApple OSS Distributions store_zmm(vec512array0);
701*4d495c6eSApple OSS Distributions }
702*4d495c6eSApple OSS Distributions
703*4d495c6eSApple OSS Distributions void
vec512_to_string(VECTOR512 * vec,char * buf)704*4d495c6eSApple OSS Distributions vec512_to_string(VECTOR512 *vec, char *buf)
705*4d495c6eSApple OSS Distributions {
706*4d495c6eSApple OSS Distributions unsigned int vec_idx = 0;
707*4d495c6eSApple OSS Distributions unsigned int buf_idx = 0;
708*4d495c6eSApple OSS Distributions int ret = 0;
709*4d495c6eSApple OSS Distributions
710*4d495c6eSApple OSS Distributions for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
711*4d495c6eSApple OSS Distributions uint64_t a[8];
712*4d495c6eSApple OSS Distributions bcopy(&vec[vec_idx], &a[0], sizeof(a));
713*4d495c6eSApple OSS Distributions ret = sprintf(
714*4d495c6eSApple OSS Distributions buf + buf_idx,
715*4d495c6eSApple OSS Distributions "0x%016llx:%016llx:%016llx:%016llx:"
716*4d495c6eSApple OSS Distributions "%016llx:%016llx:%016llx:%016llx%s",
717*4d495c6eSApple OSS Distributions a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
718*4d495c6eSApple OSS Distributions vec_idx < ZMM_MAX - 1 ? "\n" : ""
719*4d495c6eSApple OSS Distributions );
720*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
721*4d495c6eSApple OSS Distributions buf_idx += ret;
722*4d495c6eSApple OSS Distributions }
723*4d495c6eSApple OSS Distributions }
724*4d495c6eSApple OSS Distributions
725*4d495c6eSApple OSS Distributions void
opmask_to_string(OPMASK * karray,char * buf)726*4d495c6eSApple OSS Distributions opmask_to_string(OPMASK *karray, char *buf)
727*4d495c6eSApple OSS Distributions {
728*4d495c6eSApple OSS Distributions unsigned int karray_idx = 0;
729*4d495c6eSApple OSS Distributions unsigned int buf_idx = 0;
730*4d495c6eSApple OSS Distributions int ret = 0;
731*4d495c6eSApple OSS Distributions
732*4d495c6eSApple OSS Distributions for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
733*4d495c6eSApple OSS Distributions ret = sprintf(
734*4d495c6eSApple OSS Distributions buf + buf_idx,
735*4d495c6eSApple OSS Distributions "k%d: 0x%016llx%s",
736*4d495c6eSApple OSS Distributions karray_idx, karray[karray_idx],
737*4d495c6eSApple OSS Distributions karray_idx < KARRAY_MAX ? "\n" : ""
738*4d495c6eSApple OSS Distributions );
739*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
740*4d495c6eSApple OSS Distributions buf_idx += ret;
741*4d495c6eSApple OSS Distributions }
742*4d495c6eSApple OSS Distributions }
743*4d495c6eSApple OSS Distributions
744*4d495c6eSApple OSS Distributions static void
assert_zmm_eq(void * a,void * b,int c)745*4d495c6eSApple OSS Distributions assert_zmm_eq(void *a, void *b, int c)
746*4d495c6eSApple OSS Distributions {
747*4d495c6eSApple OSS Distributions if (memcmp_unoptimized(a, b, c)) {
748*4d495c6eSApple OSS Distributions vec512_to_string(a, vec_str_buf);
749*4d495c6eSApple OSS Distributions T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
750*4d495c6eSApple OSS Distributions vec512_to_string(b, vec_str_buf);
751*4d495c6eSApple OSS Distributions T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
752*4d495c6eSApple OSS Distributions T_ASSERT_FAIL("Vectors not equal");
753*4d495c6eSApple OSS Distributions }
754*4d495c6eSApple OSS Distributions }
755*4d495c6eSApple OSS Distributions
756*4d495c6eSApple OSS Distributions static void
assert_opmask_eq(OPMASK * a,OPMASK * b)757*4d495c6eSApple OSS Distributions assert_opmask_eq(OPMASK *a, OPMASK *b)
758*4d495c6eSApple OSS Distributions {
759*4d495c6eSApple OSS Distributions for (int i = 0; i < KARRAY_MAX; i++) {
760*4d495c6eSApple OSS Distributions if (a[i] != b[i]) {
761*4d495c6eSApple OSS Distributions opmask_to_string(a, karray_str_buf);
762*4d495c6eSApple OSS Distributions T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
763*4d495c6eSApple OSS Distributions opmask_to_string(b, karray_str_buf);
764*4d495c6eSApple OSS Distributions T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
765*4d495c6eSApple OSS Distributions T_ASSERT_FAIL("opmasks not equal");
766*4d495c6eSApple OSS Distributions }
767*4d495c6eSApple OSS Distributions }
768*4d495c6eSApple OSS Distributions }
769*4d495c6eSApple OSS Distributions
770*4d495c6eSApple OSS Distributions void
check_zmm(boolean_t check_cookie)771*4d495c6eSApple OSS Distributions check_zmm(boolean_t check_cookie)
772*4d495c6eSApple OSS Distributions {
773*4d495c6eSApple OSS Distributions uint64_t *p = (uint64_t *) &vec512array1[7];
774*4d495c6eSApple OSS Distributions store_opmask(karray1);
775*4d495c6eSApple OSS Distributions store_zmm(vec512array1);
776*4d495c6eSApple OSS Distributions if (check_cookie && p[0] == STOP_COOKIE_512) {
777*4d495c6eSApple OSS Distributions return;
778*4d495c6eSApple OSS Distributions }
779*4d495c6eSApple OSS Distributions
780*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
781*4d495c6eSApple OSS Distributions assert_opmask_eq(karray0, karray1);
782*4d495c6eSApple OSS Distributions }
783*4d495c6eSApple OSS Distributions
784*4d495c6eSApple OSS Distributions static void
copy_state_to_opmask(X86_AVX512_STATE_T * sp,OPMASK * op)785*4d495c6eSApple OSS Distributions copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
786*4d495c6eSApple OSS Distributions {
787*4d495c6eSApple OSS Distributions OPMASK *k = (OPMASK *) &sp->__fpu_k0;
788*4d495c6eSApple OSS Distributions for (int i = 0; i < KARRAY_MAX; i++) {
789*4d495c6eSApple OSS Distributions bcopy(&k[i], &op[i], sizeof(*op));
790*4d495c6eSApple OSS Distributions }
791*4d495c6eSApple OSS Distributions }
792*4d495c6eSApple OSS Distributions
793*4d495c6eSApple OSS Distributions static void
copy_zmm_state_to_vector(X86_AVX512_STATE_T * sp,VECTOR512 * vp)794*4d495c6eSApple OSS Distributions copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
795*4d495c6eSApple OSS Distributions {
796*4d495c6eSApple OSS Distributions int i;
797*4d495c6eSApple OSS Distributions struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
798*4d495c6eSApple OSS Distributions struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
799*4d495c6eSApple OSS Distributions struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
800*4d495c6eSApple OSS Distributions #if defined(__x86_64__)
801*4d495c6eSApple OSS Distributions struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;
802*4d495c6eSApple OSS Distributions
803*4d495c6eSApple OSS Distributions for (i = 0; i < ZMM_MAX / 2; i++) {
804*4d495c6eSApple OSS Distributions bcopy(&xmm[i], &vp[i], sizeof(*xmm));
805*4d495c6eSApple OSS Distributions bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
806*4d495c6eSApple OSS Distributions bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
807*4d495c6eSApple OSS Distributions bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
808*4d495c6eSApple OSS Distributions }
809*4d495c6eSApple OSS Distributions #else
810*4d495c6eSApple OSS Distributions for (i = 0; i < ZMM_MAX; i++) {
811*4d495c6eSApple OSS Distributions bcopy(&xmm[i], &vp[i], sizeof(*xmm));
812*4d495c6eSApple OSS Distributions bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
813*4d495c6eSApple OSS Distributions bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
814*4d495c6eSApple OSS Distributions }
815*4d495c6eSApple OSS Distributions #endif
816*4d495c6eSApple OSS Distributions }
817*4d495c6eSApple OSS Distributions
818*4d495c6eSApple OSS Distributions static void
zmm_sigalrm_handler(int signum __unused,siginfo_t * info __unused,void * ctx)819*4d495c6eSApple OSS Distributions zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
820*4d495c6eSApple OSS Distributions {
821*4d495c6eSApple OSS Distributions ucontext_t *contextp = (ucontext_t *) ctx;
822*4d495c6eSApple OSS Distributions mcontext_t mcontext = contextp->uc_mcontext;
823*4d495c6eSApple OSS Distributions X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
824*4d495c6eSApple OSS Distributions uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
825*4d495c6eSApple OSS Distributions uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
826*4d495c6eSApple OSS Distributions uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
827*4d495c6eSApple OSS Distributions uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;
828*4d495c6eSApple OSS Distributions
829*4d495c6eSApple OSS Distributions /* Check for AVX512 state */
830*4d495c6eSApple OSS Distributions T_QUIET;
831*4d495c6eSApple OSS Distributions T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");
832*4d495c6eSApple OSS Distributions
833*4d495c6eSApple OSS Distributions /* Check that the state in the context is what's set and expected */
834*4d495c6eSApple OSS Distributions copy_zmm_state_to_vector(avx_state, vec512array3);
835*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
836*4d495c6eSApple OSS Distributions copy_state_to_opmask(avx_state, karray3);
837*4d495c6eSApple OSS Distributions assert_opmask_eq(karray3, karray0);
838*4d495c6eSApple OSS Distributions
839*4d495c6eSApple OSS Distributions /* Change the context and break the main loop */
840*4d495c6eSApple OSS Distributions xp[0] = STOP_COOKIE_512;
841*4d495c6eSApple OSS Distributions yp[0] = STOP_COOKIE_512;
842*4d495c6eSApple OSS Distributions zp[0] = STOP_COOKIE_512;
843*4d495c6eSApple OSS Distributions kp[7] = STOP_COOKIE_512;
844*4d495c6eSApple OSS Distributions checking = FALSE;
845*4d495c6eSApple OSS Distributions }
846*4d495c6eSApple OSS Distributions
847*4d495c6eSApple OSS Distributions static void
zmm_sigalrm_handler_no_mod(int signum __unused,siginfo_t * info __unused,void * ctx)848*4d495c6eSApple OSS Distributions zmm_sigalrm_handler_no_mod(int signum __unused, siginfo_t *info __unused, void *ctx)
849*4d495c6eSApple OSS Distributions {
850*4d495c6eSApple OSS Distributions ucontext_t *contextp = (ucontext_t *) ctx;
851*4d495c6eSApple OSS Distributions mcontext_t mcontext = contextp->uc_mcontext;
852*4d495c6eSApple OSS Distributions X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
853*4d495c6eSApple OSS Distributions uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
854*4d495c6eSApple OSS Distributions uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
855*4d495c6eSApple OSS Distributions uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
856*4d495c6eSApple OSS Distributions uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;
857*4d495c6eSApple OSS Distributions
858*4d495c6eSApple OSS Distributions /* Check for AVX512 state */
859*4d495c6eSApple OSS Distributions T_QUIET;
860*4d495c6eSApple OSS Distributions T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");
861*4d495c6eSApple OSS Distributions
862*4d495c6eSApple OSS Distributions /* Check that the state in the context is what's set and expected */
863*4d495c6eSApple OSS Distributions copy_zmm_state_to_vector(avx_state, vec512array3);
864*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array3));
865*4d495c6eSApple OSS Distributions copy_state_to_opmask(avx_state, karray3);
866*4d495c6eSApple OSS Distributions assert_opmask_eq(karray3, karray0);
867*4d495c6eSApple OSS Distributions
868*4d495c6eSApple OSS Distributions /* Change the context and break the main loop */
869*4d495c6eSApple OSS Distributions checking = FALSE;
870*4d495c6eSApple OSS Distributions }
871*4d495c6eSApple OSS Distributions
872*4d495c6eSApple OSS Distributions
873*4d495c6eSApple OSS Distributions void
zmm_integrity(int time)874*4d495c6eSApple OSS Distributions zmm_integrity(int time)
875*4d495c6eSApple OSS Distributions {
876*4d495c6eSApple OSS Distributions mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
877*4d495c6eSApple OSS Distributions kern_return_t kret;
878*4d495c6eSApple OSS Distributions X86_AVX512_STATE_T avx_state, avx_state2;
879*4d495c6eSApple OSS Distributions mach_port_t ts = mach_thread_self();
880*4d495c6eSApple OSS Distributions
881*4d495c6eSApple OSS Distributions bzero(&avx_state, sizeof(avx_state));
882*4d495c6eSApple OSS Distributions bzero(&avx_state2, sizeof(avx_state));
883*4d495c6eSApple OSS Distributions
884*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
885*4d495c6eSApple OSS Distributions store_opmask(karray2);
886*4d495c6eSApple OSS Distributions
887*4d495c6eSApple OSS Distributions kret = _thread_get_state_avx512(
888*4d495c6eSApple OSS Distributions ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
889*4d495c6eSApple OSS Distributions );
890*4d495c6eSApple OSS Distributions
891*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
892*4d495c6eSApple OSS Distributions vec512_to_string(vec512array2, vec_str_buf);
893*4d495c6eSApple OSS Distributions opmask_to_string(karray2, karray_str_buf);
894*4d495c6eSApple OSS Distributions T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);
895*4d495c6eSApple OSS Distributions
896*4d495c6eSApple OSS Distributions copy_zmm_state_to_vector(&avx_state, vec512array1);
897*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
898*4d495c6eSApple OSS Distributions copy_state_to_opmask(&avx_state, karray1);
899*4d495c6eSApple OSS Distributions assert_opmask_eq(karray2, karray1);
900*4d495c6eSApple OSS Distributions
901*4d495c6eSApple OSS Distributions populate_zmm();
902*4d495c6eSApple OSS Distributions populate_opmask();
903*4d495c6eSApple OSS Distributions
904*4d495c6eSApple OSS Distributions kret = _thread_get_state_avx512(
905*4d495c6eSApple OSS Distributions ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
906*4d495c6eSApple OSS Distributions );
907*4d495c6eSApple OSS Distributions
908*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
909*4d495c6eSApple OSS Distributions store_opmask(karray2);
910*4d495c6eSApple OSS Distributions
911*4d495c6eSApple OSS Distributions T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
912*4d495c6eSApple OSS Distributions vec512_to_string(vec512array2, vec_str_buf);
913*4d495c6eSApple OSS Distributions opmask_to_string(karray2, karray_str_buf);
914*4d495c6eSApple OSS Distributions T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);
915*4d495c6eSApple OSS Distributions
916*4d495c6eSApple OSS Distributions copy_zmm_state_to_vector(&avx_state2, vec512array1);
917*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
918*4d495c6eSApple OSS Distributions copy_state_to_opmask(&avx_state2, karray1);
919*4d495c6eSApple OSS Distributions assert_opmask_eq(karray2, karray1);
920*4d495c6eSApple OSS Distributions
921*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
922*4d495c6eSApple OSS Distributions start_timer(time, zmm_sigalrm_handler);
923*4d495c6eSApple OSS Distributions
924*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
925*4d495c6eSApple OSS Distributions populate_zmm();
926*4d495c6eSApple OSS Distributions populate_opmask();
927*4d495c6eSApple OSS Distributions
928*4d495c6eSApple OSS Distributions /* Check state until timer fires */
929*4d495c6eSApple OSS Distributions while (checking) {
930*4d495c6eSApple OSS Distributions check_zmm(TRUE);
931*4d495c6eSApple OSS Distributions }
932*4d495c6eSApple OSS Distributions
933*4d495c6eSApple OSS Distributions /* Check that the sig handler changed our AVX state */
934*4d495c6eSApple OSS Distributions store_zmm(vec512array1);
935*4d495c6eSApple OSS Distributions store_opmask(karray1);
936*4d495c6eSApple OSS Distributions
937*4d495c6eSApple OSS Distributions uint64_t *p = (uint64_t *) &vec512array1[7];
938*4d495c6eSApple OSS Distributions if (p[0] != STOP_COOKIE_512 ||
939*4d495c6eSApple OSS Distributions p[2] != STOP_COOKIE_512 ||
940*4d495c6eSApple OSS Distributions p[4] != STOP_COOKIE_512 ||
941*4d495c6eSApple OSS Distributions karray1[7] != STOP_COOKIE_512) {
942*4d495c6eSApple OSS Distributions vec512_to_string(vec512array1, vec_str_buf);
943*4d495c6eSApple OSS Distributions opmask_to_string(karray1, karray_str_buf);
944*4d495c6eSApple OSS Distributions T_ASSERT_FAIL("sigreturn failed to stick");
945*4d495c6eSApple OSS Distributions T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
946*4d495c6eSApple OSS Distributions }
947*4d495c6eSApple OSS Distributions
948*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
949*4d495c6eSApple OSS Distributions T_PASS("No zmm register corruption occurred");
950*4d495c6eSApple OSS Distributions }
951*4d495c6eSApple OSS Distributions
952*4d495c6eSApple OSS Distributions void
zmm_zeroing_optimization_integrity(int time)953*4d495c6eSApple OSS Distributions zmm_zeroing_optimization_integrity(int time)
954*4d495c6eSApple OSS Distributions {
955*4d495c6eSApple OSS Distributions /*
956*4d495c6eSApple OSS Distributions * Check ZMM zero and OpMask zero
957*4d495c6eSApple OSS Distributions */
958*4d495c6eSApple OSS Distributions T_LOG("Checking ZMM zero and OpMask zero");
959*4d495c6eSApple OSS Distributions checking = true;
960*4d495c6eSApple OSS Distributions zero_zmm();
961*4d495c6eSApple OSS Distributions zero_opmask();
962*4d495c6eSApple OSS Distributions
963*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
964*4d495c6eSApple OSS Distributions start_timer(time, zmm_sigalrm_handler_no_mod);
965*4d495c6eSApple OSS Distributions
966*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
967*4d495c6eSApple OSS Distributions zero_zmm();
968*4d495c6eSApple OSS Distributions zero_opmask();
969*4d495c6eSApple OSS Distributions
970*4d495c6eSApple OSS Distributions /* Check state until timer fires */
971*4d495c6eSApple OSS Distributions while (checking) {
972*4d495c6eSApple OSS Distributions check_zmm(FALSE);
973*4d495c6eSApple OSS Distributions }
974*4d495c6eSApple OSS Distributions
975*4d495c6eSApple OSS Distributions /* Check that sig handler did not changed our AVX state */
976*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
977*4d495c6eSApple OSS Distributions store_opmask(karray2);
978*4d495c6eSApple OSS Distributions
979*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
980*4d495c6eSApple OSS Distributions assert_opmask_eq(karray0, karray2);
981*4d495c6eSApple OSS Distributions
982*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
983*4d495c6eSApple OSS Distributions T_PASS("ZMM zero and OpMask zero");
984*4d495c6eSApple OSS Distributions
985*4d495c6eSApple OSS Distributions
986*4d495c6eSApple OSS Distributions /*
987*4d495c6eSApple OSS Distributions * Check ZMM zero and OpMask non-zero
988*4d495c6eSApple OSS Distributions */
989*4d495c6eSApple OSS Distributions T_LOG("Checking ZMM zero and OpMask non-zero");
990*4d495c6eSApple OSS Distributions checking = true;
991*4d495c6eSApple OSS Distributions zero_zmm();
992*4d495c6eSApple OSS Distributions populate_opmask();
993*4d495c6eSApple OSS Distributions
994*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
995*4d495c6eSApple OSS Distributions start_timer(time, zmm_sigalrm_handler_no_mod);
996*4d495c6eSApple OSS Distributions
997*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
998*4d495c6eSApple OSS Distributions zero_zmm();
999*4d495c6eSApple OSS Distributions populate_opmask();
1000*4d495c6eSApple OSS Distributions
1001*4d495c6eSApple OSS Distributions /* Check state until timer fires */
1002*4d495c6eSApple OSS Distributions while (checking) {
1003*4d495c6eSApple OSS Distributions check_zmm(FALSE);
1004*4d495c6eSApple OSS Distributions }
1005*4d495c6eSApple OSS Distributions
1006*4d495c6eSApple OSS Distributions /* Check that sig handler did not changed our AVX state */
1007*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
1008*4d495c6eSApple OSS Distributions store_opmask(karray2);
1009*4d495c6eSApple OSS Distributions
1010*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1011*4d495c6eSApple OSS Distributions assert_opmask_eq(karray0, karray2);
1012*4d495c6eSApple OSS Distributions
1013*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
1014*4d495c6eSApple OSS Distributions T_PASS("ZMM zero and OpMask non-zero");
1015*4d495c6eSApple OSS Distributions
1016*4d495c6eSApple OSS Distributions
1017*4d495c6eSApple OSS Distributions /*
1018*4d495c6eSApple OSS Distributions * Check ZMM non-zero and OpMask zero
1019*4d495c6eSApple OSS Distributions */
1020*4d495c6eSApple OSS Distributions T_LOG("Checking ZMM non-zero and OpMask zero");
1021*4d495c6eSApple OSS Distributions checking = true;
1022*4d495c6eSApple OSS Distributions populate_zmm();
1023*4d495c6eSApple OSS Distributions zero_opmask();
1024*4d495c6eSApple OSS Distributions
1025*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
1026*4d495c6eSApple OSS Distributions start_timer(time, zmm_sigalrm_handler_no_mod);
1027*4d495c6eSApple OSS Distributions
1028*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
1029*4d495c6eSApple OSS Distributions populate_zmm();
1030*4d495c6eSApple OSS Distributions zero_opmask();
1031*4d495c6eSApple OSS Distributions
1032*4d495c6eSApple OSS Distributions /* Check state until timer fires */
1033*4d495c6eSApple OSS Distributions while (checking) {
1034*4d495c6eSApple OSS Distributions check_zmm(FALSE);
1035*4d495c6eSApple OSS Distributions }
1036*4d495c6eSApple OSS Distributions
1037*4d495c6eSApple OSS Distributions /* Check that sig handler did not changed our AVX state */
1038*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
1039*4d495c6eSApple OSS Distributions store_opmask(karray2);
1040*4d495c6eSApple OSS Distributions
1041*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1042*4d495c6eSApple OSS Distributions assert_opmask_eq(karray0, karray2);
1043*4d495c6eSApple OSS Distributions
1044*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
1045*4d495c6eSApple OSS Distributions T_PASS("ZMM non-zero and OpMask zero");
1046*4d495c6eSApple OSS Distributions
1047*4d495c6eSApple OSS Distributions
1048*4d495c6eSApple OSS Distributions /*
1049*4d495c6eSApple OSS Distributions * Check ZMM non-zero and OpMask non-zero
1050*4d495c6eSApple OSS Distributions */
1051*4d495c6eSApple OSS Distributions T_LOG("Checking ZMM non-zero and OpMask non-zero");
1052*4d495c6eSApple OSS Distributions checking = true;
1053*4d495c6eSApple OSS Distributions populate_zmm();
1054*4d495c6eSApple OSS Distributions populate_opmask();
1055*4d495c6eSApple OSS Distributions
1056*4d495c6eSApple OSS Distributions T_LOG("Running for %ds…", time);
1057*4d495c6eSApple OSS Distributions start_timer(time, zmm_sigalrm_handler_no_mod);
1058*4d495c6eSApple OSS Distributions
1059*4d495c6eSApple OSS Distributions /* re-populate because printing mucks up XMMs */
1060*4d495c6eSApple OSS Distributions populate_zmm();
1061*4d495c6eSApple OSS Distributions populate_opmask();
1062*4d495c6eSApple OSS Distributions
1063*4d495c6eSApple OSS Distributions /* Check state until timer fires */
1064*4d495c6eSApple OSS Distributions while (checking) {
1065*4d495c6eSApple OSS Distributions check_zmm(FALSE);
1066*4d495c6eSApple OSS Distributions }
1067*4d495c6eSApple OSS Distributions
1068*4d495c6eSApple OSS Distributions /* Check that sig handler did not changed our AVX state */
1069*4d495c6eSApple OSS Distributions store_zmm(vec512array2);
1070*4d495c6eSApple OSS Distributions store_opmask(karray2);
1071*4d495c6eSApple OSS Distributions
1072*4d495c6eSApple OSS Distributions assert_zmm_eq(vec512array0, vec512array2, sizeof(vec512array2));
1073*4d495c6eSApple OSS Distributions assert_opmask_eq(karray0, karray2);
1074*4d495c6eSApple OSS Distributions
1075*4d495c6eSApple OSS Distributions T_LOG("Ran for %ds", time);
1076*4d495c6eSApple OSS Distributions T_PASS("ZMM non-zero and OpMask non-zero");
1077*4d495c6eSApple OSS Distributions }
1078*4d495c6eSApple OSS Distributions
1079*4d495c6eSApple OSS Distributions /*
1080*4d495c6eSApple OSS Distributions * Main test declarations
1081*4d495c6eSApple OSS Distributions */
1082*4d495c6eSApple OSS Distributions T_DECL(ymm_integrity,
1083*4d495c6eSApple OSS Distributions "Quick soak test to verify that AVX "
1084*4d495c6eSApple OSS Distributions "register state is maintained correctly",
1085*4d495c6eSApple OSS Distributions T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
1086*4d495c6eSApple OSS Distributions require_avx();
1087*4d495c6eSApple OSS Distributions ymm_integrity(NORMAL_RUN_TIME);
1088*4d495c6eSApple OSS Distributions }
1089*4d495c6eSApple OSS Distributions
1090*4d495c6eSApple OSS Distributions T_DECL(ymm_integrity_stress,
1091*4d495c6eSApple OSS Distributions "Extended soak test to verify that AVX "
1092*4d495c6eSApple OSS Distributions "register state is maintained correctly",
1093*4d495c6eSApple OSS Distributions T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
1094*4d495c6eSApple OSS Distributions T_META_ENABLED(false)) {
1095*4d495c6eSApple OSS Distributions require_avx();
1096*4d495c6eSApple OSS Distributions ymm_integrity(LONG_RUN_TIME);
1097*4d495c6eSApple OSS Distributions }
1098*4d495c6eSApple OSS Distributions
1099*4d495c6eSApple OSS Distributions T_DECL(zmm_integrity,
1100*4d495c6eSApple OSS Distributions "Quick soak test to verify that AVX-512 "
1101*4d495c6eSApple OSS Distributions "register state is maintained correctly",
1102*4d495c6eSApple OSS Distributions T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
1103*4d495c6eSApple OSS Distributions require_avx512();
1104*4d495c6eSApple OSS Distributions zmm_integrity(NORMAL_RUN_TIME);
1105*4d495c6eSApple OSS Distributions }
1106*4d495c6eSApple OSS Distributions
1107*4d495c6eSApple OSS Distributions T_DECL(zmm_integrity_stress,
1108*4d495c6eSApple OSS Distributions "Extended soak test to verify that AVX-512 "
1109*4d495c6eSApple OSS Distributions "register state is maintained correctly",
1110*4d495c6eSApple OSS Distributions T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
1111*4d495c6eSApple OSS Distributions T_META_ENABLED(false)) {
1112*4d495c6eSApple OSS Distributions require_avx512();
1113*4d495c6eSApple OSS Distributions zmm_integrity(LONG_RUN_TIME);
1114*4d495c6eSApple OSS Distributions }
1115*4d495c6eSApple OSS Distributions
1116*4d495c6eSApple OSS Distributions T_DECL(zmm_zeroing_optimization_integrity,
1117*4d495c6eSApple OSS Distributions "Quick soak test to verify AVX-512 "
1118*4d495c6eSApple OSS Distributions "register state is maintained with "
1119*4d495c6eSApple OSS Distributions "zeroing optimizations enabled",
1120*4d495c6eSApple OSS Distributions T_META_TIMEOUT(QUICK_RUN_TIME + TIMEOUT_OVERHEAD)) {
1121*4d495c6eSApple OSS Distributions require_avx512();
1122*4d495c6eSApple OSS Distributions zmm_zeroing_optimization_integrity(QUICK_RUN_TIME);
1123*4d495c6eSApple OSS Distributions }
1124