xref: /xnu-12377.1.9/tests/arm_matrix_sme.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
/*
 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28*f6217f89SApple OSS Distributions 
29*f6217f89SApple OSS Distributions #include <mach/thread_act.h>
30*f6217f89SApple OSS Distributions #include <stdint.h>
31*f6217f89SApple OSS Distributions #include <stdlib.h>
32*f6217f89SApple OSS Distributions #include <sys/sysctl.h>
33*f6217f89SApple OSS Distributions 
34*f6217f89SApple OSS Distributions #include "arm_matrix.h"
35*f6217f89SApple OSS Distributions 
/* Number of Z (vector) registers saved in the test buffer. */
static const unsigned int SME_Z_VECTORS = 32;
/* Number of P (predicate) registers saved in the test buffer. */
static const unsigned int SME_P_VECTORS = 16;
38*f6217f89SApple OSS Distributions 
/*
 * Returns the SME version advertised by the hw.optional.arm.FEAT_SME*
 * sysctls: 2 if FEAT_SME2 is set, 1 if only FEAT_SME is set, 0 otherwise.
 *
 * The sysctls are queried once; the result is cached in function-local
 * statics and returned directly on later calls.
 */
static unsigned int
sme_version(void)
{
	static unsigned int ret = 0;
	static bool already_read = false;

	if (!already_read) {
		/*
		 * Default both features to "absent" so that a failing or
		 * unknown sysctl is treated as "feature not present" instead
		 * of leaving the variables indeterminate.
		 */
		unsigned int feat_sme = 0;
		unsigned int feat_sme2 = 0;
		size_t size;

		/* size is an in/out parameter, so reset it before every query. */
		size = sizeof(feat_sme);
		if (sysctlbyname("hw.optional.arm.FEAT_SME", &feat_sme, &size, NULL, 0) != 0) {
			feat_sme = 0;
		}

		size = sizeof(feat_sme2);
		if (sysctlbyname("hw.optional.arm.FEAT_SME2", &feat_sme2, &size, NULL, 0) != 0) {
			feat_sme2 = 0;
		}

		if (feat_sme2) {
			ret = 2;
		} else if (feat_sme) {
			ret = 1;
		} else {
			ret = 0;
		}

		already_read = true;
	}

	return ret;
}
64*f6217f89SApple OSS Distributions 
/*
 * Reads the streaming vector length with RDSVL (multiplier #1) and returns
 * it truncated to 16 bits.
 */
static uint16_t
arm_sme_svl_b(void)
{
	uint64_t svl_bytes = 0;

	__asm__ __volatile__ (
                "rdsvl	%[ret], #1"
                : [ret] "=r"(svl_bytes)
        );

	return (uint16_t)svl_bytes;
}
75*f6217f89SApple OSS Distributions 
/* Size in bytes of the ZA portion of the buffer: svl_b rows of svl_b bytes. */
static size_t
sme_za_size(void)
{
	const uint16_t svl_b = arm_sme_svl_b();

	return svl_b * svl_b;
}
81*f6217f89SApple OSS Distributions 
82*f6217f89SApple OSS Distributions static size_t
sme_z_size(void)83*f6217f89SApple OSS Distributions sme_z_size(void)
84*f6217f89SApple OSS Distributions {
85*f6217f89SApple OSS Distributions 	return arm_sme_svl_b() * SME_Z_VECTORS;
86*f6217f89SApple OSS Distributions }
87*f6217f89SApple OSS Distributions 
88*f6217f89SApple OSS Distributions static size_t
sme_p_size(void)89*f6217f89SApple OSS Distributions sme_p_size(void)
90*f6217f89SApple OSS Distributions {
91*f6217f89SApple OSS Distributions 	return arm_sme_svl_b() * SME_P_VECTORS / 8;
92*f6217f89SApple OSS Distributions }
93*f6217f89SApple OSS Distributions 
/*
 * Size in bytes reserved for ZT0 in the buffer: 64 when sme_version()
 * reports version 2 or later, otherwise 0 (no ZT0 slot).
 */
static size_t
sme_zt0_size(void)
{
	return (sme_version() >= 2) ? 64 : 0;
}
103*f6217f89SApple OSS Distributions 
/* Size in bytes of the TPIDR2_EL0 slot in the buffer (one 64-bit value). */
static size_t
sme_tpidr2_size(void)
{
	const size_t tpidr2_bytes = sizeof(uint64_t);

	return tpidr2_bytes;
}
109*f6217f89SApple OSS Distributions 
/* Returns the start of the ZA area, which is the start of the buffer itself. */
static inline uint8_t *
sme_za(void *addr)
{
	uint8_t *za_base = (uint8_t *)addr;

	return za_base;
}
115*f6217f89SApple OSS Distributions 
/* Const variant of sme_za(): the ZA area begins at the start of the buffer. */
static inline const uint8_t *
const_sme_za(const void *addr)
{
	const uint8_t *za_base = (const uint8_t *)addr;

	return za_base;
}
121*f6217f89SApple OSS Distributions 
/* Returns the start of the Z-register area, located right after the ZA area. */
static inline uint8_t *
sme_z(void *addr)
{
	uint8_t *za_base = sme_za(addr);

	return &za_base[sme_za_size()];
}
127*f6217f89SApple OSS Distributions 
/* Const variant of sme_z(): the Z-register area follows the ZA area. */
static inline const uint8_t *
const_sme_z(const void *addr)
{
	const uint8_t *za_base = const_sme_za(addr);

	return &za_base[sme_za_size()];
}
133*f6217f89SApple OSS Distributions 
/* Returns the start of the P-register area, located right after the Z area. */
static inline uint8_t *
sme_p(void *addr)
{
	uint8_t *z_base = sme_z(addr);

	return &z_base[sme_z_size()];
}
139*f6217f89SApple OSS Distributions 
/* Const variant of sme_p(): the P-register area follows the Z area. */
static inline const uint8_t *
const_sme_p(const void *addr)
{
	const uint8_t *z_base = const_sme_z(addr);

	return &z_base[sme_z_size()];
}
145*f6217f89SApple OSS Distributions 
/* Returns the start of the ZT0 slot, located right after the P area. */
static inline uint8_t *
sme_zt0(void *addr)
{
	uint8_t *p_base = sme_p(addr);

	return &p_base[sme_p_size()];
}
151*f6217f89SApple OSS Distributions 
/* Const variant of sme_zt0(): the ZT0 slot follows the P area. */
static inline const uint8_t *
const_sme_zt0(const void *addr)
{
	const uint8_t *p_base = const_sme_p(addr);

	return &p_base[sme_p_size()];
}
157*f6217f89SApple OSS Distributions 
/*
 * Total size in bytes of a buffer holding, in order: ZA, the Z registers,
 * the P registers, ZT0 (may be zero-sized) and TPIDR2_EL0.
 */
static size_t
sme_data_size(void)
{
	size_t total = sme_za_size();

	total += sme_z_size();
	total += sme_p_size();
	total += sme_zt0_size();
	total += sme_tpidr2_size();

	return total;
}
163*f6217f89SApple OSS Distributions 
/*
 * Writes val into the TPIDR2_EL0 slot of the buffer at addr. The slot sits
 * immediately after the (possibly zero-sized) ZT0 slot.
 *
 * The value is copied with memcpy() rather than stored through a casted
 * uint64_t pointer, so the write is well-defined regardless of the
 * alignment of the caller-supplied buffer.
 */
static inline void
set_sme_tpidr2_el0(void *addr, uint64_t val)
{
	uint8_t *slot = sme_zt0(addr) + sme_zt0_size();

	memcpy(slot, &val, sizeof(val));
}
170*f6217f89SApple OSS Distributions 
/*
 * Reads the TPIDR2_EL0 value stored in the buffer at addr. The slot sits
 * immediately after the (possibly zero-sized) ZT0 slot.
 *
 * The value is copied out with memcpy() rather than loaded through a casted
 * uint64_t pointer, so the read is well-defined regardless of the alignment
 * of the caller-supplied buffer.
 */
static inline uint64_t
get_sme_tpidr2_el0(const void *addr)
{
	const uint8_t *slot = const_sme_zt0(addr) + sme_zt0_size();
	uint64_t val;

	memcpy(&val, slot, sizeof(val));
	return val;
}
177*f6217f89SApple OSS Distributions 
/*
 * Allocates an uninitialized buffer of sme_data_size() bytes with malloc().
 * Returns whatever malloc() returns, including NULL on failure.
 */
static void *
sme_alloc_data(void)
{
	const size_t nbytes = sme_data_size();

	return malloc(nbytes);
}
183*f6217f89SApple OSS Distributions 
/* True when sme_version() reports any SME version (i.e. a non-zero value). */
static bool
sme_is_available(void)
{
	return sme_version() != 0;
}
189*f6217f89SApple OSS Distributions 
/* Executes the SMSTART instruction. */
static void
sme_start(void)
{
	__asm__ __volatile__ ("smstart");
}
195*f6217f89SApple OSS Distributions 
/* Executes the SMSTOP instruction. */
static void
sme_stop(void)
{
	__asm__ __volatile__ ("smstop");
}
201*f6217f89SApple OSS Distributions 
202*f6217f89SApple OSS Distributions static void
sme_load_one_vector(const void * addr)203*f6217f89SApple OSS Distributions sme_load_one_vector(const void *addr)
204*f6217f89SApple OSS Distributions {
205*f6217f89SApple OSS Distributions 	asm volatile (
206*f6217f89SApple OSS Distributions                 "mov    w12, #0"                "\n"
207*f6217f89SApple OSS Distributions                 "ldr    za[w12, #0], [%[addr]]" "\n"
208*f6217f89SApple OSS Distributions                 :
209*f6217f89SApple OSS Distributions                 : [addr] "r"(addr)
210*f6217f89SApple OSS Distributions                 : "w12"
211*f6217f89SApple OSS Distributions         );
212*f6217f89SApple OSS Distributions }
213*f6217f89SApple OSS Distributions 
214*f6217f89SApple OSS Distributions static void
sme_load_data(const void * addr)215*f6217f89SApple OSS Distributions sme_load_data(const void *addr)
216*f6217f89SApple OSS Distributions {
217*f6217f89SApple OSS Distributions 	const uint8_t *za = const_sme_za(addr);
218*f6217f89SApple OSS Distributions 	const uint8_t *z = const_sme_z(addr);
219*f6217f89SApple OSS Distributions 	const uint8_t *p = const_sme_p(addr);
220*f6217f89SApple OSS Distributions 	uint16_t svl_b = arm_sme_svl_b();
221*f6217f89SApple OSS Distributions 
222*f6217f89SApple OSS Distributions 	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
223*f6217f89SApple OSS Distributions 		asm volatile (
224*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
225*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
226*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
227*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
228*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
229*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
230*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
231*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
232*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
233*f6217f89SApple OSS Distributions                         "ldr    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
234*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
235*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
236*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
237*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
238*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
239*f6217f89SApple OSS Distributions                         "ldr    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
240*f6217f89SApple OSS Distributions                         :
241*f6217f89SApple OSS Distributions                         : [i] "r"(i),
242*f6217f89SApple OSS Distributions                           [addr] "r"(za + (i * svl_b))
243*f6217f89SApple OSS Distributions                 );
244*f6217f89SApple OSS Distributions 	}
245*f6217f89SApple OSS Distributions 
246*f6217f89SApple OSS Distributions 	asm volatile (
247*f6217f89SApple OSS Distributions                 "ldr    z0, [%[z],   #0, mul vl]"        "\n"
248*f6217f89SApple OSS Distributions                 "ldr    z1, [%[z],   #1, mul vl]"        "\n"
249*f6217f89SApple OSS Distributions                 "ldr    z2, [%[z],   #2, mul vl]"        "\n"
250*f6217f89SApple OSS Distributions                 "ldr    z3, [%[z],   #3, mul vl]"        "\n"
251*f6217f89SApple OSS Distributions                 "ldr    z4, [%[z],   #4, mul vl]"        "\n"
252*f6217f89SApple OSS Distributions                 "ldr    z5, [%[z],   #5, mul vl]"        "\n"
253*f6217f89SApple OSS Distributions                 "ldr    z6, [%[z],   #6, mul vl]"        "\n"
254*f6217f89SApple OSS Distributions                 "ldr    z7, [%[z],   #7, mul vl]"        "\n"
255*f6217f89SApple OSS Distributions                 "ldr    z8, [%[z],   #8, mul vl]"        "\n"
256*f6217f89SApple OSS Distributions                 "ldr    z9, [%[z],   #9, mul vl]"        "\n"
257*f6217f89SApple OSS Distributions                 "ldr   z10, [%[z],  #10, mul vl]"        "\n"
258*f6217f89SApple OSS Distributions                 "ldr   z11, [%[z],  #11, mul vl]"        "\n"
259*f6217f89SApple OSS Distributions                 "ldr   z12, [%[z],  #12, mul vl]"        "\n"
260*f6217f89SApple OSS Distributions                 "ldr   z13, [%[z],  #13, mul vl]"        "\n"
261*f6217f89SApple OSS Distributions                 "ldr   z14, [%[z],  #14, mul vl]"        "\n"
262*f6217f89SApple OSS Distributions                 "ldr   z15, [%[z],  #15, mul vl]"        "\n"
263*f6217f89SApple OSS Distributions                 "ldr   z16, [%[z],  #16, mul vl]"        "\n"
264*f6217f89SApple OSS Distributions                 "ldr   z17, [%[z],  #17, mul vl]"        "\n"
265*f6217f89SApple OSS Distributions                 "ldr   z18, [%[z],  #18, mul vl]"        "\n"
266*f6217f89SApple OSS Distributions                 "ldr   z19, [%[z],  #19, mul vl]"        "\n"
267*f6217f89SApple OSS Distributions                 "ldr   z20, [%[z],  #20, mul vl]"        "\n"
268*f6217f89SApple OSS Distributions                 "ldr   z21, [%[z],  #21, mul vl]"        "\n"
269*f6217f89SApple OSS Distributions                 "ldr   z22, [%[z],  #22, mul vl]"        "\n"
270*f6217f89SApple OSS Distributions                 "ldr   z23, [%[z],  #23, mul vl]"        "\n"
271*f6217f89SApple OSS Distributions                 "ldr   z24, [%[z],  #24, mul vl]"        "\n"
272*f6217f89SApple OSS Distributions                 "ldr   z25, [%[z],  #25, mul vl]"        "\n"
273*f6217f89SApple OSS Distributions                 "ldr   z26, [%[z],  #26, mul vl]"        "\n"
274*f6217f89SApple OSS Distributions                 "ldr   z27, [%[z],  #27, mul vl]"        "\n"
275*f6217f89SApple OSS Distributions                 "ldr   z28, [%[z],  #28, mul vl]"        "\n"
276*f6217f89SApple OSS Distributions                 "ldr   z29, [%[z],  #29, mul vl]"        "\n"
277*f6217f89SApple OSS Distributions                 "ldr   z30, [%[z],  #30, mul vl]"        "\n"
278*f6217f89SApple OSS Distributions                 "ldr   z31, [%[z],  #31, mul vl]"        "\n"
279*f6217f89SApple OSS Distributions                 :
280*f6217f89SApple OSS Distributions                 : [z] "r"(z)
281*f6217f89SApple OSS Distributions         );
282*f6217f89SApple OSS Distributions 
283*f6217f89SApple OSS Distributions 	asm volatile (
284*f6217f89SApple OSS Distributions                 "ldr     p0, [%[p],  #0, mul vl]"        "\n"
285*f6217f89SApple OSS Distributions                 "ldr     p1, [%[p],  #1, mul vl]"        "\n"
286*f6217f89SApple OSS Distributions                 "ldr     p2, [%[p],  #2, mul vl]"        "\n"
287*f6217f89SApple OSS Distributions                 "ldr     p3, [%[p],  #3, mul vl]"        "\n"
288*f6217f89SApple OSS Distributions                 "ldr     p4, [%[p],  #4, mul vl]"        "\n"
289*f6217f89SApple OSS Distributions                 "ldr     p5, [%[p],  #5, mul vl]"        "\n"
290*f6217f89SApple OSS Distributions                 "ldr     p6, [%[p],  #6, mul vl]"        "\n"
291*f6217f89SApple OSS Distributions                 "ldr     p7, [%[p],  #7, mul vl]"        "\n"
292*f6217f89SApple OSS Distributions                 "ldr     p8, [%[p],  #8, mul vl]"        "\n"
293*f6217f89SApple OSS Distributions                 "ldr     p9, [%[p],  #9, mul vl]"        "\n"
294*f6217f89SApple OSS Distributions                 "ldr    p10, [%[p], #10, mul vl]"        "\n"
295*f6217f89SApple OSS Distributions                 "ldr    p11, [%[p], #11, mul vl]"        "\n"
296*f6217f89SApple OSS Distributions                 "ldr    p12, [%[p], #12, mul vl]"        "\n"
297*f6217f89SApple OSS Distributions                 "ldr    p13, [%[p], #13, mul vl]"        "\n"
298*f6217f89SApple OSS Distributions                 "ldr    p14, [%[p], #14, mul vl]"        "\n"
299*f6217f89SApple OSS Distributions                 "ldr    p15, [%[p], #15, mul vl]"        "\n"
300*f6217f89SApple OSS Distributions                 :
301*f6217f89SApple OSS Distributions                 : [p] "r"(p)
302*f6217f89SApple OSS Distributions         );
303*f6217f89SApple OSS Distributions 
304*f6217f89SApple OSS Distributions 	if (sme_zt0_size()) {
305*f6217f89SApple OSS Distributions 		const uint8_t *zt0 = const_sme_zt0(addr);
306*f6217f89SApple OSS Distributions 		asm volatile (
307*f6217f89SApple OSS Distributions                         "ldr	zt0, [%[zt0]]"
308*f6217f89SApple OSS Distributions                         :
309*f6217f89SApple OSS Distributions                         : [zt0] "r"(zt0)
310*f6217f89SApple OSS Distributions                 );
311*f6217f89SApple OSS Distributions 	}
312*f6217f89SApple OSS Distributions 
313*f6217f89SApple OSS Distributions 	__builtin_arm_wsr64("TPIDR2_EL0", get_sme_tpidr2_el0(addr));
314*f6217f89SApple OSS Distributions }
315*f6217f89SApple OSS Distributions 
316*f6217f89SApple OSS Distributions static void
sme_store_data(void * addr)317*f6217f89SApple OSS Distributions sme_store_data(void *addr)
318*f6217f89SApple OSS Distributions {
319*f6217f89SApple OSS Distributions 	uint8_t *za = sme_za(addr);
320*f6217f89SApple OSS Distributions 	uint8_t *z = sme_z(addr);
321*f6217f89SApple OSS Distributions 	uint8_t *p = sme_p(addr);
322*f6217f89SApple OSS Distributions 	uint16_t svl_b = arm_sme_svl_b();
323*f6217f89SApple OSS Distributions 
324*f6217f89SApple OSS Distributions 	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
325*f6217f89SApple OSS Distributions 		asm volatile (
326*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
327*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
328*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
329*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
330*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
331*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
332*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
333*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
334*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
335*f6217f89SApple OSS Distributions                         "str    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
336*f6217f89SApple OSS Distributions                         "str    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
337*f6217f89SApple OSS Distributions                         "str    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
338*f6217f89SApple OSS Distributions                         "str    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
339*f6217f89SApple OSS Distributions                         "str    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
340*f6217f89SApple OSS Distributions                         "str    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
341*f6217f89SApple OSS Distributions                         "str    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
342*f6217f89SApple OSS Distributions                         :
343*f6217f89SApple OSS Distributions                         : [i] "r"(i),
344*f6217f89SApple OSS Distributions                           [addr] "r"(za + (i * svl_b))
345*f6217f89SApple OSS Distributions                 );
346*f6217f89SApple OSS Distributions 	}
347*f6217f89SApple OSS Distributions 
348*f6217f89SApple OSS Distributions 	asm volatile (
349*f6217f89SApple OSS Distributions                 "str    z0, [%[z],   #0, mul vl]"        "\n"
350*f6217f89SApple OSS Distributions                 "str    z1, [%[z],   #1, mul vl]"        "\n"
351*f6217f89SApple OSS Distributions                 "str    z2, [%[z],   #2, mul vl]"        "\n"
352*f6217f89SApple OSS Distributions                 "str    z3, [%[z],   #3, mul vl]"        "\n"
353*f6217f89SApple OSS Distributions                 "str    z4, [%[z],   #4, mul vl]"        "\n"
354*f6217f89SApple OSS Distributions                 "str    z5, [%[z],   #5, mul vl]"        "\n"
355*f6217f89SApple OSS Distributions                 "str    z6, [%[z],   #6, mul vl]"        "\n"
356*f6217f89SApple OSS Distributions                 "str    z7, [%[z],   #7, mul vl]"        "\n"
357*f6217f89SApple OSS Distributions                 "str    z8, [%[z],   #8, mul vl]"        "\n"
358*f6217f89SApple OSS Distributions                 "str    z9, [%[z],   #9, mul vl]"        "\n"
359*f6217f89SApple OSS Distributions                 "str   z10, [%[z],  #10, mul vl]"        "\n"
360*f6217f89SApple OSS Distributions                 "str   z11, [%[z],  #11, mul vl]"        "\n"
361*f6217f89SApple OSS Distributions                 "str   z12, [%[z],  #12, mul vl]"        "\n"
362*f6217f89SApple OSS Distributions                 "str   z13, [%[z],  #13, mul vl]"        "\n"
363*f6217f89SApple OSS Distributions                 "str   z14, [%[z],  #14, mul vl]"        "\n"
364*f6217f89SApple OSS Distributions                 "str   z15, [%[z],  #15, mul vl]"        "\n"
365*f6217f89SApple OSS Distributions                 "str   z16, [%[z],  #16, mul vl]"        "\n"
366*f6217f89SApple OSS Distributions                 "str   z17, [%[z],  #17, mul vl]"        "\n"
367*f6217f89SApple OSS Distributions                 "str   z18, [%[z],  #18, mul vl]"        "\n"
368*f6217f89SApple OSS Distributions                 "str   z19, [%[z],  #19, mul vl]"        "\n"
369*f6217f89SApple OSS Distributions                 "str   z20, [%[z],  #20, mul vl]"        "\n"
370*f6217f89SApple OSS Distributions                 "str   z21, [%[z],  #21, mul vl]"        "\n"
371*f6217f89SApple OSS Distributions                 "str   z22, [%[z],  #22, mul vl]"        "\n"
372*f6217f89SApple OSS Distributions                 "str   z23, [%[z],  #23, mul vl]"        "\n"
373*f6217f89SApple OSS Distributions                 "str   z24, [%[z],  #24, mul vl]"        "\n"
374*f6217f89SApple OSS Distributions                 "str   z25, [%[z],  #25, mul vl]"        "\n"
375*f6217f89SApple OSS Distributions                 "str   z26, [%[z],  #26, mul vl]"        "\n"
376*f6217f89SApple OSS Distributions                 "str   z27, [%[z],  #27, mul vl]"        "\n"
377*f6217f89SApple OSS Distributions                 "str   z28, [%[z],  #28, mul vl]"        "\n"
378*f6217f89SApple OSS Distributions                 "str   z29, [%[z],  #29, mul vl]"        "\n"
379*f6217f89SApple OSS Distributions                 "str   z30, [%[z],  #30, mul vl]"        "\n"
380*f6217f89SApple OSS Distributions                 "str   z31, [%[z],  #31, mul vl]"        "\n"
381*f6217f89SApple OSS Distributions                 :
382*f6217f89SApple OSS Distributions                 : [z] "r"(z)
383*f6217f89SApple OSS Distributions         );
384*f6217f89SApple OSS Distributions 
385*f6217f89SApple OSS Distributions 	asm volatile (
386*f6217f89SApple OSS Distributions                 "str     p0, [%[p],  #0, mul vl]"        "\n"
387*f6217f89SApple OSS Distributions                 "str     p1, [%[p],  #1, mul vl]"        "\n"
388*f6217f89SApple OSS Distributions                 "str     p2, [%[p],  #2, mul vl]"        "\n"
389*f6217f89SApple OSS Distributions                 "str     p3, [%[p],  #3, mul vl]"        "\n"
390*f6217f89SApple OSS Distributions                 "str     p4, [%[p],  #4, mul vl]"        "\n"
391*f6217f89SApple OSS Distributions                 "str     p5, [%[p],  #5, mul vl]"        "\n"
392*f6217f89SApple OSS Distributions                 "str     p6, [%[p],  #6, mul vl]"        "\n"
393*f6217f89SApple OSS Distributions                 "str     p7, [%[p],  #7, mul vl]"        "\n"
394*f6217f89SApple OSS Distributions                 "str     p8, [%[p],  #8, mul vl]"        "\n"
395*f6217f89SApple OSS Distributions                 "str     p9, [%[p],  #9, mul vl]"        "\n"
396*f6217f89SApple OSS Distributions                 "str    p10, [%[p], #10, mul vl]"        "\n"
397*f6217f89SApple OSS Distributions                 "str    p11, [%[p], #11, mul vl]"        "\n"
398*f6217f89SApple OSS Distributions                 "str    p12, [%[p], #12, mul vl]"        "\n"
399*f6217f89SApple OSS Distributions                 "str    p13, [%[p], #13, mul vl]"        "\n"
400*f6217f89SApple OSS Distributions                 "str    p14, [%[p], #14, mul vl]"        "\n"
401*f6217f89SApple OSS Distributions                 "str    p15, [%[p], #15, mul vl]"        "\n"
402*f6217f89SApple OSS Distributions                 :
403*f6217f89SApple OSS Distributions                 : [p] "r"(p)
404*f6217f89SApple OSS Distributions         );
405*f6217f89SApple OSS Distributions 
406*f6217f89SApple OSS Distributions 	if (sme_zt0_size()) {
407*f6217f89SApple OSS Distributions 		uint8_t *zt0 = sme_zt0(addr);
408*f6217f89SApple OSS Distributions 		asm volatile (
409*f6217f89SApple OSS Distributions                         "str	zt0, [%[zt0]]"
410*f6217f89SApple OSS Distributions                         :
411*f6217f89SApple OSS Distributions                         : [zt0] "r"(zt0)
412*f6217f89SApple OSS Distributions                 );
413*f6217f89SApple OSS Distributions 	}
414*f6217f89SApple OSS Distributions 
415*f6217f89SApple OSS Distributions 	set_sme_tpidr2_el0(addr, __builtin_arm_rsr64("TPIDR2_EL0"));
416*f6217f89SApple OSS Distributions }
417*f6217f89SApple OSS Distributions 
418*f6217f89SApple OSS Distributions static kern_return_t
sme_thread_get_state(thread_act_t thread,void * addr)419*f6217f89SApple OSS Distributions sme_thread_get_state(thread_act_t thread, void *addr)
420*f6217f89SApple OSS Distributions {
421*f6217f89SApple OSS Distributions 	uint8_t *za = sme_za(addr);
422*f6217f89SApple OSS Distributions 	uint8_t *z = sme_z(addr);
423*f6217f89SApple OSS Distributions 	uint8_t *p = sme_p(addr);
424*f6217f89SApple OSS Distributions 	uint16_t svl_b = arm_sme_svl_b();
425*f6217f89SApple OSS Distributions 
426*f6217f89SApple OSS Distributions 	arm_sme_state_t sme_state;
427*f6217f89SApple OSS Distributions 	mach_msg_type_number_t sme_count = ARM_SME_STATE_COUNT;
428*f6217f89SApple OSS Distributions 	kern_return_t err = thread_get_state(thread, ARM_SME_STATE, (thread_state_t)&sme_state, &sme_count);
429*f6217f89SApple OSS Distributions 	if (err) {
430*f6217f89SApple OSS Distributions 		return err;
431*f6217f89SApple OSS Distributions 	}
432*f6217f89SApple OSS Distributions 	set_sme_tpidr2_el0(addr, sme_state.__tpidr2_el0);
433*f6217f89SApple OSS Distributions 
434*f6217f89SApple OSS Distributions 	arm_sme_za_state_t za_state;
435*f6217f89SApple OSS Distributions 	mach_msg_type_number_t za_count = ARM_SME_ZA_STATE_COUNT;
436*f6217f89SApple OSS Distributions 	err = thread_get_state(thread, ARM_SME_ZA_STATE1, (thread_state_t)&za_state, &za_count);
437*f6217f89SApple OSS Distributions 	if (err) {
438*f6217f89SApple OSS Distributions 		return err;
439*f6217f89SApple OSS Distributions 	}
440*f6217f89SApple OSS Distributions 
441*f6217f89SApple OSS Distributions 	arm_sve_z_state_t z_state1, z_state2;
442*f6217f89SApple OSS Distributions 	mach_msg_type_number_t z_streaming_count = ARM_SVE_Z_STATE_COUNT;
443*f6217f89SApple OSS Distributions 	err = thread_get_state(thread, ARM_SVE_Z_STATE1, (thread_state_t)&z_state1, &z_streaming_count);
444*f6217f89SApple OSS Distributions 	if (err) {
445*f6217f89SApple OSS Distributions 		return err;
446*f6217f89SApple OSS Distributions 	}
447*f6217f89SApple OSS Distributions 	err = thread_get_state(thread, ARM_SVE_Z_STATE2, (thread_state_t)&z_state2, &z_streaming_count);
448*f6217f89SApple OSS Distributions 	if (err) {
449*f6217f89SApple OSS Distributions 		return err;
450*f6217f89SApple OSS Distributions 	}
451*f6217f89SApple OSS Distributions 
452*f6217f89SApple OSS Distributions 	arm_sve_p_state_t p_state;
453*f6217f89SApple OSS Distributions 	mach_msg_type_number_t p_streaming_count = ARM_SVE_P_STATE_COUNT;
454*f6217f89SApple OSS Distributions 	err = thread_get_state(thread, ARM_SVE_P_STATE, (thread_state_t)&p_state, &p_streaming_count);
455*f6217f89SApple OSS Distributions 	if (err) {
456*f6217f89SApple OSS Distributions 		return err;
457*f6217f89SApple OSS Distributions 	}
458*f6217f89SApple OSS Distributions 
459*f6217f89SApple OSS Distributions 	memcpy(za, za_state.__za, svl_b * svl_b);
460*f6217f89SApple OSS Distributions 
461*f6217f89SApple OSS Distributions 	size_t z_elem_size = svl_b;
462*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
463*f6217f89SApple OSS Distributions 		memcpy(z, z_state1.__z[i], z_elem_size);
464*f6217f89SApple OSS Distributions 		z += z_elem_size;
465*f6217f89SApple OSS Distributions 	}
466*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
467*f6217f89SApple OSS Distributions 		memcpy(z, z_state2.__z[i], z_elem_size);
468*f6217f89SApple OSS Distributions 		z += z_elem_size;
469*f6217f89SApple OSS Distributions 	}
470*f6217f89SApple OSS Distributions 
471*f6217f89SApple OSS Distributions 	size_t p_elem_size = svl_b / 8;
472*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
473*f6217f89SApple OSS Distributions 		memcpy(p, p_state.__p[i], p_elem_size);
474*f6217f89SApple OSS Distributions 		p += p_elem_size;
475*f6217f89SApple OSS Distributions 	}
476*f6217f89SApple OSS Distributions 
477*f6217f89SApple OSS Distributions 	if (sme_zt0_size()) {
478*f6217f89SApple OSS Distributions 		uint8_t *zt0 = sme_zt0(addr);
479*f6217f89SApple OSS Distributions 
480*f6217f89SApple OSS Distributions 		arm_sme2_state_t sme2_state;
481*f6217f89SApple OSS Distributions 		mach_msg_type_number_t sme2_count = ARM_SME2_STATE_COUNT;
482*f6217f89SApple OSS Distributions 		err = thread_get_state(thread, ARM_SME2_STATE, (thread_state_t)&sme2_state, &sme2_count);
483*f6217f89SApple OSS Distributions 		if (err) {
484*f6217f89SApple OSS Distributions 			return err;
485*f6217f89SApple OSS Distributions 		}
486*f6217f89SApple OSS Distributions 
487*f6217f89SApple OSS Distributions 		memcpy(zt0, sme2_state.__zt0, sizeof(sme2_state.__zt0));
488*f6217f89SApple OSS Distributions 	}
489*f6217f89SApple OSS Distributions 
490*f6217f89SApple OSS Distributions 	return KERN_SUCCESS;
491*f6217f89SApple OSS Distributions }
492*f6217f89SApple OSS Distributions 
493*f6217f89SApple OSS Distributions static kern_return_t
sme_thread_set_state(thread_act_t thread,const void * addr)494*f6217f89SApple OSS Distributions sme_thread_set_state(thread_act_t thread, const void *addr)
495*f6217f89SApple OSS Distributions {
496*f6217f89SApple OSS Distributions 	const uint8_t *za = const_sme_za(addr);
497*f6217f89SApple OSS Distributions 	const uint8_t *z = const_sme_z(addr);
498*f6217f89SApple OSS Distributions 	const uint8_t *p = const_sme_p(addr);
499*f6217f89SApple OSS Distributions 	uint16_t svl_b = arm_sme_svl_b();
500*f6217f89SApple OSS Distributions 
501*f6217f89SApple OSS Distributions 	arm_sme_state_t sme_state;
502*f6217f89SApple OSS Distributions 	sme_state.__svcr = 0x3;
503*f6217f89SApple OSS Distributions 	sme_state.__svl_b = svl_b;
504*f6217f89SApple OSS Distributions 	sme_state.__tpidr2_el0 = get_sme_tpidr2_el0(addr);
505*f6217f89SApple OSS Distributions 
506*f6217f89SApple OSS Distributions 	arm_sme_za_state_t za_state;
507*f6217f89SApple OSS Distributions 	memcpy(za_state.__za, za, svl_b * svl_b);
508*f6217f89SApple OSS Distributions 
509*f6217f89SApple OSS Distributions 	arm_sve_z_state_t z_state1, z_state2;
510*f6217f89SApple OSS Distributions 	size_t z_elem_size = svl_b;
511*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
512*f6217f89SApple OSS Distributions 		memcpy(z_state1.__z[i], z, z_elem_size);
513*f6217f89SApple OSS Distributions 		z += z_elem_size;
514*f6217f89SApple OSS Distributions 	}
515*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
516*f6217f89SApple OSS Distributions 		memcpy(z_state2.__z[i], z, z_elem_size);
517*f6217f89SApple OSS Distributions 		z += z_elem_size;
518*f6217f89SApple OSS Distributions 	}
519*f6217f89SApple OSS Distributions 
520*f6217f89SApple OSS Distributions 	arm_sve_p_state_t p_state;
521*f6217f89SApple OSS Distributions 	size_t p_elem_size = svl_b / 8;
522*f6217f89SApple OSS Distributions 	for (int i = 0; i < 16; i++) {
523*f6217f89SApple OSS Distributions 		memcpy(p_state.__p[i], p, p_elem_size);
524*f6217f89SApple OSS Distributions 		p += p_elem_size;
525*f6217f89SApple OSS Distributions 	}
526*f6217f89SApple OSS Distributions 
527*f6217f89SApple OSS Distributions 	kern_return_t err = thread_set_state(thread, ARM_SME_STATE, (thread_state_t)&sme_state, ARM_SME_STATE_COUNT);
528*f6217f89SApple OSS Distributions 	if (err) {
529*f6217f89SApple OSS Distributions 		return err;
530*f6217f89SApple OSS Distributions 	}
531*f6217f89SApple OSS Distributions 
532*f6217f89SApple OSS Distributions 	err = thread_set_state(thread, ARM_SVE_Z_STATE1, (thread_state_t)&z_state1, ARM_SVE_Z_STATE_COUNT);
533*f6217f89SApple OSS Distributions 	if (err) {
534*f6217f89SApple OSS Distributions 		return err;
535*f6217f89SApple OSS Distributions 	}
536*f6217f89SApple OSS Distributions 
537*f6217f89SApple OSS Distributions 	err = thread_set_state(thread, ARM_SVE_Z_STATE2, (thread_state_t)&z_state2, ARM_SVE_Z_STATE_COUNT);
538*f6217f89SApple OSS Distributions 	if (err) {
539*f6217f89SApple OSS Distributions 		return err;
540*f6217f89SApple OSS Distributions 	}
541*f6217f89SApple OSS Distributions 
542*f6217f89SApple OSS Distributions 	err = thread_set_state(thread, ARM_SVE_P_STATE, (thread_state_t)&p_state, ARM_SVE_P_STATE_COUNT);
543*f6217f89SApple OSS Distributions 	if (err) {
544*f6217f89SApple OSS Distributions 		return err;
545*f6217f89SApple OSS Distributions 	}
546*f6217f89SApple OSS Distributions 
547*f6217f89SApple OSS Distributions 	err = thread_set_state(thread, ARM_SME_ZA_STATE1, (thread_state_t)&za_state, ARM_SME_ZA_STATE_COUNT);
548*f6217f89SApple OSS Distributions 	if (err) {
549*f6217f89SApple OSS Distributions 		return err;
550*f6217f89SApple OSS Distributions 	}
551*f6217f89SApple OSS Distributions 
552*f6217f89SApple OSS Distributions 	if (sme_zt0_size()) {
553*f6217f89SApple OSS Distributions 		const uint8_t *zt0 = const_sme_zt0(addr);
554*f6217f89SApple OSS Distributions 
555*f6217f89SApple OSS Distributions 		arm_sme2_state_t sme2_state;
556*f6217f89SApple OSS Distributions 		memcpy(sme2_state.__zt0, zt0, sizeof(sme2_state.__zt0));
557*f6217f89SApple OSS Distributions 
558*f6217f89SApple OSS Distributions 		err = thread_set_state(thread, ARM_SME2_STATE, (thread_state_t)&sme2_state, ARM_SME2_STATE_COUNT);
559*f6217f89SApple OSS Distributions 		if (err) {
560*f6217f89SApple OSS Distributions 			return err;
561*f6217f89SApple OSS Distributions 		}
562*f6217f89SApple OSS Distributions 	}
563*f6217f89SApple OSS Distributions 
564*f6217f89SApple OSS Distributions 	return KERN_SUCCESS;
565*f6217f89SApple OSS Distributions }
566*f6217f89SApple OSS Distributions 
567*f6217f89SApple OSS Distributions const struct arm_matrix_operations sme_operations = {
568*f6217f89SApple OSS Distributions 	.name = "SME",
569*f6217f89SApple OSS Distributions 
570*f6217f89SApple OSS Distributions 	.data_size = sme_data_size,
571*f6217f89SApple OSS Distributions 	.alloc_data = sme_alloc_data,
572*f6217f89SApple OSS Distributions 
573*f6217f89SApple OSS Distributions 	.is_available = sme_is_available,
574*f6217f89SApple OSS Distributions 	.start = sme_start,
575*f6217f89SApple OSS Distributions 	.stop = sme_stop,
576*f6217f89SApple OSS Distributions 
577*f6217f89SApple OSS Distributions 	.load_one_vector = sme_load_one_vector,
578*f6217f89SApple OSS Distributions 	.load_data = sme_load_data,
579*f6217f89SApple OSS Distributions 	.store_data = sme_store_data,
580*f6217f89SApple OSS Distributions 
581*f6217f89SApple OSS Distributions 	.thread_get_state = sme_thread_get_state,
582*f6217f89SApple OSS Distributions 	.thread_set_state = sme_thread_set_state,
583*f6217f89SApple OSS Distributions };
584