/*
 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28*e3723e1fSApple OSS Distributions
29*e3723e1fSApple OSS Distributions #include <mach/thread_act.h>
30*e3723e1fSApple OSS Distributions #include <stdint.h>
31*e3723e1fSApple OSS Distributions #include <stdlib.h>
32*e3723e1fSApple OSS Distributions #include <sys/sysctl.h>
33*e3723e1fSApple OSS Distributions
34*e3723e1fSApple OSS Distributions #include "arm_matrix.h"
35*e3723e1fSApple OSS Distributions
/* Number of Z vector registers (z0-z31) stored in the SME data buffer. */
static const unsigned int SME_Z_VECTORS = 32;
/* Number of P predicate registers (p0-p15) stored in the SME data buffer. */
static const unsigned int SME_P_VECTORS = 16;
38*e3723e1fSApple OSS Distributions
/*
 * Returns the SME version derived from the hw.optional.arm.FEAT_SME and
 * hw.optional.arm.FEAT_SME2 sysctls: 2 when FEAT_SME2 is non-zero, 1 when
 * only FEAT_SME is non-zero, and 0 otherwise.
 *
 * The sysctls are queried on the first call only; the answer is cached in
 * function-local statics.  No locking is performed around that cache.
 */
static unsigned int
sme_version(void)
{
	static unsigned int ret = 0;
	static bool already_read = false;

	if (!already_read) {
		/*
		 * Default both flags to 0 so that a failed lookup (for example a
		 * sysctl name that does not exist on this system) is treated as
		 * "feature absent" rather than reading an uninitialized value.
		 */
		unsigned int feat_sme = 0;
		unsigned int feat_sme2 = 0;
		size_t size;

		/* The length argument is in/out, so reset it before each query. */
		size = sizeof(feat_sme);
		if (sysctlbyname("hw.optional.arm.FEAT_SME", &feat_sme, &size, NULL, 0) != 0) {
			feat_sme = 0;
		}

		size = sizeof(feat_sme2);
		if (sysctlbyname("hw.optional.arm.FEAT_SME2", &feat_sme2, &size, NULL, 0) != 0) {
			feat_sme2 = 0;
		}

		if (feat_sme2) {
			ret = 2;
		} else if (feat_sme) {
			ret = 1;
		} else {
			ret = 0;
		}

		already_read = true;
	}

	return ret;
}
64*e3723e1fSApple OSS Distributions
/*
 * Executes `rdsvl <reg>, #1` and returns the result truncated to 16 bits.
 * The other helpers in this file use the returned value as the streaming
 * vector length in bytes.
 */
static uint16_t
arm_sme_svl_b(void)
{
	uint64_t svl_bytes = 0;

	__asm__ __volatile__ ("rdsvl %[out], #1" : [out] "=r"(svl_bytes));

	return (uint16_t)svl_bytes;
}
75*e3723e1fSApple OSS Distributions
/*
 * Size in bytes of the ZA region of the SME data buffer: the vector length
 * in bytes, squared.
 */
static size_t
sme_za_size(void)
{
	const int rows = arm_sme_svl_b();
	const int row_bytes = arm_sme_svl_b();

	return (size_t)(rows * row_bytes);
}
81*e3723e1fSApple OSS Distributions
82*e3723e1fSApple OSS Distributions static size_t
sme_z_size(void)83*e3723e1fSApple OSS Distributions sme_z_size(void)
84*e3723e1fSApple OSS Distributions {
85*e3723e1fSApple OSS Distributions return arm_sme_svl_b() * SME_Z_VECTORS;
86*e3723e1fSApple OSS Distributions }
87*e3723e1fSApple OSS Distributions
88*e3723e1fSApple OSS Distributions static size_t
sme_p_size(void)89*e3723e1fSApple OSS Distributions sme_p_size(void)
90*e3723e1fSApple OSS Distributions {
91*e3723e1fSApple OSS Distributions return arm_sme_svl_b() * SME_P_VECTORS / 8;
92*e3723e1fSApple OSS Distributions }
93*e3723e1fSApple OSS Distributions
/*
 * Size in bytes of the ZT0 region of the SME data buffer: 64 when
 * sme_version() reports version 2 or later, 0 otherwise.
 */
static size_t
sme_zt0_size(void)
{
	return (sme_version() >= 2) ? 64 : 0;
}
103*e3723e1fSApple OSS Distributions
/*
 * Size in bytes of the TPIDR2_EL0 slot of the SME data buffer (one
 * uint64_t).
 */
static size_t
sme_tpidr2_size(void)
{
	const size_t slot_bytes = sizeof(uint64_t);

	return slot_bytes;
}
109*e3723e1fSApple OSS Distributions
/*
 * Returns the start of the ZA region of the SME data buffer at `addr`.
 * ZA is the first region, so this is `addr` itself viewed as bytes.
 */
static inline uint8_t *
sme_za(void *addr)
{
	uint8_t *za_base = addr;

	return za_base;
}
115*e3723e1fSApple OSS Distributions
/*
 * Read-only counterpart of sme_za(): returns the start of the ZA region of
 * the SME data buffer at `addr`, which is `addr` itself viewed as bytes.
 */
static inline const uint8_t *
const_sme_za(const void *addr)
{
	const uint8_t *za_base = addr;

	return za_base;
}
121*e3723e1fSApple OSS Distributions
/*
 * Returns the start of the Z-register region of the SME data buffer at
 * `addr`; it immediately follows the ZA region.
 */
static inline uint8_t *
sme_z(void *addr)
{
	uint8_t *za_base = sme_za(addr);
	const size_t za_bytes = sme_za_size();

	return za_base + za_bytes;
}
127*e3723e1fSApple OSS Distributions
/*
 * Read-only counterpart of sme_z(): returns the start of the Z-register
 * region, which immediately follows the ZA region.
 */
static inline const uint8_t *
const_sme_z(const void *addr)
{
	const uint8_t *za_base = const_sme_za(addr);
	const size_t za_bytes = sme_za_size();

	return za_base + za_bytes;
}
133*e3723e1fSApple OSS Distributions
/*
 * Returns the start of the P-register region of the SME data buffer at
 * `addr`; it immediately follows the Z-register region.
 */
static inline uint8_t *
sme_p(void *addr)
{
	uint8_t *z_base = sme_z(addr);
	const size_t z_bytes = sme_z_size();

	return z_base + z_bytes;
}
139*e3723e1fSApple OSS Distributions
/*
 * Read-only counterpart of sme_p(): returns the start of the P-register
 * region, which immediately follows the Z-register region.
 */
static inline const uint8_t *
const_sme_p(const void *addr)
{
	const uint8_t *z_base = const_sme_z(addr);
	const size_t z_bytes = sme_z_size();

	return z_base + z_bytes;
}
145*e3723e1fSApple OSS Distributions
/*
 * Returns the start of the ZT0 region of the SME data buffer at `addr`;
 * it immediately follows the P-register region.  The region is empty when
 * sme_zt0_size() is 0.
 */
static inline uint8_t *
sme_zt0(void *addr)
{
	uint8_t *p_base = sme_p(addr);
	const size_t p_bytes = sme_p_size();

	return p_base + p_bytes;
}
151*e3723e1fSApple OSS Distributions
/*
 * Read-only counterpart of sme_zt0(): returns the start of the ZT0 region,
 * which immediately follows the P-register region.
 */
static inline const uint8_t *
const_sme_zt0(const void *addr)
{
	const uint8_t *p_base = const_sme_p(addr);
	const size_t p_bytes = sme_p_size();

	return p_base + p_bytes;
}
157*e3723e1fSApple OSS Distributions
/*
 * Total size in bytes of the SME data buffer: the sum of the ZA, Z, P, ZT0
 * and TPIDR2_EL0 region sizes.
 */
static size_t
sme_data_size(void)
{
	size_t total = 0;

	total += sme_za_size();
	total += sme_z_size();
	total += sme_p_size();
	total += sme_zt0_size();
	total += sme_tpidr2_size();

	return total;
}
163*e3723e1fSApple OSS Distributions
/*
 * Stores `val` into the TPIDR2_EL0 slot of the SME data buffer at `addr`.
 * The slot is the uint64_t located right after the ZT0 region.
 */
static inline void
set_sme_tpidr2_el0(void *addr, uint64_t val)
{
	uint8_t *slot = sme_zt0(addr) + sme_zt0_size();

	*(uint64_t *)slot = val;
}
170*e3723e1fSApple OSS Distributions
/*
 * Returns the value held in the TPIDR2_EL0 slot of the SME data buffer at
 * `addr`.  The slot is the uint64_t located right after the ZT0 region.
 */
static inline uint64_t
get_sme_tpidr2_el0(const void *addr)
{
	const uint8_t *slot = const_sme_zt0(addr) + sme_zt0_size();

	return *(const uint64_t *)slot;
}
177*e3723e1fSApple OSS Distributions
/*
 * Allocates an SME data buffer of sme_data_size() bytes with malloc() and
 * returns it.  The malloc() result is returned unchecked, so the caller
 * sees NULL when the allocation fails.
 */
static void *
sme_alloc_data(void)
{
	const size_t nbytes = sme_data_size();
	void *buffer = malloc(nbytes);

	return buffer;
}
183*e3723e1fSApple OSS Distributions
/*
 * Returns true when sme_version() reports a non-zero SME version.
 */
static bool
sme_is_available(void)
{
	return sme_version() != 0;
}
189*e3723e1fSApple OSS Distributions
/*
 * Executes the operand-less `smstart` instruction.
 * NOTE(review): architecturally this should enable both streaming mode and
 * ZA storage -- confirm against the Arm SME specification.
 */
static void
sme_start(void)
{
	__asm__ __volatile__ ("smstart");
}
195*e3723e1fSApple OSS Distributions
/*
 * Executes the operand-less `smstop` instruction.
 * NOTE(review): architecturally this should disable both streaming mode
 * and ZA storage -- confirm against the Arm SME specification.
 */
static void
sme_stop(void)
{
	__asm__ __volatile__ ("smstop");
}
201*e3723e1fSApple OSS Distributions
202*e3723e1fSApple OSS Distributions static void
sme_load_one_vector(const void * addr)203*e3723e1fSApple OSS Distributions sme_load_one_vector(const void *addr)
204*e3723e1fSApple OSS Distributions {
205*e3723e1fSApple OSS Distributions asm volatile (
206*e3723e1fSApple OSS Distributions "mov w12, #0" "\n"
207*e3723e1fSApple OSS Distributions "ldr za[w12, #0], [%[addr]]" "\n"
208*e3723e1fSApple OSS Distributions :
209*e3723e1fSApple OSS Distributions : [addr] "r"(addr)
210*e3723e1fSApple OSS Distributions : "w12"
211*e3723e1fSApple OSS Distributions );
212*e3723e1fSApple OSS Distributions }
213*e3723e1fSApple OSS Distributions
214*e3723e1fSApple OSS Distributions static void
sme_load_data(const void * addr)215*e3723e1fSApple OSS Distributions sme_load_data(const void *addr)
216*e3723e1fSApple OSS Distributions {
217*e3723e1fSApple OSS Distributions const uint8_t *za = const_sme_za(addr);
218*e3723e1fSApple OSS Distributions const uint8_t *z = const_sme_z(addr);
219*e3723e1fSApple OSS Distributions const uint8_t *p = const_sme_p(addr);
220*e3723e1fSApple OSS Distributions uint16_t svl_b = arm_sme_svl_b();
221*e3723e1fSApple OSS Distributions
222*e3723e1fSApple OSS Distributions for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
223*e3723e1fSApple OSS Distributions asm volatile (
224*e3723e1fSApple OSS Distributions "ldr za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
225*e3723e1fSApple OSS Distributions "ldr za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
226*e3723e1fSApple OSS Distributions "ldr za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
227*e3723e1fSApple OSS Distributions "ldr za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
228*e3723e1fSApple OSS Distributions "ldr za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
229*e3723e1fSApple OSS Distributions "ldr za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
230*e3723e1fSApple OSS Distributions "ldr za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
231*e3723e1fSApple OSS Distributions "ldr za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
232*e3723e1fSApple OSS Distributions "ldr za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
233*e3723e1fSApple OSS Distributions "ldr za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
234*e3723e1fSApple OSS Distributions "ldr za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
235*e3723e1fSApple OSS Distributions "ldr za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
236*e3723e1fSApple OSS Distributions "ldr za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
237*e3723e1fSApple OSS Distributions "ldr za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
238*e3723e1fSApple OSS Distributions "ldr za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
239*e3723e1fSApple OSS Distributions "ldr za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
240*e3723e1fSApple OSS Distributions :
241*e3723e1fSApple OSS Distributions : [i] "r"(i),
242*e3723e1fSApple OSS Distributions [addr] "r"(za + (i * svl_b))
243*e3723e1fSApple OSS Distributions );
244*e3723e1fSApple OSS Distributions }
245*e3723e1fSApple OSS Distributions
246*e3723e1fSApple OSS Distributions asm volatile (
247*e3723e1fSApple OSS Distributions "ldr z0, [%[z], #0, mul vl]" "\n"
248*e3723e1fSApple OSS Distributions "ldr z1, [%[z], #1, mul vl]" "\n"
249*e3723e1fSApple OSS Distributions "ldr z2, [%[z], #2, mul vl]" "\n"
250*e3723e1fSApple OSS Distributions "ldr z3, [%[z], #3, mul vl]" "\n"
251*e3723e1fSApple OSS Distributions "ldr z4, [%[z], #4, mul vl]" "\n"
252*e3723e1fSApple OSS Distributions "ldr z5, [%[z], #5, mul vl]" "\n"
253*e3723e1fSApple OSS Distributions "ldr z6, [%[z], #6, mul vl]" "\n"
254*e3723e1fSApple OSS Distributions "ldr z7, [%[z], #7, mul vl]" "\n"
255*e3723e1fSApple OSS Distributions "ldr z8, [%[z], #8, mul vl]" "\n"
256*e3723e1fSApple OSS Distributions "ldr z9, [%[z], #9, mul vl]" "\n"
257*e3723e1fSApple OSS Distributions "ldr z10, [%[z], #10, mul vl]" "\n"
258*e3723e1fSApple OSS Distributions "ldr z11, [%[z], #11, mul vl]" "\n"
259*e3723e1fSApple OSS Distributions "ldr z12, [%[z], #12, mul vl]" "\n"
260*e3723e1fSApple OSS Distributions "ldr z13, [%[z], #13, mul vl]" "\n"
261*e3723e1fSApple OSS Distributions "ldr z14, [%[z], #14, mul vl]" "\n"
262*e3723e1fSApple OSS Distributions "ldr z15, [%[z], #15, mul vl]" "\n"
263*e3723e1fSApple OSS Distributions "ldr z16, [%[z], #16, mul vl]" "\n"
264*e3723e1fSApple OSS Distributions "ldr z17, [%[z], #17, mul vl]" "\n"
265*e3723e1fSApple OSS Distributions "ldr z18, [%[z], #18, mul vl]" "\n"
266*e3723e1fSApple OSS Distributions "ldr z19, [%[z], #19, mul vl]" "\n"
267*e3723e1fSApple OSS Distributions "ldr z20, [%[z], #20, mul vl]" "\n"
268*e3723e1fSApple OSS Distributions "ldr z21, [%[z], #21, mul vl]" "\n"
269*e3723e1fSApple OSS Distributions "ldr z22, [%[z], #22, mul vl]" "\n"
270*e3723e1fSApple OSS Distributions "ldr z23, [%[z], #23, mul vl]" "\n"
271*e3723e1fSApple OSS Distributions "ldr z24, [%[z], #24, mul vl]" "\n"
272*e3723e1fSApple OSS Distributions "ldr z25, [%[z], #25, mul vl]" "\n"
273*e3723e1fSApple OSS Distributions "ldr z26, [%[z], #26, mul vl]" "\n"
274*e3723e1fSApple OSS Distributions "ldr z27, [%[z], #27, mul vl]" "\n"
275*e3723e1fSApple OSS Distributions "ldr z28, [%[z], #28, mul vl]" "\n"
276*e3723e1fSApple OSS Distributions "ldr z29, [%[z], #29, mul vl]" "\n"
277*e3723e1fSApple OSS Distributions "ldr z30, [%[z], #30, mul vl]" "\n"
278*e3723e1fSApple OSS Distributions "ldr z31, [%[z], #31, mul vl]" "\n"
279*e3723e1fSApple OSS Distributions :
280*e3723e1fSApple OSS Distributions : [z] "r"(z)
281*e3723e1fSApple OSS Distributions );
282*e3723e1fSApple OSS Distributions
283*e3723e1fSApple OSS Distributions asm volatile (
284*e3723e1fSApple OSS Distributions "ldr p0, [%[p], #0, mul vl]" "\n"
285*e3723e1fSApple OSS Distributions "ldr p1, [%[p], #1, mul vl]" "\n"
286*e3723e1fSApple OSS Distributions "ldr p2, [%[p], #2, mul vl]" "\n"
287*e3723e1fSApple OSS Distributions "ldr p3, [%[p], #3, mul vl]" "\n"
288*e3723e1fSApple OSS Distributions "ldr p4, [%[p], #4, mul vl]" "\n"
289*e3723e1fSApple OSS Distributions "ldr p5, [%[p], #5, mul vl]" "\n"
290*e3723e1fSApple OSS Distributions "ldr p6, [%[p], #6, mul vl]" "\n"
291*e3723e1fSApple OSS Distributions "ldr p7, [%[p], #7, mul vl]" "\n"
292*e3723e1fSApple OSS Distributions "ldr p8, [%[p], #8, mul vl]" "\n"
293*e3723e1fSApple OSS Distributions "ldr p9, [%[p], #9, mul vl]" "\n"
294*e3723e1fSApple OSS Distributions "ldr p10, [%[p], #10, mul vl]" "\n"
295*e3723e1fSApple OSS Distributions "ldr p11, [%[p], #11, mul vl]" "\n"
296*e3723e1fSApple OSS Distributions "ldr p12, [%[p], #12, mul vl]" "\n"
297*e3723e1fSApple OSS Distributions "ldr p13, [%[p], #13, mul vl]" "\n"
298*e3723e1fSApple OSS Distributions "ldr p14, [%[p], #14, mul vl]" "\n"
299*e3723e1fSApple OSS Distributions "ldr p15, [%[p], #15, mul vl]" "\n"
300*e3723e1fSApple OSS Distributions :
301*e3723e1fSApple OSS Distributions : [p] "r"(p)
302*e3723e1fSApple OSS Distributions );
303*e3723e1fSApple OSS Distributions
304*e3723e1fSApple OSS Distributions if (sme_zt0_size()) {
305*e3723e1fSApple OSS Distributions const uint8_t *zt0 = const_sme_zt0(addr);
306*e3723e1fSApple OSS Distributions asm volatile (
307*e3723e1fSApple OSS Distributions "ldr zt0, [%[zt0]]"
308*e3723e1fSApple OSS Distributions :
309*e3723e1fSApple OSS Distributions : [zt0] "r"(zt0)
310*e3723e1fSApple OSS Distributions );
311*e3723e1fSApple OSS Distributions }
312*e3723e1fSApple OSS Distributions
313*e3723e1fSApple OSS Distributions __builtin_arm_wsr64("TPIDR2_EL0", get_sme_tpidr2_el0(addr));
314*e3723e1fSApple OSS Distributions }
315*e3723e1fSApple OSS Distributions
316*e3723e1fSApple OSS Distributions static void
sme_store_data(void * addr)317*e3723e1fSApple OSS Distributions sme_store_data(void *addr)
318*e3723e1fSApple OSS Distributions {
319*e3723e1fSApple OSS Distributions uint8_t *za = sme_za(addr);
320*e3723e1fSApple OSS Distributions uint8_t *z = sme_z(addr);
321*e3723e1fSApple OSS Distributions uint8_t *p = sme_p(addr);
322*e3723e1fSApple OSS Distributions uint16_t svl_b = arm_sme_svl_b();
323*e3723e1fSApple OSS Distributions
324*e3723e1fSApple OSS Distributions for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
325*e3723e1fSApple OSS Distributions asm volatile (
326*e3723e1fSApple OSS Distributions "str za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
327*e3723e1fSApple OSS Distributions "str za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
328*e3723e1fSApple OSS Distributions "str za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
329*e3723e1fSApple OSS Distributions "str za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
330*e3723e1fSApple OSS Distributions "str za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
331*e3723e1fSApple OSS Distributions "str za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
332*e3723e1fSApple OSS Distributions "str za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
333*e3723e1fSApple OSS Distributions "str za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
334*e3723e1fSApple OSS Distributions "str za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
335*e3723e1fSApple OSS Distributions "str za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
336*e3723e1fSApple OSS Distributions "str za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
337*e3723e1fSApple OSS Distributions "str za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
338*e3723e1fSApple OSS Distributions "str za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
339*e3723e1fSApple OSS Distributions "str za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
340*e3723e1fSApple OSS Distributions "str za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
341*e3723e1fSApple OSS Distributions "str za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
342*e3723e1fSApple OSS Distributions :
343*e3723e1fSApple OSS Distributions : [i] "r"(i),
344*e3723e1fSApple OSS Distributions [addr] "r"(za + (i * svl_b))
345*e3723e1fSApple OSS Distributions );
346*e3723e1fSApple OSS Distributions }
347*e3723e1fSApple OSS Distributions
348*e3723e1fSApple OSS Distributions asm volatile (
349*e3723e1fSApple OSS Distributions "str z0, [%[z], #0, mul vl]" "\n"
350*e3723e1fSApple OSS Distributions "str z1, [%[z], #1, mul vl]" "\n"
351*e3723e1fSApple OSS Distributions "str z2, [%[z], #2, mul vl]" "\n"
352*e3723e1fSApple OSS Distributions "str z3, [%[z], #3, mul vl]" "\n"
353*e3723e1fSApple OSS Distributions "str z4, [%[z], #4, mul vl]" "\n"
354*e3723e1fSApple OSS Distributions "str z5, [%[z], #5, mul vl]" "\n"
355*e3723e1fSApple OSS Distributions "str z6, [%[z], #6, mul vl]" "\n"
356*e3723e1fSApple OSS Distributions "str z7, [%[z], #7, mul vl]" "\n"
357*e3723e1fSApple OSS Distributions "str z8, [%[z], #8, mul vl]" "\n"
358*e3723e1fSApple OSS Distributions "str z9, [%[z], #9, mul vl]" "\n"
359*e3723e1fSApple OSS Distributions "str z10, [%[z], #10, mul vl]" "\n"
360*e3723e1fSApple OSS Distributions "str z11, [%[z], #11, mul vl]" "\n"
361*e3723e1fSApple OSS Distributions "str z12, [%[z], #12, mul vl]" "\n"
362*e3723e1fSApple OSS Distributions "str z13, [%[z], #13, mul vl]" "\n"
363*e3723e1fSApple OSS Distributions "str z14, [%[z], #14, mul vl]" "\n"
364*e3723e1fSApple OSS Distributions "str z15, [%[z], #15, mul vl]" "\n"
365*e3723e1fSApple OSS Distributions "str z16, [%[z], #16, mul vl]" "\n"
366*e3723e1fSApple OSS Distributions "str z17, [%[z], #17, mul vl]" "\n"
367*e3723e1fSApple OSS Distributions "str z18, [%[z], #18, mul vl]" "\n"
368*e3723e1fSApple OSS Distributions "str z19, [%[z], #19, mul vl]" "\n"
369*e3723e1fSApple OSS Distributions "str z20, [%[z], #20, mul vl]" "\n"
370*e3723e1fSApple OSS Distributions "str z21, [%[z], #21, mul vl]" "\n"
371*e3723e1fSApple OSS Distributions "str z22, [%[z], #22, mul vl]" "\n"
372*e3723e1fSApple OSS Distributions "str z23, [%[z], #23, mul vl]" "\n"
373*e3723e1fSApple OSS Distributions "str z24, [%[z], #24, mul vl]" "\n"
374*e3723e1fSApple OSS Distributions "str z25, [%[z], #25, mul vl]" "\n"
375*e3723e1fSApple OSS Distributions "str z26, [%[z], #26, mul vl]" "\n"
376*e3723e1fSApple OSS Distributions "str z27, [%[z], #27, mul vl]" "\n"
377*e3723e1fSApple OSS Distributions "str z28, [%[z], #28, mul vl]" "\n"
378*e3723e1fSApple OSS Distributions "str z29, [%[z], #29, mul vl]" "\n"
379*e3723e1fSApple OSS Distributions "str z30, [%[z], #30, mul vl]" "\n"
380*e3723e1fSApple OSS Distributions "str z31, [%[z], #31, mul vl]" "\n"
381*e3723e1fSApple OSS Distributions :
382*e3723e1fSApple OSS Distributions : [z] "r"(z)
383*e3723e1fSApple OSS Distributions );
384*e3723e1fSApple OSS Distributions
385*e3723e1fSApple OSS Distributions asm volatile (
386*e3723e1fSApple OSS Distributions "str p0, [%[p], #0, mul vl]" "\n"
387*e3723e1fSApple OSS Distributions "str p1, [%[p], #1, mul vl]" "\n"
388*e3723e1fSApple OSS Distributions "str p2, [%[p], #2, mul vl]" "\n"
389*e3723e1fSApple OSS Distributions "str p3, [%[p], #3, mul vl]" "\n"
390*e3723e1fSApple OSS Distributions "str p4, [%[p], #4, mul vl]" "\n"
391*e3723e1fSApple OSS Distributions "str p5, [%[p], #5, mul vl]" "\n"
392*e3723e1fSApple OSS Distributions "str p6, [%[p], #6, mul vl]" "\n"
393*e3723e1fSApple OSS Distributions "str p7, [%[p], #7, mul vl]" "\n"
394*e3723e1fSApple OSS Distributions "str p8, [%[p], #8, mul vl]" "\n"
395*e3723e1fSApple OSS Distributions "str p9, [%[p], #9, mul vl]" "\n"
396*e3723e1fSApple OSS Distributions "str p10, [%[p], #10, mul vl]" "\n"
397*e3723e1fSApple OSS Distributions "str p11, [%[p], #11, mul vl]" "\n"
398*e3723e1fSApple OSS Distributions "str p12, [%[p], #12, mul vl]" "\n"
399*e3723e1fSApple OSS Distributions "str p13, [%[p], #13, mul vl]" "\n"
400*e3723e1fSApple OSS Distributions "str p14, [%[p], #14, mul vl]" "\n"
401*e3723e1fSApple OSS Distributions "str p15, [%[p], #15, mul vl]" "\n"
402*e3723e1fSApple OSS Distributions :
403*e3723e1fSApple OSS Distributions : [p] "r"(p)
404*e3723e1fSApple OSS Distributions );
405*e3723e1fSApple OSS Distributions
406*e3723e1fSApple OSS Distributions if (sme_zt0_size()) {
407*e3723e1fSApple OSS Distributions uint8_t *zt0 = sme_zt0(addr);
408*e3723e1fSApple OSS Distributions asm volatile (
409*e3723e1fSApple OSS Distributions "str zt0, [%[zt0]]"
410*e3723e1fSApple OSS Distributions :
411*e3723e1fSApple OSS Distributions : [zt0] "r"(zt0)
412*e3723e1fSApple OSS Distributions );
413*e3723e1fSApple OSS Distributions }
414*e3723e1fSApple OSS Distributions
415*e3723e1fSApple OSS Distributions set_sme_tpidr2_el0(addr, __builtin_arm_rsr64("TPIDR2_EL0"));
416*e3723e1fSApple OSS Distributions }
417*e3723e1fSApple OSS Distributions
418*e3723e1fSApple OSS Distributions static kern_return_t
sme_thread_get_state(thread_act_t thread,void * addr)419*e3723e1fSApple OSS Distributions sme_thread_get_state(thread_act_t thread, void *addr)
420*e3723e1fSApple OSS Distributions {
421*e3723e1fSApple OSS Distributions uint8_t *za = sme_za(addr);
422*e3723e1fSApple OSS Distributions uint8_t *z = sme_z(addr);
423*e3723e1fSApple OSS Distributions uint8_t *p = sme_p(addr);
424*e3723e1fSApple OSS Distributions uint16_t svl_b = arm_sme_svl_b();
425*e3723e1fSApple OSS Distributions
426*e3723e1fSApple OSS Distributions arm_sme_state_t sme_state;
427*e3723e1fSApple OSS Distributions mach_msg_type_number_t sme_count = ARM_SME_STATE_COUNT;
428*e3723e1fSApple OSS Distributions kern_return_t err = thread_get_state(thread, ARM_SME_STATE, (thread_state_t)&sme_state, &sme_count);
429*e3723e1fSApple OSS Distributions if (err) {
430*e3723e1fSApple OSS Distributions return err;
431*e3723e1fSApple OSS Distributions }
432*e3723e1fSApple OSS Distributions set_sme_tpidr2_el0(addr, sme_state.__tpidr2_el0);
433*e3723e1fSApple OSS Distributions
434*e3723e1fSApple OSS Distributions arm_sme_za_state_t za_state;
435*e3723e1fSApple OSS Distributions mach_msg_type_number_t za_count = ARM_SME_ZA_STATE_COUNT;
436*e3723e1fSApple OSS Distributions err = thread_get_state(thread, ARM_SME_ZA_STATE1, (thread_state_t)&za_state, &za_count);
437*e3723e1fSApple OSS Distributions if (err) {
438*e3723e1fSApple OSS Distributions return err;
439*e3723e1fSApple OSS Distributions }
440*e3723e1fSApple OSS Distributions
441*e3723e1fSApple OSS Distributions arm_sve_z_state_t z_state1, z_state2;
442*e3723e1fSApple OSS Distributions mach_msg_type_number_t z_streaming_count = ARM_SVE_Z_STATE_COUNT;
443*e3723e1fSApple OSS Distributions err = thread_get_state(thread, ARM_SVE_Z_STATE1, (thread_state_t)&z_state1, &z_streaming_count);
444*e3723e1fSApple OSS Distributions if (err) {
445*e3723e1fSApple OSS Distributions return err;
446*e3723e1fSApple OSS Distributions }
447*e3723e1fSApple OSS Distributions err = thread_get_state(thread, ARM_SVE_Z_STATE2, (thread_state_t)&z_state2, &z_streaming_count);
448*e3723e1fSApple OSS Distributions if (err) {
449*e3723e1fSApple OSS Distributions return err;
450*e3723e1fSApple OSS Distributions }
451*e3723e1fSApple OSS Distributions
452*e3723e1fSApple OSS Distributions arm_sve_p_state_t p_state;
453*e3723e1fSApple OSS Distributions mach_msg_type_number_t p_streaming_count = ARM_SVE_P_STATE_COUNT;
454*e3723e1fSApple OSS Distributions err = thread_get_state(thread, ARM_SVE_P_STATE, (thread_state_t)&p_state, &p_streaming_count);
455*e3723e1fSApple OSS Distributions if (err) {
456*e3723e1fSApple OSS Distributions return err;
457*e3723e1fSApple OSS Distributions }
458*e3723e1fSApple OSS Distributions
459*e3723e1fSApple OSS Distributions memcpy(za, za_state.__za, svl_b * svl_b);
460*e3723e1fSApple OSS Distributions
461*e3723e1fSApple OSS Distributions size_t z_elem_size = svl_b;
462*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
463*e3723e1fSApple OSS Distributions memcpy(z, z_state1.__z[i], z_elem_size);
464*e3723e1fSApple OSS Distributions z += z_elem_size;
465*e3723e1fSApple OSS Distributions }
466*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
467*e3723e1fSApple OSS Distributions memcpy(z, z_state2.__z[i], z_elem_size);
468*e3723e1fSApple OSS Distributions z += z_elem_size;
469*e3723e1fSApple OSS Distributions }
470*e3723e1fSApple OSS Distributions
471*e3723e1fSApple OSS Distributions size_t p_elem_size = svl_b / 8;
472*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
473*e3723e1fSApple OSS Distributions memcpy(p, p_state.__p[i], p_elem_size);
474*e3723e1fSApple OSS Distributions p += p_elem_size;
475*e3723e1fSApple OSS Distributions }
476*e3723e1fSApple OSS Distributions
477*e3723e1fSApple OSS Distributions if (sme_zt0_size()) {
478*e3723e1fSApple OSS Distributions uint8_t *zt0 = sme_zt0(addr);
479*e3723e1fSApple OSS Distributions
480*e3723e1fSApple OSS Distributions arm_sme2_state_t sme2_state;
481*e3723e1fSApple OSS Distributions mach_msg_type_number_t sme2_count = ARM_SME2_STATE_COUNT;
482*e3723e1fSApple OSS Distributions err = thread_get_state(thread, ARM_SME2_STATE, (thread_state_t)&sme2_state, &sme2_count);
483*e3723e1fSApple OSS Distributions if (err) {
484*e3723e1fSApple OSS Distributions return err;
485*e3723e1fSApple OSS Distributions }
486*e3723e1fSApple OSS Distributions
487*e3723e1fSApple OSS Distributions memcpy(zt0, sme2_state.__zt0, sizeof(sme2_state.__zt0));
488*e3723e1fSApple OSS Distributions }
489*e3723e1fSApple OSS Distributions
490*e3723e1fSApple OSS Distributions return KERN_SUCCESS;
491*e3723e1fSApple OSS Distributions }
492*e3723e1fSApple OSS Distributions
493*e3723e1fSApple OSS Distributions static kern_return_t
sme_thread_set_state(thread_act_t thread,const void * addr)494*e3723e1fSApple OSS Distributions sme_thread_set_state(thread_act_t thread, const void *addr)
495*e3723e1fSApple OSS Distributions {
496*e3723e1fSApple OSS Distributions const uint8_t *za = const_sme_za(addr);
497*e3723e1fSApple OSS Distributions const uint8_t *z = const_sme_z(addr);
498*e3723e1fSApple OSS Distributions const uint8_t *p = const_sme_p(addr);
499*e3723e1fSApple OSS Distributions uint16_t svl_b = arm_sme_svl_b();
500*e3723e1fSApple OSS Distributions
501*e3723e1fSApple OSS Distributions arm_sme_state_t sme_state;
502*e3723e1fSApple OSS Distributions sme_state.__svcr = 0x3;
503*e3723e1fSApple OSS Distributions sme_state.__svl_b = svl_b;
504*e3723e1fSApple OSS Distributions sme_state.__tpidr2_el0 = get_sme_tpidr2_el0(addr);
505*e3723e1fSApple OSS Distributions
506*e3723e1fSApple OSS Distributions arm_sme_za_state_t za_state;
507*e3723e1fSApple OSS Distributions memcpy(za_state.__za, za, svl_b * svl_b);
508*e3723e1fSApple OSS Distributions
509*e3723e1fSApple OSS Distributions arm_sve_z_state_t z_state1, z_state2;
510*e3723e1fSApple OSS Distributions size_t z_elem_size = svl_b;
511*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
512*e3723e1fSApple OSS Distributions memcpy(z_state1.__z[i], z, z_elem_size);
513*e3723e1fSApple OSS Distributions z += z_elem_size;
514*e3723e1fSApple OSS Distributions }
515*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
516*e3723e1fSApple OSS Distributions memcpy(z_state2.__z[i], z, z_elem_size);
517*e3723e1fSApple OSS Distributions z += z_elem_size;
518*e3723e1fSApple OSS Distributions }
519*e3723e1fSApple OSS Distributions
520*e3723e1fSApple OSS Distributions arm_sve_p_state_t p_state;
521*e3723e1fSApple OSS Distributions size_t p_elem_size = svl_b / 8;
522*e3723e1fSApple OSS Distributions for (int i = 0; i < 16; i++) {
523*e3723e1fSApple OSS Distributions memcpy(p_state.__p[i], p, p_elem_size);
524*e3723e1fSApple OSS Distributions p += p_elem_size;
525*e3723e1fSApple OSS Distributions }
526*e3723e1fSApple OSS Distributions
527*e3723e1fSApple OSS Distributions kern_return_t err = thread_set_state(thread, ARM_SME_STATE, (thread_state_t)&sme_state, ARM_SME_STATE_COUNT);
528*e3723e1fSApple OSS Distributions if (err) {
529*e3723e1fSApple OSS Distributions return err;
530*e3723e1fSApple OSS Distributions }
531*e3723e1fSApple OSS Distributions
532*e3723e1fSApple OSS Distributions err = thread_set_state(thread, ARM_SVE_Z_STATE1, (thread_state_t)&z_state1, ARM_SVE_Z_STATE_COUNT);
533*e3723e1fSApple OSS Distributions if (err) {
534*e3723e1fSApple OSS Distributions return err;
535*e3723e1fSApple OSS Distributions }
536*e3723e1fSApple OSS Distributions
537*e3723e1fSApple OSS Distributions err = thread_set_state(thread, ARM_SVE_Z_STATE2, (thread_state_t)&z_state2, ARM_SVE_Z_STATE_COUNT);
538*e3723e1fSApple OSS Distributions if (err) {
539*e3723e1fSApple OSS Distributions return err;
540*e3723e1fSApple OSS Distributions }
541*e3723e1fSApple OSS Distributions
542*e3723e1fSApple OSS Distributions err = thread_set_state(thread, ARM_SVE_P_STATE, (thread_state_t)&p_state, ARM_SVE_P_STATE_COUNT);
543*e3723e1fSApple OSS Distributions if (err) {
544*e3723e1fSApple OSS Distributions return err;
545*e3723e1fSApple OSS Distributions }
546*e3723e1fSApple OSS Distributions
547*e3723e1fSApple OSS Distributions err = thread_set_state(thread, ARM_SME_ZA_STATE1, (thread_state_t)&za_state, ARM_SME_ZA_STATE_COUNT);
548*e3723e1fSApple OSS Distributions if (err) {
549*e3723e1fSApple OSS Distributions return err;
550*e3723e1fSApple OSS Distributions }
551*e3723e1fSApple OSS Distributions
552*e3723e1fSApple OSS Distributions if (sme_zt0_size()) {
553*e3723e1fSApple OSS Distributions const uint8_t *zt0 = const_sme_zt0(addr);
554*e3723e1fSApple OSS Distributions
555*e3723e1fSApple OSS Distributions arm_sme2_state_t sme2_state;
556*e3723e1fSApple OSS Distributions memcpy(sme2_state.__zt0, zt0, sizeof(sme2_state.__zt0));
557*e3723e1fSApple OSS Distributions
558*e3723e1fSApple OSS Distributions err = thread_set_state(thread, ARM_SME2_STATE, (thread_state_t)&sme2_state, ARM_SME2_STATE_COUNT);
559*e3723e1fSApple OSS Distributions if (err) {
560*e3723e1fSApple OSS Distributions return err;
561*e3723e1fSApple OSS Distributions }
562*e3723e1fSApple OSS Distributions }
563*e3723e1fSApple OSS Distributions
564*e3723e1fSApple OSS Distributions return KERN_SUCCESS;
565*e3723e1fSApple OSS Distributions }
566*e3723e1fSApple OSS Distributions
567*e3723e1fSApple OSS Distributions const struct arm_matrix_operations sme_operations = {
568*e3723e1fSApple OSS Distributions .name = "SME",
569*e3723e1fSApple OSS Distributions
570*e3723e1fSApple OSS Distributions .data_size = sme_data_size,
571*e3723e1fSApple OSS Distributions .alloc_data = sme_alloc_data,
572*e3723e1fSApple OSS Distributions
573*e3723e1fSApple OSS Distributions .is_available = sme_is_available,
574*e3723e1fSApple OSS Distributions .start = sme_start,
575*e3723e1fSApple OSS Distributions .stop = sme_stop,
576*e3723e1fSApple OSS Distributions
577*e3723e1fSApple OSS Distributions .load_one_vector = sme_load_one_vector,
578*e3723e1fSApple OSS Distributions .load_data = sme_load_data,
579*e3723e1fSApple OSS Distributions .store_data = sme_store_data,
580*e3723e1fSApple OSS Distributions
581*e3723e1fSApple OSS Distributions .thread_get_state = sme_thread_get_state,
582*e3723e1fSApple OSS Distributions .thread_set_state = sme_thread_set_state,
583*e3723e1fSApple OSS Distributions };
584