1 /*
2 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm/misc_protos.h>
30 #include <arm64/proc_reg.h>
31 #include <libkern/section_keywords.h>
32
/* Highest SME ISA version supported (0 == ARM_SME_UNSUPPORTED); probed once on the boot CPU in arm_sme_init(). */
SECURITY_READ_ONLY_LATE(arm_sme_version_t) sme_version = 0;
/* Streaming SVE vector length (SVL) in bytes, read back from hardware on the boot CPU in arm_sme_init(). */
SECURITY_READ_ONLY_LATE(int) sme_max_svl_b = 0;
35
36 /**
37 * Returns the version of SME supported on this platform.
38 *
39 * In contrast to the compile-time HAS_ARM_FEAT_SME/HAS_ARM_FEAT_SME2 checks
40 * that indicate compiler support, arm_sme_version() is a runtime check that
41 * indicates actual processor support.
42 *
43 * @return the highest SME ISA version supported on this platform
44 * (where ARM_SME_UNSUPPORTED or 0 indicates no SME support)
45 */
46 arm_sme_version_t
arm_sme_version(void)47 arm_sme_version(void)
48 {
49 return sme_version;
50 }
51
52 #if HAS_ARM_FEAT_SME
53
54 #include <kern/cpu_data.h>
55 #include <kern/thread.h>
56
57 static arm_sme_version_t
arm_sme_probe_version(void)58 arm_sme_probe_version(void)
59 {
60 uint64_t aa64pfr1_el1 = __builtin_arm_rsr64("ID_AA64PFR1_EL1");
61 uint64_t aa64pfr1_el1_sme = aa64pfr1_el1 & ID_AA64PFR1_EL1_SME_MASK;
62
63 if (aa64pfr1_el1_sme < ID_AA64PFR1_EL1_SME_EN) {
64 return ARM_SME_UNSUPPORTED;
65 }
66
67 uint64_t aa64smfr0_el1 = __builtin_arm_rsr64("ID_AA64SMFR0_EL1");
68 uint64_t aa64smfr0_el1_smever = aa64smfr0_el1 & ID_AA64SMFR0_EL1_SMEver_MASK;
69
70 switch (aa64smfr0_el1_smever) {
71 case ID_AA64SMFR0_EL1_SMEver_SME:
72 return ARM_FEAT_SME;
73
74 case ID_AA64SMFR0_EL1_SMEver_SME2:
75 return ARM_FEAT_SME2;
76
77 default:
78 return ARM_FEAT_SME2;
79 }
80 }
81
/*
 * Platform cap on SVL, in bytes. On APPLEVIRTUALPLATFORM it sizes the SVL
 * requested in arm_sme_init(); on other platforms it is referenced only by
 * asserts in arm_sme_svl_b(), hence the conditional __assert_only.
 */
#if !APPLEVIRTUALPLATFORM
__assert_only
#endif
static const unsigned int SME_MAX_SVL_B = 64;
86
/**
 * Per-CPU SME initialization, invoked during CPU startup.
 *
 * The boot CPU additionally probes the supported SME version and, after
 * programming SMCR_EL1, reads back the resulting maximum streaming vector
 * length; both are published in the read-only-late globals sme_version and
 * sme_max_svl_b. Secondary CPUs only program their own control registers.
 *
 * @param is_boot_cpu true when called on the boot processor
 */
void
arm_sme_init(bool is_boot_cpu)
{
	if (is_boot_cpu) {
		sme_version = arm_sme_probe_version();
	}

	/* Nothing to configure on hardware without SME. */
	if (!sme_version) {
		return;
	}

	/* enable SME at EL1 only */
	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
	cpacr_el1 &= ~CPACR_SMEN_MASK;
	cpacr_el1 |= CPACR_SMEN_EL0_TRAP;
	__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
	/* Synchronize the CPACR_EL1 update before touching SME registers below. */
	__builtin_arm_isb(ISB_SY);

#if APPLEVIRTUALPLATFORM
	/* SMCR_EL1.LEN encodes SVL in 16-byte granules, minus one. */
	uint64_t smcr_el1 = SMCR_EL1_LEN((SME_MAX_SVL_B / 16) - 1);
#else
	/* An all-ones LEN requests the largest SVL the hardware supports. */
	uint64_t smcr_el1 = SMCR_EL1_LEN(~0);
#endif
#if HAS_ARM_FEAT_SME2
	/* enable ZT0 access */
	smcr_el1 |= SMCR_EL1_EZT0;
#endif
	/*
	 * Request the highest possible SVL and read back the actual SVL.
	 * ARM guarantees these accesses will occur in program order.
	 */
	__builtin_arm_wsr64("SMCR_EL1", smcr_el1);
	if (is_boot_cpu) {
		sme_max_svl_b = arm_sme_svl_b();
	}

	/* disable SME prioritization */
	const uint64_t smpri_el1 = SMPRI_EL1_PRIORITY(0);
	__builtin_arm_wsr64("SMPRI_EL1", smpri_el1);

	/* Clear TPIDR2_EL0 (the EL0 SME state pointer defined by the SME ABI). */
	__builtin_arm_wsr64("TPIDR2_EL0", 0);
}
129
130 /**
131 * Returns the streaming SVE vector length. The total size of the ZA array is
132 * SVL_B x SVL_B bytes.
133 *
134 * @return the number of 8-bit elements in a streaming SVE vector
135 */
136 uint16_t
arm_sme_svl_b(void)137 arm_sme_svl_b(void)
138 {
139 uint64_t ret = 0;
140 asm volatile (
141 "rdsvl %[ret], #1"
142 : [ret] "=r"(ret)
143 );
144
145 assert(__builtin_popcountll(ret) == 1);
146 assert(ret >= 16);
147 assert(ret <= SME_MAX_SVL_B);
148
149 return (uint16_t)ret;
150 }
151
152 /**
153 * Save the current CPU's ZA array to the provided storage space.
154 *
155 * @param sme_ss destination ZA storage
156 * @param svl_b SVL corresponding to sme_ss, in bytes
157 */
158 void
arm_save_sme_za(arm_sme_context_t * sme_ss,uint16_t svl_b)159 arm_save_sme_za(arm_sme_context_t *sme_ss, uint16_t svl_b)
160 {
161 uint8_t *za = arm_sme_za(sme_ss, svl_b);
162 /*
163 * SME adds ldr and str variants convenient for context-switching ZA:
164 *
165 * <ldr|str> za[<Wv>, #<imm>], [<Xn>, #<imm>, mul vl]
166 *
167 * If we view ZA as a 2D array with dimensions SVL_B x SVL_B, then these
168 * instructions copy data between ZA[<Wv> + <imm>][] and an SVL_B-sized
169 * block of memory starting at address <Xn> + <imm> * SVL_B.
170 *
171 * <imm> is between 0-15, so we can perform up to 16 copies before
172 * updating <Wv> and <Xn>. <Wv> also must be one of W12-W15. This is
173 * an unusual restriction for AArch64 that can't be represented with
174 * extended asm register constraints, so we need to manually constrain
175 * this operand with the register keyword.
176 */
177 for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
178 asm volatile (
179 "str za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
180 "str za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
181 "str za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
182 "str za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
183 "str za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
184 "str za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
185 "str za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
186 "str za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
187 "str za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
188 "str za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
189 "str za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
190 "str za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
191 "str za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
192 "str za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
193 "str za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
194 "str za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
195 :
196 : [i] "r"(i),
197 [addr] "r"(za + (i * svl_b))
198 );
199 }
200 }
201
202 /**
203 * Load the current CPU's ZA array from the provided storage space.
204 *
205 * @param sme_ss source ZA storage
206 * @param svl_b SVL corresponding to sme_ss, in bytes
207 */
208 void
arm_load_sme_za(const arm_sme_context_t * sme_ss,uint16_t svl_b)209 arm_load_sme_za(const arm_sme_context_t *sme_ss, uint16_t svl_b)
210 {
211 const uint8_t *za = const_arm_sme_za(sme_ss, svl_b);
212 for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
213 asm volatile (
214 "ldr za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
215 "ldr za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
216 "ldr za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
217 "ldr za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
218 "ldr za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
219 "ldr za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
220 "ldr za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
221 "ldr za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
222 "ldr za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
223 "ldr za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
224 "ldr za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
225 "ldr za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
226 "ldr za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
227 "ldr za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
228 "ldr za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
229 "ldr za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
230 :
231 : [i] "r"(i),
232 [addr] "r"(za + (i * svl_b))
233 );
234 }
235 }
236
237 /**
238 * Configures CPACR_EL1 to trap or enable SME instructions at EL0.
239 *
240 * The caller does not need to issue any instruction barriers;
241 * arm_context_switch_requires_sync() is automatically invoked if needed.
242 *
243 * @param trap_enabled whether to trap SME instructions at EL0
244 */
245 void
arm_sme_trap_at_el0(bool trap_enabled)246 arm_sme_trap_at_el0(bool trap_enabled)
247 {
248 uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
249 unsigned int prev_mode = (unsigned int)(cpacr_el1 & CPACR_SMEN_MASK);
250 unsigned int new_mode = trap_enabled ? CPACR_SMEN_EL0_TRAP : CPACR_SMEN_ENABLE;
251
252 if (prev_mode != new_mode) {
253 cpacr_el1 &= ~CPACR_SMEN_MASK;
254 cpacr_el1 |= new_mode;
255 __builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
256 arm_context_switch_requires_sync();
257 }
258 }
259
260 /**
261 * Returns whether the current thread has an active SME context.
262 */
263 boolean_t
arm_sme_is_active(void)264 arm_sme_is_active(void)
265 {
266 /* Kernel entry clobbers SVCR.SM, so check the saved state instead of live register state */
267 arm_sme_saved_state_t *sme_ss = machine_thread_get_sme_state(current_thread());
268 return sme_ss && (sme_ss->svcr & (SVCR_SM | SVCR_ZA));
269 }
270
271 #if HAS_ARM_FEAT_SME2
272 /**
273 * Save the current CPU's ZT0 array to the provided storage space.
274 *
275 * @param sme_ss destination ZT0 storage
276 */
277 void
arm_save_sme_zt0(arm_sme_context_t * sme_ss)278 arm_save_sme_zt0(arm_sme_context_t *sme_ss)
279 {
280 asm volatile (
281 "str zt0, [%[addr]]"
282 :
283 : [addr] "r"(sme_ss->zt0)
284 );
285 }
286
287 /**
288 * Load the current CPU's ZT0 array from the provided storage space.
289 *
290 * @param sme_ss source ZT0 storage
291 */
292 void
arm_load_sme_zt0(const arm_sme_context_t * sme_ss)293 arm_load_sme_zt0(const arm_sme_context_t *sme_ss)
294 {
295 asm volatile (
296 "ldr zt0, [%[addr]]"
297 :
298 : [addr] "r"(sme_ss->zt0)
299 );
300 }
301 #endif /* HAS_ARM_FEAT_SME2 */
302
303 /**
304 * Save the current CPU's ZA and ZT0 arrays to the provided storage space.
305 *
306 * If this CPU does not support SME2, ZT0 storage is zeroed out instead.
307 *
308 * @param sme_ss destination storage
309 * @param svl_b SVL corresponding to sme_ss, in bytes
310 */
311 void
arm_save_sme_za_zt0(arm_sme_context_t * sme_ss,uint16_t svl_b)312 arm_save_sme_za_zt0(arm_sme_context_t *sme_ss, uint16_t svl_b)
313 {
314 arm_save_sme_za(sme_ss, svl_b);
315 #if HAS_ARM_FEAT_SME2
316 if (arm_sme_version() >= 2) {
317 arm_save_sme_zt0(sme_ss);
318 }
319 #else
320 if (0) {
321 }
322 #endif
323 else {
324 bzero(sme_ss->zt0, sizeof(sme_ss->zt0));
325 }
326 }
327
328 /**
329 * Load the current CPU's ZA and ZT0 arrays from the provided storage space.
330 *
331 * If this CPU does not support SME2, ZT0 storage is ignored.
332 *
333 * @param sme_ss source storage
334 * @param svl_b SVL corresponding to sme_ss, in bytes
335 */
336 void
arm_load_sme_za_zt0(const arm_sme_context_t * sme_ss,uint16_t svl_b)337 arm_load_sme_za_zt0(const arm_sme_context_t *sme_ss, uint16_t svl_b)
338 {
339 arm_load_sme_za(sme_ss, svl_b);
340 #if HAS_ARM_FEAT_SME2
341 if (arm_sme_version() >= 2) {
342 arm_load_sme_zt0(sme_ss);
343 }
344 #endif
345 }
346
347 #endif /* HAS_ARM_FEAT_SME */
348