1 /*
2 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm/misc_protos.h>
30 #include <arm64/proc_reg.h>
31 #include <libkern/section_keywords.h>
32
/*
 * Highest SME ISA version supported by this platform (0 = no SME).
 * Written once by arm_sme_init() on the boot CPU, read-only thereafter
 * (enforced late by the SECURITY_READ_ONLY_LATE section placement).
 */
SECURITY_READ_ONLY_LATE(unsigned int) sme_version = 0;
34
/**
 * Returns the version of SME supported on this platform.
 *
 * In contrast to the compile-time HAS_ARM_FEAT_SME/HAS_ARM_FEAT_SME2 checks
 * that indicate compiler support, arm_sme_version() is a runtime check that
 * indicates actual processor support.
 *
 * Valid only after arm_sme_init() has run on the boot CPU, which populates
 * sme_version from ID_AA64PFR1_EL1; before that it reads as 0.
 *
 * @return the highest SME ISA version supported on this platform
 *         (where 0 indicates no SME support)
 */
unsigned int
arm_sme_version(void)
{
	return sme_version;
}
50
51 #if HAS_ARM_FEAT_SME
52
53 #include <kern/cpu_data.h>
54 #include <kern/thread.h>
55
/**
 * Per-CPU SME initialization, performed during CPU bringup.
 *
 * On the boot CPU, first probes ID_AA64PFR1_EL1 to discover and cache the
 * supported SME version.  If the platform has no SME this is a no-op;
 * otherwise it enables SME at EL1 (trapped at EL0), programs the streaming
 * vector length, disables SME prioritization, and clears TPIDR2_EL0.
 *
 * @param is_boot_cpu whether this is the first CPU being brought up
 *                    (only the boot CPU probes and publishes sme_version)
 */
void
arm_sme_init(bool is_boot_cpu)
{
	if (is_boot_cpu) {
		/* Probe and cache the SME version field exactly once. */
		uint64_t aa64pfr1_el1 = __builtin_arm_rsr64("ID_AA64PFR1_EL1");
		sme_version = (aa64pfr1_el1 & ID_AA64PFR1_EL1_SME_MASK) >> ID_AA64PFR1_EL1_SME_OFFSET;
	}

	if (!sme_version) {
		/* No SME support: leave all SME control registers untouched. */
		return;
	}

	/* enable SME at EL1 only */
	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
	cpacr_el1 &= ~CPACR_SMEN_MASK;
	cpacr_el1 |= CPACR_SMEN_EL0_TRAP;
	__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
	/* Synchronize the CPACR_EL1 update before touching the SME registers below. */
	__builtin_arm_isb(ISB_SY);

	/* set vector length to max supported by hardware */
	uint64_t smcr_el1 = SMCR_EL1_LEN(~0);
#ifdef APPLEH16
	/*
	 * fastsim bug: rdar://96247932 (SME streaming vector length seems to be uncapped)
	 *
	 * SME saved-state with the max-size SVL is too large to use with the
	 * zone allocator. H16G hardware is expected to cap SVL at 64 bytes.
	 */
	const unsigned int H16_SME_SVL_B = 64;
	/* SMCR_EL1.LEN encodes (SVL in 16-byte granules) - 1. */
	smcr_el1 = SMCR_EL1_LEN((H16_SME_SVL_B / 16) - 1);
#endif
#if HAS_ARM_FEAT_SME2
	/* enable ZT0 access */
	smcr_el1 |= SMCR_EL1_EZT0;
#endif
	__builtin_arm_wsr64("SMCR_EL1", smcr_el1);

	/* disable SME prioritization */
	const uint64_t smpri_el1 = SMPRI_EL1_PRIORITY(0);
	__builtin_arm_wsr64("SMPRI_EL1", smpri_el1);

	/* Start this CPU with no TPIDR2_EL0 state carried over. */
	__builtin_arm_wsr64("TPIDR2_EL0", 0);
}
99
100 /**
101 * Returns the streaming SVE vector length. The total size of the ZA array is
102 * SVL_B x SVL_B bytes.
103 *
104 * @return the number of 8-bit elements in a streaming SVE vector
105 */
106 uint16_t
arm_sme_svl_b(void)107 arm_sme_svl_b(void)
108 {
109 uint64_t ret = 0;
110 asm volatile (
111 "rdsvl %[ret], #1"
112 : [ret] "=r"(ret)
113 );
114
115 assert(__builtin_popcountll(ret) == 1);
116 assert(ret >= 16);
117 assert(ret <= 256);
118
119 return (uint16_t)ret;
120 }
121
122 /**
123 * Save the current CPU's ZA array to the provided storage space.
124 *
125 * @param sme_ss destination ZA storage
126 * @param svl_b SVL corresponding to sme_ss, in bytes
127 */
128 void
arm_save_sme_za(arm_sme_context_t * sme_ss,uint16_t svl_b)129 arm_save_sme_za(arm_sme_context_t *sme_ss, uint16_t svl_b)
130 {
131 uint8_t *za = arm_sme_za(sme_ss, svl_b);
132 /*
133 * SME adds ldr and str variants convenient for context-switching ZA:
134 *
135 * <ldr|str> za[<Wv>, #<imm>], [<Xn>, #<imm>, mul vl]
136 *
137 * If we view ZA as a 2D array with dimensions SVL_B x SVL_B, then these
138 * instructions copy data between ZA[<Wv> + <imm>][] and an SVL_B-sized
139 * block of memory starting at address <Xn> + <imm> * SVL_B.
140 *
141 * <imm> is between 0-15, so we can perform up to 16 copies before
142 * updating <Wv> and <Xn>. <Wv> also must be one of W12-W15. This is
143 * an unusual restriction for AArch64 that can't be represented with
144 * extended asm register constraints, so we need to manually constrain
145 * this operand with the register keyword.
146 */
147 for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
148 asm volatile (
149 "str za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
150 "str za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
151 "str za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
152 "str za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
153 "str za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
154 "str za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
155 "str za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
156 "str za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
157 "str za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
158 "str za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
159 "str za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
160 "str za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
161 "str za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
162 "str za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
163 "str za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
164 "str za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
165 :
166 : [i] "r"(i),
167 [addr] "r"(za + (i * svl_b))
168 );
169 }
170 }
171
172 /**
173 * Load the current CPU's ZA array from the provided storage space.
174 *
175 * @param sme_ss source ZA storage
176 * @param svl_b SVL corresponding to sme_ss, in bytes
177 */
178 void
arm_load_sme_za(const arm_sme_context_t * sme_ss,uint16_t svl_b)179 arm_load_sme_za(const arm_sme_context_t *sme_ss, uint16_t svl_b)
180 {
181 const uint8_t *za = const_arm_sme_za(sme_ss, svl_b);
182 for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
183 asm volatile (
184 "ldr za[%w[i], #0], [%[addr], #0, mul vl]" "\n"
185 "ldr za[%w[i], #1], [%[addr], #1, mul vl]" "\n"
186 "ldr za[%w[i], #2], [%[addr], #2, mul vl]" "\n"
187 "ldr za[%w[i], #3], [%[addr], #3, mul vl]" "\n"
188 "ldr za[%w[i], #4], [%[addr], #4, mul vl]" "\n"
189 "ldr za[%w[i], #5], [%[addr], #5, mul vl]" "\n"
190 "ldr za[%w[i], #6], [%[addr], #6, mul vl]" "\n"
191 "ldr za[%w[i], #7], [%[addr], #7, mul vl]" "\n"
192 "ldr za[%w[i], #8], [%[addr], #8, mul vl]" "\n"
193 "ldr za[%w[i], #9], [%[addr], #9, mul vl]" "\n"
194 "ldr za[%w[i], #10], [%[addr], #10, mul vl]" "\n"
195 "ldr za[%w[i], #11], [%[addr], #11, mul vl]" "\n"
196 "ldr za[%w[i], #12], [%[addr], #12, mul vl]" "\n"
197 "ldr za[%w[i], #13], [%[addr], #13, mul vl]" "\n"
198 "ldr za[%w[i], #14], [%[addr], #14, mul vl]" "\n"
199 "ldr za[%w[i], #15], [%[addr], #15, mul vl]" "\n"
200 :
201 : [i] "r"(i),
202 [addr] "r"(za + (i * svl_b))
203 );
204 }
205 }
206
207 /**
208 * Configures CPACR_EL1 to trap or enable SME instructions at EL0.
209 *
210 * The caller does not need to issue any instruction barriers;
211 * arm_context_switch_requires_sync() is automatically invoked if needed.
212 *
213 * @param trap_enabled whether to trap SME instructions at EL0
214 */
215 void
arm_sme_trap_at_el0(bool trap_enabled)216 arm_sme_trap_at_el0(bool trap_enabled)
217 {
218 uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
219 unsigned int prev_mode = (unsigned int)(cpacr_el1 & CPACR_SMEN_MASK);
220 unsigned int new_mode = trap_enabled ? CPACR_SMEN_EL0_TRAP : CPACR_SMEN_ENABLE;
221
222 if (prev_mode != new_mode) {
223 cpacr_el1 &= ~CPACR_SMEN_MASK;
224 cpacr_el1 |= new_mode;
225 __builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
226 arm_context_switch_requires_sync();
227 }
228 }
229
230 /**
231 * Returns whether the current thread has an active SME context.
232 */
233 boolean_t
arm_sme_is_active(void)234 arm_sme_is_active(void)
235 {
236 /* Kernel entry clobbers SVCR.SM, so check the saved state instead of live register state */
237 arm_sme_saved_state_t *sme_ss = machine_thread_get_sme_state(current_thread());
238 return sme_ss && (sme_ss->svcr & (SVCR_SM | SVCR_ZA));
239 }
240
241 #if HAS_ARM_FEAT_SME2
242 /**
243 * Save the current CPU's ZT0 array to the provided storage space.
244 *
245 * @param sme_ss destination ZT0 storage
246 */
247 void
arm_save_sme_zt0(arm_sme_context_t * sme_ss)248 arm_save_sme_zt0(arm_sme_context_t *sme_ss)
249 {
250 asm volatile (
251 "str zt0, [%[addr]]"
252 :
253 : [addr] "r"(sme_ss->zt0)
254 );
255 }
256
257 /**
258 * Load the current CPU's ZT0 array from the provided storage space.
259 *
260 * @param sme_ss source ZT0 storage
261 */
262 void
arm_load_sme_zt0(const arm_sme_context_t * sme_ss)263 arm_load_sme_zt0(const arm_sme_context_t *sme_ss)
264 {
265 asm volatile (
266 "ldr zt0, [%[addr]]"
267 :
268 : [addr] "r"(sme_ss->zt0)
269 );
270 }
271 #endif /* HAS_ARM_FEAT_SME2 */
272
273 /**
274 * Save the current CPU's ZA and ZT0 arrays to the provided storage space.
275 *
276 * If this CPU does not support SME2, ZT0 storage is zeroed out instead.
277 *
278 * @param sme_ss destination storage
279 * @param svl_b SVL corresponding to sme_ss, in bytes
280 */
281 void
arm_save_sme_za_zt0(arm_sme_context_t * sme_ss,uint16_t svl_b)282 arm_save_sme_za_zt0(arm_sme_context_t *sme_ss, uint16_t svl_b)
283 {
284 arm_save_sme_za(sme_ss, svl_b);
285 #if HAS_ARM_FEAT_SME2
286 if (arm_sme_version() >= 2) {
287 arm_save_sme_zt0(sme_ss);
288 }
289 #else
290 if (0) {
291 }
292 #endif
293 else {
294 bzero(sme_ss->zt0, sizeof(sme_ss->zt0));
295 }
296 }
297
298 /**
299 * Load the current CPU's ZA and ZT0 arrays from the provided storage space.
300 *
301 * If this CPU does not support SME2, ZT0 storage is ignored.
302 *
303 * @param sme_ss source storage
304 * @param svl_b SVL corresponding to sme_ss, in bytes
305 */
306 void
arm_load_sme_za_zt0(const arm_sme_context_t * sme_ss,uint16_t svl_b)307 arm_load_sme_za_zt0(const arm_sme_context_t *sme_ss, uint16_t svl_b)
308 {
309 arm_load_sme_za(sme_ss, svl_b);
310 #if HAS_ARM_FEAT_SME2
311 if (arm_sme_version() >= 2) {
312 arm_load_sme_zt0(sme_ss);
313 }
314 #endif
315 }
316
317 #endif /* HAS_ARM_FEAT_SME */
318