/*
 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/misc_protos.h>
#include <arm64/proc_reg.h>
#include <libkern/section_keywords.h>

SECURITY_READ_ONLY_LATE(unsigned int) sme_version = 0;

/**
 * Returns the version of SME supported on this platform.
 *
 * In contrast to the compile-time HAS_ARM_FEAT_SME/HAS_ARM_FEAT_SME2 checks
 * that indicate compiler support, arm_sme_version() is a runtime check that
 * indicates actual processor support.
 *
 * @return the highest SME ISA version supported on this platform
 * (where 0 indicates no SME support)
 */
unsigned int
arm_sme_version(void)
{
	return sme_version;
}

#if HAS_ARM_FEAT_SME

#include <kern/cpu_data.h>
#include <kern/thread.h>

void
arm_sme_init(bool is_boot_cpu)
{
	if (is_boot_cpu) {
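		/*
		 * ID_AA64PFR1_EL1.SME reports the supported SME version
		 * (0 = not implemented, 1 = FEAT_SME, 2 = FEAT_SME2).
		 */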
		uint64_t aa64pfr1_el1 = __builtin_arm_rsr64("ID_AA64PFR1_EL1");
		sme_version = (aa64pfr1_el1 & ID_AA64PFR1_EL1_SME_MASK) >> ID_AA64PFR1_EL1_SME_OFFSET;
	}

	if (!sme_version) {
		return;
	}

	/* enable SME at EL1 only */
	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
	cpacr_el1 &= ~CPACR_SMEN_MASK;
	cpacr_el1 |= CPACR_SMEN_EL0_TRAP;
	__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
	__builtin_arm_isb(ISB_SY);

	/* set vector length to max supported by hardware */
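	/*
	 * SMCR_EL1.LEN holds (SVL_B / 16) - 1; writing all-ones requests the
	 * largest streaming vector length the hardware implements.
	 */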
	uint64_t smcr_el1 = SMCR_EL1_LEN(~0);
#ifdef APPLEH16
	/*
	 * fastsim bug: rdar://96247932 (SME streaming vector length seems to be uncapped)
	 *
	 * SME saved-state with the max-size SVL is too large to use with the
	 * zone allocator.  H16G hardware is expected to cap SVL at 64 bytes.
	 */
	const unsigned int H16_SME_SVL_B = 64;
	smcr_el1 = SMCR_EL1_LEN((H16_SME_SVL_B / 16) - 1);
#endif
#if HAS_ARM_FEAT_SME2
	/* enable ZT0 access */
	smcr_el1 |= SMCR_EL1_EZT0;
#endif
	__builtin_arm_wsr64("SMCR_EL1", smcr_el1);

	/* disable SME prioritization */
	const uint64_t smpri_el1 = SMPRI_EL1_PRIORITY(0);
	__builtin_arm_wsr64("SMPRI_EL1", smpri_el1);

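	/*
	 * TPIDR2_EL0 is reserved by the SME software ABI for userspace's
	 * lazy-save scheme; clear any stale value so EL0 starts from a known
	 * state.
	 */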
	__builtin_arm_wsr64("TPIDR2_EL0", 0);
}

/**
 * Returns the streaming SVE vector length.  The total size of the ZA array is
 * SVL_B x SVL_B bytes.
 *
 * @return the number of 8-bit elements in a streaming SVE vector
 */
uint16_t
arm_sme_svl_b(void)
{
	uint64_t ret = 0;
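	/*
	 * "rdsvl <Xd>, #<imm>" returns <imm> * SVL_B, so a multiplier of 1
	 * reads the streaming vector length in bytes.  Architecturally SVL is
	 * a power of two between 128 and 2048 bits, hence the asserts below.
	 */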
	asm volatile (
                "rdsvl	%[ret], #1"
                : [ret] "=r"(ret)
        );

	assert(__builtin_popcountll(ret) == 1);
	assert(ret >= 16);
	assert(ret <= 256);

	return (uint16_t)ret;
}

/**
 * Save the current CPU's ZA array to the provided storage space.
 *
 * @param sme_ss destination ZA storage
 * @param svl_b SVL corresponding to sme_ss, in bytes
 */
void
arm_save_sme_za(arm_sme_context_t *sme_ss, uint16_t svl_b)
{
	uint8_t *za = arm_sme_za(sme_ss, svl_b);
	/*
	 * SME adds ldr and str variants convenient for context-switching ZA:
	 *
	 *   <ldr|str> za[<Wv>, #<imm>], [<Xn>, #<imm>, mul vl]
	 *
	 * If we view ZA as a 2D array with dimensions SVL_B x SVL_B, then these
	 * instructions copy data between ZA[<Wv> + <imm>][] and an SVL_B-sized
	 * block of memory starting at address <Xn> + <imm> * SVL_B.
	 *
	 * <imm> is between 0-15, so we can perform up to 16 copies before
	 * updating <Wv> and <Xn>.  <Wv> also must be one of W12-W15.  This is
	 * an unusual restriction for AArch64 that can't be represented with
	 * extended asm register constraints, so we need to manually constrain
	 * this operand with the register keyword.
	 */
	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
		asm volatile (
                        "str    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
                        "str    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
                        "str    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
                        "str    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
                        "str    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
                        "str    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
                        "str    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
                        "str    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
                        "str    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
                        "str    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
                        "str    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
                        "str    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
                        "str    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
                        "str    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
                        "str    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
                        "str    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
                        :
                        : [i] "r"(i),
                          [addr] "r"(za + (i * svl_b))
                );
	}
}

/**
 * Load the current CPU's ZA array from the provided storage space.
 *
 * @param sme_ss source ZA storage
 * @param svl_b SVL corresponding to sme_ss, in bytes
 */
void
arm_load_sme_za(const arm_sme_context_t *sme_ss, uint16_t svl_b)
{
	const uint8_t *za = const_arm_sme_za(sme_ss, svl_b);
	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
		asm volatile (
                        "ldr    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
                        "ldr    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
                        "ldr    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
                        "ldr    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
                        "ldr    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
                        "ldr    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
                        "ldr    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
                        "ldr    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
                        "ldr    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
                        "ldr    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
                        "ldr    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
                        "ldr    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
                        "ldr    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
                        "ldr    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
                        "ldr    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
                        "ldr    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
                        :
                        : [i] "r"(i),
                          [addr] "r"(za + (i * svl_b))
                );
	}
}

/**
 * Configures CPACR_EL1 to trap or enable SME instructions at EL0.
 *
 * The caller does not need to issue any instruction barriers;
 * arm_context_switch_requires_sync() is automatically invoked if needed.
 *
 * @param trap_enabled whether to trap SME instructions at EL0
 */
void
arm_sme_trap_at_el0(bool trap_enabled)
{
	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
	unsigned int prev_mode = (unsigned int)(cpacr_el1 & CPACR_SMEN_MASK);
	unsigned int new_mode = trap_enabled ? CPACR_SMEN_EL0_TRAP : CPACR_SMEN_ENABLE;

	if (prev_mode != new_mode) {
		cpacr_el1 &= ~CPACR_SMEN_MASK;
		cpacr_el1 |= new_mode;
		__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
		arm_context_switch_requires_sync();
	}
}

/**
 * Returns whether the current thread has an active SME context.
 */
boolean_t
arm_sme_is_active(void)
{
	/* Kernel entry clobbers SVCR.SM, so check the saved state instead of live register state */
	arm_sme_saved_state_t *sme_ss = machine_thread_get_sme_state(current_thread());
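	/* SVCR.SM is set while in Streaming SVE mode; SVCR.ZA is set while ZA storage is enabled */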
	return sme_ss && (sme_ss->svcr & (SVCR_SM | SVCR_ZA));
}

#if HAS_ARM_FEAT_SME2
/**
 * Save the current CPU's ZT0 array to the provided storage space.
 *
 * @param sme_ss destination ZT0 storage
 */
void
arm_save_sme_zt0(arm_sme_context_t *sme_ss)
{
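	/* ZT0 is a fixed 512-bit register, so this stores 64 bytes regardless of SVL */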
	asm volatile (
                "str zt0, [%[addr]]"
                :
                : [addr] "r"(sme_ss->zt0)
        );
}

/**
 * Load the current CPU's ZT0 array from the provided storage space.
 *
 * @param sme_ss source ZT0 storage
 */
void
arm_load_sme_zt0(const arm_sme_context_t *sme_ss)
{
	asm volatile (
                "ldr	zt0, [%[addr]]"
                :
                : [addr] "r"(sme_ss->zt0)
        );
}
#endif /* HAS_ARM_FEAT_SME2 */

/**
 * Save the current CPU's ZA and ZT0 arrays to the provided storage space.
 *
 * If this CPU does not support SME2, ZT0 storage is zeroed out instead.
 *
 * @param sme_ss destination storage
 * @param svl_b SVL corresponding to sme_ss, in bytes
 */
void
arm_save_sme_za_zt0(arm_sme_context_t *sme_ss, uint16_t svl_b)
{
	arm_save_sme_za(sme_ss, svl_b);
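	/*
	 * The dangling "if (0)" in the !HAS_ARM_FEAT_SME2 case exists so that
	 * the else clause below binds to an if statement regardless of which
	 * way the preprocessor conditional goes.
	 */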
#if HAS_ARM_FEAT_SME2
	if (arm_sme_version() >= 2) {
		arm_save_sme_zt0(sme_ss);
	}
#else
	if (0) {
	}
#endif
	else {
		bzero(sme_ss->zt0, sizeof(sme_ss->zt0));
	}
}

/**
 * Load the current CPU's ZA and ZT0 arrays from the provided storage space.
 *
 * If this CPU does not support SME2, ZT0 storage is ignored.
 *
 * @param sme_ss source storage
 * @param svl_b SVL corresponding to sme_ss, in bytes
 */
void
arm_load_sme_za_zt0(const arm_sme_context_t *sme_ss, uint16_t svl_b)
{
	arm_load_sme_za(sme_ss, svl_b);
#if HAS_ARM_FEAT_SME2
	if (arm_sme_version() >= 2) {
		arm_load_sme_zt0(sme_ss);
	}
#endif
}

#endif /* HAS_ARM_FEAT_SME */