/* xref: /xnu-12377.1.9/osfmk/arm64/sme.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea) */
/*
 * Copyright (c) 2022 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28 
29 #include <arm/misc_protos.h>
30 #include <arm64/proc_reg.h>
31 #include <libkern/section_keywords.h>
32 
/* Highest SME ISA version implemented by this platform; 0 (ARM_SME_UNSUPPORTED)
 * when SME is absent.  Written once, on the boot CPU, by arm_sme_init(). */
SECURITY_READ_ONLY_LATE(arm_sme_version_t) sme_version = 0;
/* Streaming vector length (SVL) in bytes, as read back from the boot CPU by
 * arm_sme_init() after programming SMCR_EL1. */
SECURITY_READ_ONLY_LATE(int) sme_max_svl_b = 0;
35 
36 /**
37  * Returns the version of SME supported on this platform.
38  *
39  * In contrast to the compile-time HAS_ARM_FEAT_SME/HAS_ARM_FEAT_SME2 checks
40  * that indicate compiler support, arm_sme_version() is a runtime check that
41  * indicates actual processor support.
42  *
43  * @return the highest SME ISA version supported on this platform
44  * (where ARM_SME_UNSUPPORTED or 0 indicates no SME support)
45  */
46 arm_sme_version_t
arm_sme_version(void)47 arm_sme_version(void)
48 {
49 	return sme_version;
50 }
51 
52 #if HAS_ARM_FEAT_SME
53 
54 #include <kern/cpu_data.h>
55 #include <kern/thread.h>
56 
57 static arm_sme_version_t
arm_sme_probe_version(void)58 arm_sme_probe_version(void)
59 {
60 	uint64_t aa64pfr1_el1 = __builtin_arm_rsr64("ID_AA64PFR1_EL1");
61 	uint64_t aa64pfr1_el1_sme = aa64pfr1_el1 & ID_AA64PFR1_EL1_SME_MASK;
62 
63 	if (aa64pfr1_el1_sme < ID_AA64PFR1_EL1_SME_EN) {
64 		return ARM_SME_UNSUPPORTED;
65 	}
66 
67 	uint64_t aa64smfr0_el1 = __builtin_arm_rsr64("ID_AA64SMFR0_EL1");
68 	uint64_t aa64smfr0_el1_smever = aa64smfr0_el1 & ID_AA64SMFR0_EL1_SMEver_MASK;
69 
70 	switch (aa64smfr0_el1_smever) {
71 	case ID_AA64SMFR0_EL1_SMEver_SME:
72 		return ARM_FEAT_SME;
73 
74 	case ID_AA64SMFR0_EL1_SMEver_SME2:
75 		return ARM_FEAT_SME2;
76 
77 	default:
78 		return ARM_FEAT_SME2;
79 	}
80 }
81 
/*
 * Upper bound on the streaming vector length (SVL), in bytes.  On
 * APPLEVIRTUALPLATFORM this caps the SVL requested via SMCR_EL1 in
 * arm_sme_init(); on other platforms it is referenced only by the assert in
 * arm_sme_svl_b(), hence the __assert_only annotation.
 */
#if !APPLEVIRTUALPLATFORM
__assert_only
#endif
static const unsigned int SME_MAX_SVL_B = 64;
86 
/**
 * Per-CPU SME initialization, run during CPU bringup.
 *
 * On the boot CPU this first probes the supported SME version (publishing it
 * through sme_version); every CPU then enables SME at EL1, programs the SVL
 * via SMCR_EL1, disables SME prioritization, and clears TPIDR2_EL0.  The boot
 * CPU additionally records the resulting SVL in sme_max_svl_b.
 *
 * @param is_boot_cpu true when called for the boot CPU
 */
void
arm_sme_init(bool is_boot_cpu)
{
	if (is_boot_cpu) {
		sme_version = arm_sme_probe_version();
	}

	/* sme_version == 0 (ARM_SME_UNSUPPORTED) means this platform has no SME */
	if (!sme_version) {
		return;
	}

	/* enable SME at EL1 only */
	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
	cpacr_el1 &= ~CPACR_SMEN_MASK;
	cpacr_el1 |= CPACR_SMEN_EL0_TRAP;
	__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
	__builtin_arm_isb(ISB_SY);

#if APPLEVIRTUALPLATFORM
	/* cap the requested SVL at SME_MAX_SVL_B; SMCR_EL1.LEN encodes (SVL/16) - 1 */
	uint64_t smcr_el1 = SMCR_EL1_LEN((SME_MAX_SVL_B / 16) - 1);
#else
	/* request the largest encodable LEN; hardware selects the SVL it supports */
	uint64_t smcr_el1 = SMCR_EL1_LEN(~0);
#endif
#if HAS_ARM_FEAT_SME2
	/* enable ZT0 access */
	smcr_el1 |= SMCR_EL1_EZT0;
#endif
	/*
	 * Request the highest possible SVL and read back the actual SVL.
	 * ARM guarantees these accesses will occur in program order.
	 */
	__builtin_arm_wsr64("SMCR_EL1", smcr_el1);
	if (is_boot_cpu) {
		sme_max_svl_b = arm_sme_svl_b();
	}

	/* disable SME prioritization */
	const uint64_t smpri_el1 = SMPRI_EL1_PRIORITY(0);
	__builtin_arm_wsr64("SMPRI_EL1", smpri_el1);

	/* start EL0 with no stale TPIDR2 value on this CPU */
	__builtin_arm_wsr64("TPIDR2_EL0", 0);
}
129 
/**
 * Returns the streaming SVE vector length.  The total size of the ZA array is
 * SVL_B x SVL_B bytes.
 *
 * @return the number of 8-bit elements in a streaming SVE vector
 */
uint16_t
arm_sme_svl_b(void)
{
	uint64_t ret = 0;
	/* rdsvl <Xd>, #1: read the streaming vector length in bytes (x multiplier 1) */
	asm volatile (
                "rdsvl	%[ret], #1"
                : [ret] "=r"(ret)
        );

	/* sanity: SVL must be a power of two, at least 16 bytes, and no larger
	 * than the SME_MAX_SVL_B this code is prepared to handle */
	assert(__builtin_popcountll(ret) == 1);
	assert(ret >= 16);
	assert(ret <= SME_MAX_SVL_B);

	return (uint16_t)ret;
}
151 
152 /**
153  * Save the current CPU's ZA array to the provided storage space.
154  *
155  * @param sme_ss destination ZA storage
156  * @param svl_b SVL corresponding to sme_ss, in bytes
157  */
158 void
arm_save_sme_za(arm_sme_context_t * sme_ss,uint16_t svl_b)159 arm_save_sme_za(arm_sme_context_t *sme_ss, uint16_t svl_b)
160 {
161 	uint8_t *za = arm_sme_za(sme_ss, svl_b);
162 	/*
163 	 * SME adds ldr and str variants convenient for context-switching ZA:
164 	 *
165 	 *   <ldr|str> za[<Wv>, #<imm>], [<Xn>, #<imm>, mul vl]
166 	 *
167 	 * If we view ZA as a 2D array with dimensions SVL_B x SVL_B, then these
168 	 * instructions copy data between ZA[<Wv> + <imm>][] and an SVL_B-sized
169 	 * block of memory starting at address <Xn> + <imm> * SVL_B.
170 	 *
171 	 * <imm> is between 0-15, so we can perform up to 16 copies before
172 	 * updating <Wv> and <Xn>.  <Wv> also must be one of W12-W15.  This is
173 	 * an unusual restriction for AArch64 that can't be represented with
174 	 * extended asm register constraints, so we need to manually constrain
175 	 * this operand with the register keyword.
176 	 */
177 	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
178 		asm volatile (
179                         "str    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
180                         "str    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
181                         "str    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
182                         "str    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
183                         "str    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
184                         "str    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
185                         "str    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
186                         "str    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
187                         "str    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
188                         "str    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
189                         "str    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
190                         "str    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
191                         "str    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
192                         "str    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
193                         "str    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
194                         "str    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
195                         :
196                         : [i] "r"(i),
197                           [addr] "r"(za + (i * svl_b))
198                 );
199 	}
200 }
201 
202 /**
203  * Load the current CPU's ZA array from the provided storage space.
204  *
205  * @param sme_ss source ZA storage
206  * @param svl_b SVL corresponding to sme_ss, in bytes
207  */
208 void
arm_load_sme_za(const arm_sme_context_t * sme_ss,uint16_t svl_b)209 arm_load_sme_za(const arm_sme_context_t *sme_ss, uint16_t svl_b)
210 {
211 	const uint8_t *za = const_arm_sme_za(sme_ss, svl_b);
212 	for (register uint16_t i asm("w12") = 0; i < svl_b; i += 16) {
213 		asm volatile (
214                         "ldr    za[%w[i],  #0], [%[addr],  #0, mul vl]"   "\n"
215                         "ldr    za[%w[i],  #1], [%[addr],  #1, mul vl]"   "\n"
216                         "ldr    za[%w[i],  #2], [%[addr],  #2, mul vl]"   "\n"
217                         "ldr    za[%w[i],  #3], [%[addr],  #3, mul vl]"   "\n"
218                         "ldr    za[%w[i],  #4], [%[addr],  #4, mul vl]"   "\n"
219                         "ldr    za[%w[i],  #5], [%[addr],  #5, mul vl]"   "\n"
220                         "ldr    za[%w[i],  #6], [%[addr],  #6, mul vl]"   "\n"
221                         "ldr    za[%w[i],  #7], [%[addr],  #7, mul vl]"   "\n"
222                         "ldr    za[%w[i],  #8], [%[addr],  #8, mul vl]"   "\n"
223                         "ldr    za[%w[i],  #9], [%[addr],  #9, mul vl]"   "\n"
224                         "ldr    za[%w[i], #10], [%[addr], #10, mul vl]"   "\n"
225                         "ldr    za[%w[i], #11], [%[addr], #11, mul vl]"   "\n"
226                         "ldr    za[%w[i], #12], [%[addr], #12, mul vl]"   "\n"
227                         "ldr    za[%w[i], #13], [%[addr], #13, mul vl]"   "\n"
228                         "ldr    za[%w[i], #14], [%[addr], #14, mul vl]"   "\n"
229                         "ldr    za[%w[i], #15], [%[addr], #15, mul vl]"   "\n"
230                         :
231                         : [i] "r"(i),
232                           [addr] "r"(za + (i * svl_b))
233                 );
234 	}
235 }
236 
237 /**
238  * Configures CPACR_EL1 to trap or enable SME instructions at EL0.
239  *
240  * The caller does not need to issue any instruction barriers;
241  * arm_context_switch_requires_sync() is automatically invoked if needed.
242  *
243  * @param trap_enabled whether to trap SME instructions at EL0
244  */
245 void
arm_sme_trap_at_el0(bool trap_enabled)246 arm_sme_trap_at_el0(bool trap_enabled)
247 {
248 	uint64_t cpacr_el1 = __builtin_arm_rsr64("CPACR_EL1");
249 	unsigned int prev_mode = (unsigned int)(cpacr_el1 & CPACR_SMEN_MASK);
250 	unsigned int new_mode = trap_enabled ? CPACR_SMEN_EL0_TRAP : CPACR_SMEN_ENABLE;
251 
252 	if (prev_mode != new_mode) {
253 		cpacr_el1 &= ~CPACR_SMEN_MASK;
254 		cpacr_el1 |= new_mode;
255 		__builtin_arm_wsr64("CPACR_EL1", cpacr_el1);
256 		arm_context_switch_requires_sync();
257 	}
258 }
259 
260 /**
261  * Returns whether the current thread has an active SME context.
262  */
263 boolean_t
arm_sme_is_active(void)264 arm_sme_is_active(void)
265 {
266 	/* Kernel entry clobbers SVCR.SM, so check the saved state instead of live register state */
267 	arm_sme_saved_state_t *sme_ss = machine_thread_get_sme_state(current_thread());
268 	return sme_ss && (sme_ss->svcr & (SVCR_SM | SVCR_ZA));
269 }
270 
271 #if HAS_ARM_FEAT_SME2
272 /**
273  * Save the current CPU's ZT0 array to the provided storage space.
274  *
275  * @param sme_ss destination ZT0 storage
276  */
277 void
arm_save_sme_zt0(arm_sme_context_t * sme_ss)278 arm_save_sme_zt0(arm_sme_context_t *sme_ss)
279 {
280 	asm volatile (
281                 "str zt0, [%[addr]]"
282                 :
283                 : [addr] "r"(sme_ss->zt0)
284         );
285 }
286 
287 /**
288  * Load the current CPU's ZT0 array from the provided storage space.
289  *
290  * @param sme_ss source ZT0 storage
291  */
292 void
arm_load_sme_zt0(const arm_sme_context_t * sme_ss)293 arm_load_sme_zt0(const arm_sme_context_t *sme_ss)
294 {
295 	asm volatile (
296                 "ldr	zt0, [%[addr]]"
297                 :
298                 : [addr] "r"(sme_ss->zt0)
299         );
300 }
301 #endif /* HAS_ARM_FEAT_SME2 */
302 
303 /**
304  * Save the current CPU's ZA and ZT0 arrays to the provided storage space.
305  *
306  * If this CPU does not support SME2, ZT0 storage is zeroed out instead.
307  *
308  * @param sme_ss destination storage
309  * @param svl_b SVL corresponding to sme_ss, in bytes
310  */
311 void
arm_save_sme_za_zt0(arm_sme_context_t * sme_ss,uint16_t svl_b)312 arm_save_sme_za_zt0(arm_sme_context_t *sme_ss, uint16_t svl_b)
313 {
314 	arm_save_sme_za(sme_ss, svl_b);
315 #if HAS_ARM_FEAT_SME2
316 	if (arm_sme_version() >= 2) {
317 		arm_save_sme_zt0(sme_ss);
318 	}
319 #else
320 	if (0) {
321 	}
322 #endif
323 	else {
324 		bzero(sme_ss->zt0, sizeof(sme_ss->zt0));
325 	}
326 }
327 
328 /**
329  * Load the current CPU's ZA and ZT0 arrays from the provided storage space.
330  *
331  * If this CPU does not support SME2, ZT0 storage is ignored.
332  *
333  * @param sme_ss source storage
334  * @param svl_b SVL corresponding to sme_ss, in bytes
335  */
336 void
arm_load_sme_za_zt0(const arm_sme_context_t * sme_ss,uint16_t svl_b)337 arm_load_sme_za_zt0(const arm_sme_context_t *sme_ss, uint16_t svl_b)
338 {
339 	arm_load_sme_za(sme_ss, svl_b);
340 #if HAS_ARM_FEAT_SME2
341 	if (arm_sme_version() >= 2) {
342 		arm_load_sme_zt0(sme_ss);
343 	}
344 #endif
345 }
346 
347 #endif /* HAS_ARM_FEAT_SME */
348