xref: /xnu-10063.121.3/tests/arm_cpu_capabilities.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2020 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <darwintest.h>
30 #include <machine/cpu_capabilities.h>
31 #include <sys/sysctl.h>
32 
33 #include "exc_helpers.h"
34 
35 T_GLOBAL_META(
36 	T_META_NAMESPACE("xnu.arm"),
37 	T_META_RADAR_COMPONENT_NAME("xnu"),
38 	T_META_RADAR_COMPONENT_VERSION("arm"),
39 	T_META_OWNER("sdooher"),
40 	T_META_RUN_CONCURRENTLY(true),
41 	T_META_TAG("SoCSpecific")
42 	);
43 
/*
 * Outcome of the capability probe currently being run.
 * test_cpu_capability() sets it to true before calling a try_*() probe;
 * bad_instruction_handler() and the FPCR-based probes clear it when the
 * feature turns out not to be usable.
 */
static volatile bool cap_usable;
45 
46 static size_t
bad_instruction_handler(mach_port_t task __unused,mach_port_t thread __unused,exception_type_t type __unused,mach_exception_data_t codes __unused)47 bad_instruction_handler(mach_port_t task __unused, mach_port_t thread __unused,
48     exception_type_t type __unused, mach_exception_data_t codes __unused)
49 {
50 	cap_usable = false;
51 	return 4;
52 }
53 
54 static void
try_fp16(void)55 try_fp16(void)
56 {
57 	asm volatile (
58                 "fmov	h0, #0" "\n"
59                 :
60                 :
61                 : "v0"
62         );
63 }
64 
65 static void
try_atomics(void)66 try_atomics(void)
67 {
68 	uint64_t dword;
69 	asm volatile (
70                 "swp	xzr, xzr, [%[dword]]"
71                 :
72                 : [dword]"r"(&dword)
73         );
74 }
75 
76 static void
try_crc32(void)77 try_crc32(void)
78 {
79 	asm volatile ( "crc32b	wzr, wzr, wzr");
80 }
81 
82 static void
try_fhm(void)83 try_fhm(void)
84 {
85 	asm volatile (
86                 "fmov	d0, #0"                 "\n"
87                 "fmlal	v0.2s, v0.2h, v0.2h"    "\n"
88                 :
89                 :
90                 : "v0"
91         );
92 }
93 
94 static void
try_sha512(void)95 try_sha512(void)
96 {
97 	asm volatile (
98                 "fmov		d0, #0"                 "\n"
99                 "fmov		d1, #0"                 "\n"
100                 "sha512h	q0, q0, v0.2d"          "\n"
101                 :
102                 :
103                 : "v0"
104         );
105 }
106 
107 static void
try_sha3(void)108 try_sha3(void)
109 {
110 	asm volatile (
111                 "fmov	d0, #0"                         "\n"
112                 "fmov	d1, #0"                         "\n"
113                 "eor3	v0.16b, v0.16b, v0.16b, v0.16b" "\n"
114                 :
115                 :
116                 : "v0"
117         );
118 }
119 
120 static void
try_sha1(void)121 try_sha1(void)
122 {
123 	asm volatile (
124                 "fmov		s0, #0"         "\n"
125                 "sha1h		s0, s0"         "\n"
126                 :
127                 :
128                 : "v0"
129         );
130 }
131 
132 static void
try_pmull(void)133 try_pmull(void)
134 {
135 	asm volatile (
136                 "fmov	d0, #0"                 "\n"
137                 "pmull	v0.1q, v0.1d, v0.1d"    "\n"
138                 :
139                 :
140                 : "v0"
141         );
142 }
143 
144 static void
try_aes(void)145 try_aes(void)
146 {
147 	asm volatile (
148                 "fmov		d0, #0"                 "\n"
149                 "fmov		d1, #0"                 "\n"
150                 "aesd		v0.16B, v0.16B"         "\n"
151                 :
152                 :
153                 : "v0"
154         );
155 }
156 
157 
158 static void
try_sha256(void)159 try_sha256(void)
160 {
161 	asm volatile (
162                 "fmov           d0, #0"                 "\n"
163                 "fmov           d1, #0"                 "\n"
164                 "sha256h        q0, q0, v0.4s"          "\n"
165                 :
166                 :
167                 : "v0"
168         );
169 }
170 
171 
172 static void
try_compnum(void)173 try_compnum(void)
174 {
175 	asm volatile (
176                 "fmov	d0, #0"                         "\n"
177                 "fcadd	v0.2s, v0.2s, v0.2s, #90"       "\n"
178                 :
179                 :
180                 : "v0"
181         );
182 }
183 
184 
185 static void
try_flagm(void)186 try_flagm(void)
187 {
188 	asm volatile (
189                 "cfinv"        "\n"
190                 "cfinv"        "\n"
191         );
192 }
193 
194 static void
try_flagm2(void)195 try_flagm2(void)
196 {
197 	asm volatile (
198                 "axflag"        "\n"
199                 "xaflag"        "\n"
200         );
201 }
202 
203 static void
try_dotprod(void)204 try_dotprod(void)
205 {
206 	asm volatile (
207                 "udot v0.4S,v1.16B,v2.16B"
208                 :
209                 :
210                 : "v0"
211         );
212 }
213 
214 static void
try_rdm(void)215 try_rdm(void)
216 {
217 	asm volatile (
218                 "sqrdmlah s0, s1, s2"
219                 :
220                 :
221                 : "s0"
222         );
223 }
224 
225 static void
try_sb(void)226 try_sb(void)
227 {
228 	asm volatile (
229                 "sb"
230         );
231 }
232 
233 static void
try_frintts(void)234 try_frintts(void)
235 {
236 	asm volatile (
237                 "frint32x s0, s0"
238                 :
239                 :
240                 : "s0"
241         );
242 }
243 
244 static void
try_jscvt(void)245 try_jscvt(void)
246 {
247 	asm volatile (
248                 "fmov	d0, #0"      "\n"
249                 "fjcvtzs w1, d0"     "\n"
250                 :
251                 :
252                 : "w1", "d0"
253         );
254 }
255 
256 static void
try_pauth(void)257 try_pauth(void)
258 {
259 	asm volatile (
260                 "pacga x0, x0, x0"
261                 :
262                 :
263                 : "x0"
264         );
265 }
266 
267 static void
try_dpb(void)268 try_dpb(void)
269 {
270 	int x;
271 	asm volatile (
272                 "dc cvap, %0"
273                 :
274                 : "r" (&x)
275         );
276 }
277 
278 static void
try_dpb2(void)279 try_dpb2(void)
280 {
281 	int x;
282 	asm volatile (
283                 "dc cvadp, %0"
284                 :
285                 : "r" (&x)
286         );
287 }
288 
289 static void
try_lrcpc(void)290 try_lrcpc(void)
291 {
292 	int x;
293 	asm volatile (
294                 "ldaprb w0, [%0]"
295                 :
296                 : "r" (&x)
297                 : "w0"
298         );
299 }
300 
301 static void
try_lrcpc2(void)302 try_lrcpc2(void)
303 {
304 	int x;
305 	asm volatile (
306                 "ldapurb w0, [%0]"
307                 :
308                 : "r" (&x)
309                 : "w0"
310         );
311 }
312 
313 
314 static void
try_specres(void)315 try_specres(void)
316 {
317 	int x;
318 	asm volatile (
319                 "cfp rctx, %0"
320                 :
321                 : "r" (&x)
322         );
323 }
324 
325 static void
try_bf16(void)326 try_bf16(void)
327 {
328 	asm volatile (
329                 "bfdot v0.4S,v1.8H,v2.8H"
330                 :
331                 :
332                 : "v0"
333         );
334 }
335 
336 static void
try_i8mm(void)337 try_i8mm(void)
338 {
339 	asm volatile (
340                 "sudot v0.4S,v1.16B,v2.4B[0]"
341                 :
342                 :
343                 : "v0"
344         );
345 }
346 
/*
 * Probe for FEAT_ECV by reading the self-synchronized counter registers.
 *
 * CNTPCTSS_EL0 and CNTVCTSS_EL0 exist only when FEAT_ECV is implemented;
 * without it, direct accesses to them are UNDEFINED. The values read are
 * discarded.
 */
static void
try_ecv(void)
{
	uint64_t physical_count = __builtin_arm_rsr64("CNTPCTSS_EL0");
	uint64_t virtual_count = __builtin_arm_rsr64("CNTVCTSS_EL0");

	(void)physical_count;
	(void)virtual_count;
}
357 
358 static void
try_afp(void)359 try_afp(void)
360 {
361 	/*
362 	 * FEAT_AFP can be detected via three new FPCR bits which were
363 	 * previously marked read-as-zero.
364 	 */
365 	const uint64_t FPCR_AFP_FLAGS = (1 << 0) | (1 << 1) | (1 << 2);
366 
367 	uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
368 	__builtin_arm_wsr64("FPCR", old_fpcr | FPCR_AFP_FLAGS);
369 	uint64_t new_fpcr = __builtin_arm_rsr64("FPCR");
370 	__builtin_arm_wsr64("FPCR", old_fpcr);
371 
372 	if ((new_fpcr & FPCR_AFP_FLAGS) != FPCR_AFP_FLAGS) {
373 		cap_usable = false;
374 	}
375 }
376 
377 static void
try_rpres(void)378 try_rpres(void)
379 {
380 	/*
381 	 * When FEAT_RPRES is enabled via FPCR.AH, floating-point reciprocal
382 	 * estimate instructions increase precision from 8 mantissa bits to 12
383 	 * mantissa bits.  This can be detected by estimating 1/10.0 (which has
384 	 * no exact floating-point representation) and checking bits 11-14.
385 	 */
386 	const uint64_t FPCR_AH = (1 << 1);
387 	const uint32_t EXTRA_MANTISSA_BITS = (0xf << 11);
388 
389 	uint32_t recip;
390 	uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
391 	__builtin_arm_wsr64("FPCR", old_fpcr | FPCR_AH);
392 	asm volatile (
393                 "fmov	s0, #10.0"      "\n"
394                 "frecpe s0, s0"         "\n"
395                 "fmov   %w0, s0"        "\n"
396                 : "=r"(recip)
397                 :
398                 : "s0"
399         );
400 	__builtin_arm_wsr64("FPCR", old_fpcr);
401 
402 	if ((recip & EXTRA_MANTISSA_BITS) == 0) {
403 		cap_usable = false;
404 	}
405 }
406 
407 
408 static void
try_fpexcp(void)409 try_fpexcp(void)
410 {
411 	/* FP Exceptions are supported if all exceptions bit can be set. */
412 	const uint64_t flags = (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 15);
413 
414 	uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
415 	__builtin_arm_wsr64("FPCR", old_fpcr | flags);
416 	uint64_t new_fpcr = __builtin_arm_rsr64("FPCR");
417 	__builtin_arm_wsr64("FPCR", old_fpcr);
418 
419 	if ((new_fpcr & flags) != flags) {
420 		cap_usable = false;
421 	}
422 }
423 
424 static void
try_dit(void)425 try_dit(void)
426 {
427 	asm volatile (
428                 "msr DIT, x0"
429                 :
430                 :
431                 : "x0"
432         );
433 }
434 
435 static mach_port_t exc_port;
436 
437 static void
test_cpu_capability(const char * cap_name,uint64_t cap_flag,bool has_commpage_entry,const char * cap_sysctl,void (* try_cpu_capability)(void))438 test_cpu_capability(const char *cap_name, uint64_t cap_flag, bool has_commpage_entry, const char *cap_sysctl, void (*try_cpu_capability)(void))
439 {
440 	uint64_t caps = _get_cpu_capabilities();
441 	bool has_cap_flag = (caps & cap_flag);
442 
443 	int sysctl_val;
444 	bool has_sysctl_flag = 0;
445 	if (cap_sysctl != NULL) {
446 		size_t sysctl_size = sizeof(sysctl_val);
447 		int err = sysctlbyname(cap_sysctl, &sysctl_val, &sysctl_size, NULL, 0);
448 		has_sysctl_flag = (err == 0 && sysctl_val > 0);
449 	}
450 
451 	bool has_capability = has_commpage_entry ? has_cap_flag : has_sysctl_flag;
452 
453 	if (!has_commpage_entry && cap_sysctl == NULL) {
454 		T_FAIL("Tested capability must have either sysctl or commpage flag");
455 		return;
456 	}
457 
458 	if (has_commpage_entry && cap_sysctl != NULL) {
459 		T_EXPECT_EQ(has_cap_flag, has_sysctl_flag, "%s commpage flag matches sysctl flag", cap_name);
460 	}
461 
462 	if (try_cpu_capability != NULL) {
463 		cap_usable = true;
464 		try_cpu_capability();
465 		T_EXPECT_EQ(has_capability, cap_usable, "%s capability matches actual usability", cap_name);
466 	}
467 }
468 
469 T_DECL(cpu_capabilities, "Verify ARM CPU capabilities") {
470 	exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);
471 	repeat_exception_handler(exc_port, bad_instruction_handler);
472 
473 	test_cpu_capability("FP16 (deprecated sysctl)", kHasFeatFP16, true, "hw.optional.neon_fp16", NULL);
474 	test_cpu_capability("FP16", kHasFeatFP16, true, "hw.optional.arm.FEAT_FP16", try_fp16);
475 	test_cpu_capability("LSE (deprecated sysctl)", kHasFeatLSE, true, "hw.optional.armv8_1_atomics", NULL);
476 	test_cpu_capability("LSE", kHasFeatLSE, true, "hw.optional.arm.FEAT_LSE", try_atomics);
477 	test_cpu_capability("CRC32", kHasARMv8Crc32, true, "hw.optional.armv8_crc32", try_crc32);
478 	test_cpu_capability("FHM (deprecated sysctl)", kHasFeatFHM, true, "hw.optional.armv8_2_fhm", NULL);
479 	test_cpu_capability("FHM", kHasFeatFHM, true, "hw.optional.arm.FEAT_FHM", try_fhm);
480 	test_cpu_capability("SHA512", kHasFeatSHA512, true, "hw.optional.armv8_2_sha512", try_sha512);
481 	test_cpu_capability("SHA3", kHasFeatSHA3, true, "hw.optional.armv8_2_sha3", try_sha3);
482 	test_cpu_capability("AES", kHasFeatAES, true, "hw.optional.arm.FEAT_AES", try_aes);
483 	test_cpu_capability("SHA1", kHasFeatSHA1, true, "hw.optional.arm.FEAT_SHA1", try_sha1);
484 	test_cpu_capability("SHA256", kHasFeatSHA256, true, "hw.optional.arm.FEAT_SHA256", try_sha256);
485 	test_cpu_capability("PMULL", kHasFeatPMULL, true, "hw.optional.arm.FEAT_PMULL", try_pmull);
486 	test_cpu_capability("FCMA (deprecated sysctl)", kHasFeatFCMA, true, "hw.optional.armv8_3_compnum", NULL);
487 	test_cpu_capability("FCMA", kHasFeatFCMA, true, "hw.optional.arm.FEAT_FCMA", try_compnum);
488 	test_cpu_capability("FlagM", kHasFEATFlagM, true, "hw.optional.arm.FEAT_FlagM", try_flagm);
489 	test_cpu_capability("FlagM2", kHasFEATFlagM2, true, "hw.optional.arm.FEAT_FlagM2", try_flagm2);
490 	test_cpu_capability("DotProd", kHasFeatDotProd, true, "hw.optional.arm.FEAT_DotProd", try_dotprod);
491 	test_cpu_capability("RDM", kHasFeatRDM, true, "hw.optional.arm.FEAT_RDM", try_rdm);
492 	test_cpu_capability("SB", kHasFeatSB, true, "hw.optional.arm.FEAT_SB", try_sb);
493 	test_cpu_capability("FRINTTS", kHasFeatFRINTTS, true, "hw.optional.arm.FEAT_FRINTTS", try_frintts);
494 	test_cpu_capability("JSCVT", kHasFeatJSCVT, true, "hw.optional.arm.FEAT_JSCVT", try_jscvt);
495 	test_cpu_capability("PAuth", kHasFeatPAuth, true, "hw.optional.arm.FEAT_PAuth", try_pauth);
496 	test_cpu_capability("DBP", kHasFeatDPB, true, "hw.optional.arm.FEAT_DPB", try_dpb);
497 	test_cpu_capability("DBP2", kHasFeatDPB2, true, "hw.optional.arm.FEAT_DPB2", try_dpb2);
498 	test_cpu_capability("SPECRES", kHasFeatSPECRES, true, "hw.optional.arm.FEAT_SPECRES", try_specres);
499 	test_cpu_capability("LRCPC", kHasFeatLRCPC, true, "hw.optional.arm.FEAT_LRCPC", try_lrcpc);
500 	test_cpu_capability("LRCPC2", kHasFeatLRCPC2, true, "hw.optional.arm.FEAT_LRCPC2", try_lrcpc2);
501 	test_cpu_capability("AFP", kHasFeatAFP, true, "hw.optional.arm.FEAT_AFP", try_afp);
502 	test_cpu_capability("DIT", kHasFeatDIT, true, "hw.optional.arm.FEAT_DIT", try_dit);
503 	test_cpu_capability("FP16", kHasFP_SyncExceptions, true, "hw.optional.arm.FP_SyncExceptions", try_fpexcp);
504 
505 	// The following features do not have a commpage entry
506 	test_cpu_capability("BF16", 0, false, "hw.optional.arm.FEAT_BF16", try_bf16);
507 	test_cpu_capability("I8MM", 0, false, "hw.optional.arm.FEAT_I8MM", try_i8mm);
508 	test_cpu_capability("ECV", 0, false, "hw.optional.arm.FEAT_ECV", try_ecv);
509 	test_cpu_capability("RPRES", 0, false, "hw.optional.arm.FEAT_RPRES", try_rpres);
510 
511 	// The following features do not add instructions or registers to test for the presence of
512 	test_cpu_capability("LSE2", kHasFeatLSE2, true, "hw.optional.arm.FEAT_LSE2", NULL);
513 	test_cpu_capability("CSV2", kHasFeatCSV2, true, "hw.optional.arm.FEAT_CSV2", NULL);
514 	test_cpu_capability("CSV3", kHasFeatCSV3, true, "hw.optional.arm.FEAT_CSV3", NULL);
515 }
516