xref: /xnu-12377.81.4/tests/arm_matrix.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
/*
 * Copyright (c) 2019 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/**
 * On devices that support it, this test ensures that a mach exception is
 * generated when a matrix-math instruction is executed outside an active
 * matrix context, and that the matrix register file is correctly preserved
 * or zeroed on context switch.
 */

/*
 * IMPLEMENTATION NOTE:
 *
 * This test code goes to some unusual lengths to avoid calling out to libc or
 * libdarwintest while the CPU is in streaming SVE mode (i.e., between
 * ops->start() and ops->stop()).  Both of these libraries are built with SIMD
 * instructions that will cause the test executable to crash while in streaming
 * SVE mode.
 *
 * Ordinarily this is the wrong way to solve this problem.  Functions that use
 * streaming SVE mode should have annotations telling the compiler so, and the
 * compiler will automatically generate appropriate interworking code.  However,
 * this interworking code will stash SME state to memory and temporarily exit
 * streaming SVE mode.  We're specifically testing how xnu manages live SME
 * register state, so we can't let the compiler stash and disable this state
 * behind our backs.
 */
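
/*
 * For reference, a sketch of the annotation-based approach described above,
 * which this test deliberately avoids.  It assumes the ACLE SME keyword
 * attributes as implemented in recent clang; the function names are
 * hypothetical and the block is intentionally not compiled:
 */
#if 0
/*
 * A caller that is not in streaming mode gets compiler-generated
 * interworking around calls to this function: it enters streaming mode
 * before the call and exits afterwards, spilling any live SME state to
 * memory.  That is exactly the save/restore this test must not allow
 * behind its back.
 */
void matrix_kernel(const uint8_t *data) __arm_streaming;

/*
 * Callable whether or not streaming mode is active; the compiler emits no
 * mode switch at the call site.
 */
void streaming_agnostic_helper(void) __arm_streaming_compatible;
#endif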

#ifdef __arm64__
#include <mach/error.h>
#endif /* __arm64__ */

#include <darwintest.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <mach/mach.h>
#include <mach/thread_act.h>
#include <mach/thread_status.h>
#include <mach/thread_switch.h>
#include <mach/exception.h>
#include <machine/cpu_capabilities.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/wait.h>

#include "arm_matrix.h"
#include "exc_helpers.h"
#include "test_utils.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("ghackmann"),
	T_META_RUN_CONCURRENTLY(true)
	);

#ifdef __arm64__

#ifndef EXC_ARM_SME_DISALLOWED
#define EXC_ARM_SME_DISALLOWED 2
#endif

/* Whether we caught the EXC_BAD_INSTRUCTION mach exception or not. */
static volatile bool mach_exc_caught = false;
static size_t
bad_instruction_exception_handler(
	__unused mach_port_t task,
	__unused mach_port_t thread,
	exception_type_t type,
	mach_exception_data_t codes,
	__unused uint64_t exception_pc)
{
	T_QUIET; T_ASSERT_EQ(type, EXC_BAD_INSTRUCTION, "Caught an EXC_BAD_INSTRUCTION exception");
	T_QUIET; T_ASSERT_EQ(codes[0], (uint64_t)EXC_ARM_UNDEFINED, "The exception code is EXC_ARM_UNDEFINED");

	mach_exc_caught = true;
	/* Resume by skipping past the 4-byte faulting instruction. */
	return 4;
}
#endif


#ifdef __arm64__
static void
test_matrix_not_started(const struct arm_matrix_operations *ops)
{
	if (!ops->is_available()) {
		T_SKIP("Running on non-%s target, skipping...", ops->name);
	}

	mach_port_t exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);

	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	bzero(d, size);

	ops->start();
	ops->load_one_vector(d);
	ops->stop();
	T_PASS("%s instruction after start instruction should not cause an exception", ops->name);

	mach_exc_caught = false;
	run_exception_handler(exc_port, bad_instruction_exception_handler);
	ops->load_one_vector(d);
	T_EXPECT_TRUE(mach_exc_caught, "%s instruction before start instruction should cause an exception", ops->name);

	free(d);
}
#endif


T_DECL(sme_not_started,
    "Test that SME instructions before smstart generate mach exceptions.")
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	test_matrix_not_started(&sme_operations);
#endif
}

#ifdef __arm64__
struct test_thread;
typedef bool (*thread_fn_t)(struct test_thread const* thread);

struct test_thread {
	pthread_t thread;
	pthread_t companion_thread;
	thread_fn_t thread_fn;
	uint32_t cpuid;
	uint32_t thread_id;
	const struct arm_matrix_operations *ops;
};

static uint32_t barrier;
static pthread_cond_t barrier_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t barrier_lock = PTHREAD_MUTEX_INITIALIZER;

static uint32_t end_barrier;
static pthread_cond_t end_barrier_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t end_barrier_lock = PTHREAD_MUTEX_INITIALIZER;
static void
test_thread_barrier(void)
{
	/* Wait for all threads to reach this barrier */
	pthread_mutex_lock(&barrier_lock);
	barrier--;
	if (barrier) {
		while (barrier) {
			pthread_cond_wait(&barrier_cond, &barrier_lock);
		}
	} else {
		pthread_cond_broadcast(&barrier_cond);
	}
	pthread_mutex_unlock(&barrier_lock);
}

static void
test_thread_notify_exited(void)
{
	pthread_mutex_lock(&end_barrier_lock);
	if (0 == --end_barrier) {
		pthread_cond_signal(&end_barrier_cond);
	}
	pthread_mutex_unlock(&end_barrier_lock);
}

static void
wait_for_test_threads(void)
{
	pthread_mutex_lock(&end_barrier_lock);
	while (end_barrier) {
		pthread_cond_wait(&end_barrier_cond, &end_barrier_lock);
	}
	pthread_mutex_unlock(&end_barrier_lock);
}

static uint32_t
ncpus(void)
{
	uint32_t ncpu;
	size_t ncpu_size = sizeof(ncpu);
	int err = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Retrieved CPU count");

	return ncpu;
}

static int
thread_bind_cpu_unchecked(uint32_t cpuid)
{
	/*
	 * libc's sysctl() implementation calls strlen(name), which is
	 * SIMD-accelerated.  Avoid this by directly invoking the libsyscall
	 * wrapper with namelen computed at compile time.
	 */
#define THREAD_BIND_CPU "kern.sched_thread_bind_cpu"
	extern int __sysctlbyname(const char *name, size_t namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
	const char *name = THREAD_BIND_CPU;
	size_t namelen = sizeof(THREAD_BIND_CPU) - 1;
	return __sysctlbyname(name, namelen, NULL, 0, &cpuid, sizeof(cpuid));
}

static void
thread_bind_cpu(uint32_t cpuid)
{
	int err = thread_bind_cpu_unchecked(cpuid);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
}

static void *
test_thread_shim(void *arg)
{
	struct test_thread const *thread = arg;

	thread_bind_cpu(thread->cpuid);
	bool const ret = thread->thread_fn(thread);
	test_thread_notify_exited();
	return (void *)(uintptr_t)ret;
}

static void
test_on_each_cpu(thread_fn_t thread_fn, const struct arm_matrix_operations *ops, const char *desc)
{
	uint32_t ncpu = ncpus();
	uint32_t nthreads = ncpu * 2;
	barrier = 1 /* This thread */ + nthreads;
	end_barrier = nthreads;
	struct test_thread *threads = calloc(nthreads, sizeof(threads[0]));

	for (uint32_t i = 0; i < nthreads; i++) {
		threads[i].thread_fn = thread_fn;
		threads[i].cpuid = i % ncpu;
		threads[i].thread_id = i;
		threads[i].ops = ops;

		int const err = pthread_create(&threads[i].thread, NULL, test_thread_shim, &threads[i]);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: created thread #%u", desc, i);

		// Threads i and (ncpu + i) % nthreads are pinned to the same CPU;
		// over the course of the loop, each is registered as the other's
		// companion under test.
		threads[(ncpu + i) % nthreads].companion_thread = threads[i].thread;
	}

	// Wait for all companion_threads to be set.
	test_thread_barrier();

	// Like pthread_join()ing all threads, but without the priority-boosting
	// shenanigans.
	wait_for_test_threads();

	for (uint32_t i = 0; i < nthreads; i++) {
		void *thread_ret_ptr;
		int err = pthread_join(threads[i].thread, &thread_ret_ptr);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: joined thread #%u", desc, i);

		bool thread_ret = (uintptr_t)thread_ret_ptr;
		if (thread_ret) {
			T_PASS("%s: thread #%u passed", desc, i);
		} else {
			T_FAIL("%s: thread #%u failed", desc, i);
		}
	}

	free(threads);
}

static bool
active_context_switch_thread(struct test_thread const* thread)
{
	const struct arm_matrix_operations *ops = thread->ops;
	const uint32_t thread_id = thread->thread_id;
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	// companion_thread will be valid only after the barrier.
	thread_t const companion_thread = pthread_mach_thread_np(thread->companion_thread);
	T_QUIET; T_ASSERT_NE(companion_thread, THREAD_NULL, "pthread_mach_thread_np");

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);

		/*
		 * Rescheduling with the matrix registers active must preserve
		 * state, even after a context switch.
		 */
		thread_switch(companion_thread, SWITCH_OPTION_NONE, 0);

		ops->store_data(d2);
		ops->stop();

		if (memcmp(d1, d2, size)) {
			ok = false;
		}
	}

	free(d2);
	free(d1);
	return ok;
}

static bool
inactive_context_switch_thread(struct test_thread const* thread)
{
	const struct arm_matrix_operations *ops = thread->ops;
	const uint32_t thread_id = thread->thread_id;
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	// companion_thread will be valid only after the barrier.
	thread_t const companion_thread = pthread_mach_thread_np(thread->companion_thread);
	T_QUIET; T_ASSERT_NE(companion_thread, THREAD_NULL, "pthread_mach_thread_np");

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);
		ops->stop();

		/*
		 * Rescheduling with the matrix registers inactive may preserve
		 * state or may zero it out.
		 */
		thread_switch(companion_thread, SWITCH_OPTION_NONE, 0);

		ops->start();
		ops->store_data(d2);
		ops->stop();

		for (size_t j = 0; j < size; j++) {
			if (d1[j] != d2[j] && d2[j] != 0) {
				ok = false;
			}
		}
	}

	free(d2);
	free(d1);
	return ok;
}

static void
test_thread_migration(const struct arm_matrix_operations *ops)
{
	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	arc4random_buf(d, size);

	uint32_t ncpu = ncpus();
	uint8_t *cpu_d[ncpu];
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		cpu_d[cpuid] = ops->alloc_data();
		memset(cpu_d[cpuid], 0, size);
	}

	ops->start();
	ops->load_data(d);
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int err = thread_bind_cpu_unchecked(cpuid);
		if (err) {
			ops->stop();
			T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
		}
		ops->store_data(cpu_d[cpuid]);
	}
	ops->stop();

	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int cmp = memcmp(d, cpu_d[cpuid], size);
		T_EXPECT_EQ(cmp, 0, "Matrix state migrated to CPU %u", cpuid);
		free(cpu_d[cpuid]);
	}
	free(d);
}
#endif


T_DECL(sme_context_switch,
    "Test that SME contexts are migrated during context switch and do not leak between process contexts.",
    T_META_BOOTARGS_SET("enable_skstb=1"),
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_SME2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	if (!sme_operations.is_available()) {
		T_SKIP("Running on non-SME target, skipping...");
	}

	test_thread_migration(&sme_operations);
	test_on_each_cpu(active_context_switch_thread, &sme_operations, "SME context migrates when active");
	test_on_each_cpu(inactive_context_switch_thread, &sme_operations, "SME context does not leak across processes");
#endif
}


#if __arm64__
/*
 * Sequence of events in thread_{get,set}_state test:
 *
 * 1. Parent creates child thread.
 * 2. Child thread signals parent thread to proceed.
 * 3. Parent populates child's matrix state registers via thread_set_state(),
 *    and signals child thread to proceed.
 * 4. Child arbitrarily updates each byte in its local matrix register state
 *    by adding 1, and signals parent thread to proceed.
 * 5. Parent reads back the child's updated matrix state with
 *    thread_get_state(), and confirms that every byte has been modified as
 *    expected.
 */
static enum thread_state_test_state {
	INIT,
	CHILD_READY,
	PARENT_POPULATED_MATRIX_STATE,
	CHILD_UPDATED_MATRIX_STATE,
	DONE
} thread_state_test_state;

static pthread_cond_t thread_state_test_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t thread_state_test_lock = PTHREAD_MUTEX_INITIALIZER;

static void
wait_for_thread_state_test_state(enum thread_state_test_state state)
{
	pthread_mutex_lock(&thread_state_test_lock);
	while (thread_state_test_state != state) {
		pthread_cond_wait(&thread_state_test_cond, &thread_state_test_lock);
	}
	pthread_mutex_unlock(&thread_state_test_lock);
}

static void
thread_set_state_test_state(enum thread_state_test_state state)
{
	pthread_mutex_lock(&thread_state_test_lock);
	thread_state_test_state = state;
	pthread_cond_broadcast(&thread_state_test_cond);
	pthread_mutex_unlock(&thread_state_test_lock);
}

static void *
test_matrix_thread_state_child(void *arg)
{
	const struct arm_matrix_operations *ops = arg;

	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();

	thread_set_state_test_state(CHILD_READY);
	wait_for_thread_state_test_state(PARENT_POPULATED_MATRIX_STATE);
	ops->store_data(d);
	for (size_t i = 0; i < size; i++) {
		d[i]++;
	}
	ops->load_data(d);
	thread_set_state_test_state(CHILD_UPDATED_MATRIX_STATE);

	wait_for_thread_state_test_state(DONE);
	ops->stop();
	free(d);
	return NULL;
}

static void
test_matrix_thread_state(const struct arm_matrix_operations *ops)
{
	if (!ops->is_available()) {
		T_SKIP("Running on non-%s target, skipping...", ops->name);
	}

	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	arc4random_buf(d, size);

	thread_state_test_state = INIT;

	pthread_t thread;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
	void *arg = ops;
#pragma clang diagnostic pop
	int err = pthread_create(&thread, NULL, test_matrix_thread_state_child, arg);
	T_QUIET; T_ASSERT_EQ(err, 0, "pthread_create()");

	mach_port_t mach_thread = pthread_mach_thread_np(thread);
	T_QUIET; T_ASSERT_NE(mach_thread, MACH_PORT_NULL, "pthread_mach_thread_np()");

	wait_for_thread_state_test_state(CHILD_READY);
	kern_return_t kr = ops->thread_set_state(mach_thread, d);
	T_QUIET; T_ASSERT_EQ(kr, KERN_SUCCESS, "%s thread_set_state()", ops->name);
	thread_set_state_test_state(PARENT_POPULATED_MATRIX_STATE);

	wait_for_thread_state_test_state(CHILD_UPDATED_MATRIX_STATE);
	uint8_t *thread_d = ops->alloc_data();
	kr = ops->thread_get_state(mach_thread, thread_d);
	T_QUIET; T_ASSERT_EQ(kr, KERN_SUCCESS, "%s thread_get_state()", ops->name);
	for (size_t i = 0; i < size; i++) {
		d[i]++;
	}
	T_EXPECT_EQ(memcmp(d, thread_d, size), 0, "thread_get_state() read expected %s data from child thread", ops->name);

	thread_set_state_test_state(DONE);
	free(thread_d);
	free(d);
	pthread_join(thread, NULL);
}

#endif

#ifdef __arm64__

T_DECL(sme_thread_state,
    "Test thread_{get,set}_state with SME thread state.",
    XNU_T_META_SOC_SPECIFIC)
{
	test_matrix_thread_state(&sme_operations);
}

T_DECL(sme_exception_ports,
    "Test that thread_set_exception_ports rejects SME thread-state flavors.",
    XNU_T_META_SOC_SPECIFIC)
{
	mach_port_t exc_port;
	mach_port_t task = mach_task_self();
	mach_port_t thread = mach_thread_self();

	kern_return_t kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Allocated mach exception port");
	kr = mach_port_insert_right(task, exc_port, exc_port, MACH_MSG_TYPE_MAKE_SEND);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Inserted a SEND right into the exception port");

	kr = thread_set_exception_ports(thread, EXC_MASK_ALL, exc_port, EXCEPTION_STATE, ARM_THREAD_STATE64);
	T_EXPECT_MACH_SUCCESS(kr, "thread_set_exception_ports accepts flavor %u", (unsigned int)ARM_THREAD_STATE64);

	for (thread_state_flavor_t flavor = ARM_SME_STATE; flavor <= ARM_SME2_STATE; flavor++) {
		kr = thread_set_exception_ports(thread, EXC_MASK_ALL, exc_port, EXCEPTION_STATE, flavor);
		T_EXPECT_MACH_ERROR(kr, KERN_INVALID_ARGUMENT, "thread_set_exception_ports rejects flavor %u", (unsigned int)flavor);
	}
}

T_DECL(sme_max_svl_b_sysctl,
    "Test the hw.optional.arm.sme_max_svl_b sysctl.",
    XNU_T_META_SOC_SPECIFIC)
{
	unsigned int max_svl_b;
	size_t max_svl_b_size = sizeof(max_svl_b);

	int err = sysctlbyname("hw.optional.arm.sme_max_svl_b", &max_svl_b, &max_svl_b_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "sysctlbyname(hw.optional.arm.sme_max_svl_b)");
	if (sme_operations.is_available()) {
		/* Architecturally, SVL must be a power of two between 128 and 2048 bits. */
		const unsigned int ARCH_MIN_SVL_B = 128 / 8;
		const unsigned int ARCH_MAX_SVL_B = 2048 / 8;

		T_EXPECT_EQ(__builtin_popcount(max_svl_b), 1, "Maximum SVL_B is a power of 2");
		T_EXPECT_GE(max_svl_b, ARCH_MIN_SVL_B, "Maximum SVL_B >= architectural minimum");
		T_EXPECT_LE(max_svl_b, ARCH_MAX_SVL_B, "Maximum SVL_B <= architectural maximum");
	} else {
		T_EXPECT_EQ(max_svl_b, 0, "Maximum SVL_B is 0 when SME is unavailable");
	}
}

static void
dup_and_check_matrix_state(const struct arm_matrix_operations *ops)
{
	if (!ops->is_available()) {
		T_SKIP("Running on non-%s target, skipping...", ops->name);
	}

	size_t size = ops->data_size();
	uint8_t *d_in = ops->alloc_data();
	uint8_t *d_out = ops->alloc_data();
	arc4random_buf(d_in, size);

	ops->start();
	ops->load_data(d_in);

	pid_t pid = fork();
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork()");
	if (pid == 0) {
		ops->store_data(d_out);
		ops->stop();

		int cmp = memcmp(d_in, d_out, size);
		free(d_out);
		free(d_in);
		exit(cmp);
	}

	ops->stop();
	free(d_out);
	free(d_in);

	siginfo_t info;
	int err = waitid(P_PID, (id_t)pid, &info, WEXITED);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "waitid()");
	T_QUIET; T_ASSERT_EQ(info.si_signo, SIGCHLD, "child exited");
	T_QUIET; T_ASSERT_EQ(info.si_code, CLD_EXITED, "child exited");
	int cmp = info.si_status;

	T_EXPECT_EQ(cmp, 0, "%s state correctly duplicated during fork()", ops->name);
}


T_DECL(sme_thread_dup,
    "Test duplicating SME thread saved-state.",
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_SME2", 1),
    XNU_T_META_SOC_SPECIFIC)
{
	/*
	 * libsystem has streaming-incompatible atfork handlers, so for this
	 * test we can only set SVCR.ZA.
	 */
	dup_and_check_matrix_state(&sme_za_operations);
}

#endif /* __arm64__ */
653