xref: /xnu-11215.81.4/osfmk/kern/exclaves.c (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/exclaves.h>
30 #include <mach/mach_traps.h>
31 #include <kern/misc_protos.h>
32 #include <kern/assert.h>
33 #include <kern/recount.h>
34 #include <kern/startup.h>
35 
36 #if CONFIG_EXCLAVES
37 
38 #if CONFIG_SPTM
39 #include <arm64/sptm/sptm.h>
40 #include <arm64/hv/hv_vm.h>
41 #include <arm64/hv/hv_vcpu.h>
42 #else
43 #error Invalid configuration
44 #endif /* CONFIG_SPTM */
45 
46 #include <arm/cpu_data_internal.h>
47 #include <arm/misc_protos.h>
48 #include <kern/epoch_sync.h>
49 #include <kern/ipc_kobject.h>
50 #include <kern/kalloc.h>
51 #include <kern/locks.h>
52 #include <kern/percpu.h>
53 #include <kern/task.h>
54 #include <kern/thread.h>
55 #include <kern/zalloc.h>
56 #include <kern/exclaves_stackshot.h>
57 #include <kern/exclaves_test_stackshot.h>
58 #include <vm/pmap.h>
59 #include <pexpert/pexpert.h>
60 
61 #include <mach/exclaves_l4.h>
62 #include <mach/mach_port.h>
63 
64 #include <Exclaves/Exclaves.h>
65 
66 #include <IOKit/IOBSD.h>
67 
68 #include <xnuproxy/messages.h>
69 
70 #include "exclaves_debug.h"
71 #include "exclaves_panic.h"
72 #include "exclaves_xnuproxy.h"
73 
74 /* External & generated headers */
75 #include <xrt_hosted_types/types.h>
76 
77 #if __has_include(<Tightbeam/tightbeam.h>)
78 #include <Tightbeam/tightbeam.h>
79 #include <Tightbeam/tightbeam_private.h>
80 #endif
81 
82 #include "exclaves_resource.h"
83 #include "exclaves_upcalls.h"
84 #include "exclaves_boot.h"
85 #include "exclaves_inspection.h"
86 #include "exclaves_memory.h"
87 #include "exclaves_internal.h"
88 
89 LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");
90 
91 /* Boot lock - only used here for assertions. */
92 extern lck_mtx_t exclaves_boot_lock;
93 
/*
 * Sent/latest offset pair used when propagating clock offsets to exclaves.
 * The union lets the pair be read/written either as two independent 64-bit
 * atomics or as a single 128-bit atomic snapshot of both fields at once.
 */
typedef struct {
	union {
		/* atomic fields are used via atomic primitives */
		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
		_Atomic unsigned __int128 a_u128;
		/* non-atomic fields are used via local variable. this is needed
		 * to avoid undefined behavior with an atomic struct or
		 * accessing atomic fields non-atomically */
		struct { uint64_t sent_offset, latest_offset; } u64;
		unsigned __int128 u128;
	};
} exclaves_clock_t;
109 
110 
/*
 * Two clocks indexed by their type (EXCLAVES_CLOCK_ABSOLUTE /
 * EXCLAVES_CLOCK_CONTINUOUS). This makes things easy to lookup;
 * offsets are published via exclaves_update_timebase().
 */
static exclaves_clock_t exclaves_clock[] = {
	[EXCLAVES_CLOCK_ABSOLUTE] = {},
	[EXCLAVES_CLOCK_CONTINUOUS] = {},
};
119 
120 static kern_return_t
121 exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);
122 
123 static kern_return_t
124 exclaves_enter(void);
125 static kern_return_t
126 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);
127 
128 static kern_return_t
129 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
130 OS_NORETURN OS_NOINLINE
131 static void
132 exclaves_wait_for_panic(void);
133 
134 static inline bool
135 exclaves_clocks_need_update(void);
136 
137 static kern_return_t
138 exclaves_scheduler_boot(void);
139 
140 static kern_return_t
141 exclaves_hosted_error(bool success, XrtHosted_Error_t *error);
142 
143 static kern_return_t
144 exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
145     uint64_t offset);
146 
147 static kern_return_t
148 exclaves_scheduler_request_boot(void);
149 
150 
/*
 * A static set of exclave epoch counters, one slot per XrtHosted counter id.
 * Slots are addressed via the XrtHosted_Counter_from*Id() conversions below.
 */
static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};
155 
os_atomic(uint64_t)156 static inline os_atomic(uint64_t) *
157 exclaves_get_queue_counter(const uint64_t id)
158 {
159 	return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
160 }
161 
os_atomic(uint64_t)162 static inline os_atomic(uint64_t) *
163 exclaves_get_thread_counter(const uint64_t id)
164 {
165 	return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
166 }
167 
168 
169 /* -------------------------------------------------------------------------- */
170 #pragma mark exclaves debug configuration
171 
#if DEVELOPMENT || DEBUG
/* Boot-arg controlled debug verbosity; defaults to showing errors only. */
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

/*
 * Bitmask of exclaves requirements that may be relaxed on development
 * builds, settable via device tree or boot-arg.
 */
TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
/* On RELEASE, requirements are never relaxed. */
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif
182 
183 #endif /* CONFIG_EXCLAVES */
184 
185 /* -------------------------------------------------------------------------- */
186 #pragma mark userspace entry point
187 
188 #if CONFIG_EXCLAVES
189 static kern_return_t
operation_boot(mach_port_name_t name,exclaves_boot_stage_t stage)190 operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
191 {
192 	if (name != MACH_PORT_NULL) {
193 		/* Only accept MACH_PORT_NULL for now */
194 		return KERN_INVALID_CAPABILITY;
195 	}
196 
197 	/*
198 	 * As the boot operation itself happens outside the context of any
199 	 * conclave, it requires special privilege.
200 	 */
201 	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
202 		return KERN_DENIED;
203 	}
204 
205 	return exclaves_boot(stage);
206 }
207 #endif /* CONFIG_EXCLAVES */
208 
/*
 * User->kernel entry point for all EXCLAVES_CTL_OP_* operations.
 *
 * Gates access (boot privilege for OP_BOOT; conclave membership or kernel
 * domain privilege for everything else), waits for the required exclaves
 * boot stage, then dispatches on the operation encoded in
 * uap->operation_and_flags.
 *
 * NOTE(review): several paths return copyin()/copyout() errno values
 * directly as a kern_return_t; both encode success as 0, so callers that
 * only test for nonzero still behave — confirm no caller decodes the
 * specific error values.
 */
kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	int error = 0;

	/* Unpack user-supplied trap arguments. */
	mach_port_name_t name = uap->name;
	exclaves_id_t identifier = uap->identifier;
	mach_vm_address_t ubuffer = uap->buffer;
	mach_vm_size_t usize = uap->size;
	/* Some operations reuse the identifier argument as a buffer offset. */
	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
	mach_vm_size_t usize2 = uap->size2;
	mach_vm_size_t uoffset2 = uap->offset;
	mach_vm_address_t ustatus = uap->status;

	task_t task = current_task();

	/*
	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
	 * with xnu. That header is not shared with userspace. Make sure that
	 * the retval userspace picks up is the same as the one
	 * xnu/ExclavePlatform thinks it is.
	 */
	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));

	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
	if (flags != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Deal with OP_BOOT up-front as it has slightly different restrictions
	 * than the other operations.
	 */
	if (operation == EXCLAVES_CTL_OP_BOOT) {
		return operation_boot(name, (uint32_t)identifier);
	}

	/*
	 * All other operations are restricted to properly entitled tasks which
	 * can operate in the kernel domain, or those which have joined
	 * conclaves (which has its own entitlement check).
	 * If requirements are relaxed during development, tasks with no
	 * conclaves are also allowed.
	 */
	if (task_get_conclave(task) == NULL &&
	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
		return KERN_DENIED;
	}

	/*
	 * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
	 * return immediately.
	 */
	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task_get_conclave(task) != NULL) {
		/*
		 * For calls from tasks that have joined conclaves, now wait until
		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
		 * panic). This is a separate call to the one above because we
		 * need to distinguish EXCLAVECORE being not supported and
		 * still wait for EXCLAVEKIT to boot if it *is* supported.
		 */
		kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	switch (operation) {
	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		/* The user buffer must be exactly one L4 IPC buffer. */
		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != Exclaves_L4_IpcBuffer_Size) {
			return KERN_INVALID_ARGUMENT;
		}


		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
			return error;
		}
		assert(ipcb != NULL);
		if ((error = copyin(ubuffer, ipcb, usize))) {
			return error;
		}

		if (identifier >= CONCLAVE_SERVICE_MAX) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Verify that the service actually exists in the current
		 * domain.
		 */
		if (!exclaves_conclave_has_service(task_get_conclave(task),
		    identifier)) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
		/* Copy results back out even if the call itself failed. */
		error = copyout(ipcb, ubuffer, usize);
		/*
		 * Endpoint call to conclave may have triggered a stop upcall;
		 * check if the stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		if (error) {
			return error;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		size_t len = 0;
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
		    &len) != 0 || id_name[0] == '\0') {
			return KERN_INVALID_ARGUMENT;
		}

		/* Permissions must be a non-empty subset of READ|WRITE. */
		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
		const exclaves_buffer_perm_t supported =
		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
		    perm, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource,
		    current_space(), &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* Hand the new port name back to userspace. */
		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyin(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		kr = task_launch_conclave(name);

		/*
		 * Conclave launch call may have triggered a stop upcall;
		 * check if the stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		break;

	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		struct exclaves_resource_user uresource = {};

		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
			return KERN_INVALID_ARGUMENT;
		}

		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		/* For the moment we only ever have to deal with one request. */
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}
		error = copyin(ubuffer, &uresource, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		/* Reject unterminated (over-long) service names. */
		const size_t name_buf_len = sizeof(uresource.r_name);
		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Do the regular lookup first. If that fails, fallback to the
		 * DARWIN domain, finally fallback to the KERNEL domain.
		 */
		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
				    uresource.r_name);
			}
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
				    uresource.r_name);
			}
		}

		if (id == EXCLAVES_INVALID_ID) {
			return KERN_NOT_FOUND;
		}

		uresource.r_id = id;
		uresource.r_port = MACH_PORT_NULL;

		error = copyout(&uresource, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		kr = KERN_SUCCESS;
		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
		exclaves_resource_t *resource;

		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type !=
		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_audio_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* No senders drops the reference. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_START: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_start(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STOP: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_stop(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}


		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_status(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
		break;
	}
	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
		exclaves_resource_t *notification_resource = NULL;
		mach_port_name_t port_name = MACH_PORT_NULL;

		struct exclaves_resource_user *notification_resource_user = NULL;
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		notification_resource_user = (struct exclaves_resource_user *)
		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		error = copyin(ubuffer, notification_resource_user, usize);
		if (error) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		/* Reject unterminated (over-long) resource names. */
		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
		if (strnlen(notification_resource_user->r_name, name_buf_len)
		    == name_buf_len) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		kr = exclaves_notification_create(domain,
		    notification_resource_user->r_name, &notification_resource);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}

		kr = exclaves_resource_create_port_name(notification_resource,
		    current_space(), &port_name);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}
		notification_resource_user->r_type = notification_resource->r_type;
		notification_resource_user->r_id = notification_resource->r_id;
		notification_resource_user->r_port = port_name;
		error = copyout(notification_resource_user, ubuffer, usize);
		if (error) {
			kr = KERN_INVALID_ADDRESS;
			goto notification_resource_lookup_out;
		}

		/* Shared cleanup: free the temp buffer, drop the port on error. */
notification_resource_lookup_out:
		if (notification_resource_user != NULL) {
			kfree_data(notification_resource_user, usize);
		}
		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
			mach_port_deallocate(current_space(), port_name);
		}
		break;
	}

	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
752 
753 /* -------------------------------------------------------------------------- */
754 #pragma mark kernel entry points
755 
/*
 * In-kernel endpoint call into exclaves.
 *
 * The caller must already hold a per-thread IPC buffer (see
 * exclaves_allocate_ipc_buffer()). The tag is passed in/out through the
 * buffer's message registers; *error receives the proxy's return value
 * from the buffer on completion.
 */
kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	/* Ports are not yet supported for endpoint calls. */
	assert(port == IPC_PORT_NULL);

	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
	assert(ipcb != NULL);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
	    endpoint_id, *tag);

	/* Stash the request tag in the message registers, then call. */
	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
	kr = exclaves_endpoint_call_internal(port, endpoint_id);
	/* Read back the reply tag and the proxy's return value. */
	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
	*error = XNUPROXY_CR_RETVAL(ipcb);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
	    "error 0x%llx\n", endpoint_id, *tag, *error);

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
786 
/*
 * Lazily allocate (or reuse) the current thread's exclaves IPC context.
 *
 * The context is reference counted via usecnt; each successful call should
 * be balanced with exclaves_free_ipc_buffer(). On success, *out_ipc_buffer
 * (if non-NULL) receives the thread's IPC buffer pointer.
 */
kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		/* First user on this thread: allocate via xnuproxy. */
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
	}
	thread->th_exclaves_ipc_ctx.usecnt++;

	if (out_ipc_buffer != NULL) {
		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
	}
	return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
813 
/*
 * Drop one reference on the current thread's exclaves IPC context and
 * free it when the count reaches zero. No-op (success) if nothing was
 * ever allocated.
 */
kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES

	/* The inspection thread's cached buffer should never be freed */
	thread_t thread = current_thread();

	/* Don't try to free unallocated contexts. */
	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		return KERN_SUCCESS;
	}

	/*
	 * NOTE(review): when NOINSPECT is set the buffer is kept alive and
	 * usecnt is deliberately not decremented — confirm this matches the
	 * inspection-thread caching intent noted above.
	 */
	const thread_exclaves_inspection_flags_t iflags =
	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		return KERN_SUCCESS;
	}

	/* Only free once the last user is done. */
	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
		return KERN_SUCCESS;
	}

	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
843 
/*
 * Thread-termination hook: reclaim any exclaves IPC context the thread
 * abandoned, ignoring the use count (the thread is going away). Must be
 * called on the terminating thread itself, with no exclaves state active.
 */
kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
	kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
	assert(thread == current_thread());
	assert(thread->th_exclaves_intstate == 0);
	assert(thread->th_exclaves_state == 0);
	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
		exclaves_debug_printf(show_progress,
		    "exclaves: thread_terminate freeing abandoned exclaves "
		    "ipc buffer\n");
		/* Unconditionally free context irrespective of usecount */
		thread->th_exclaves_ipc_ctx.usecnt = 0;
		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
		assert(kr == KERN_SUCCESS);
	}
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

	return kr;
}
868 
869 OS_CONST
870 void*
exclaves_get_ipc_buffer(void)871 exclaves_get_ipc_buffer(void)
872 {
873 #if CONFIG_EXCLAVES
874 	thread_t thread = current_thread();
875 	Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;
876 
877 	return ipcb;
878 #else /* CONFIG_EXCLAVES */
879 	return NULL;
880 #endif /* CONFIG_EXCLAVES */
881 }
882 
883 #if CONFIG_EXCLAVES
884 
/*
 * Bind the current thread to the boot CPU. The cluster powerdown
 * suspension must happen first so the target processor can actually run
 * the thread; the thread_block() lets the bind take effect by forcing a
 * reschedule onto the boot processor.
 */
static void
bind_to_boot_core(void)
{
	/*
	 * First ensure the boot cluster isn't powered down preventing the
	 * thread from running at all.
	 */
	suspend_cluster_powerdown();
	const int cpu = ml_get_boot_cpu_number();
	processor_t processor = cpu_to_processor(cpu);
	assert3p(processor, !=, NULL);
	/* The thread must not already be bound elsewhere. */
	__assert_only processor_t old = thread_bind(processor);
	assert3p(old, ==, PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
}
900 
/*
 * Undo bind_to_boot_core(): release the CPU binding (blocking to let the
 * scheduler migrate the thread) and allow the cluster to power down again.
 */
static void
unbind_from_boot_core(void)
{
	/* Unbind the thread from the boot CPU. */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
	resume_cluster_powerdown();
}
909 
extern kern_return_t exclaves_boot_early(void);
/*
 * Early exclaves boot, called with the boot lock held.
 *
 * Fetches ringgate boot info; if an early enter is required, performs the
 * first exclaves entry pinned to the boot CPU with preemption disabled.
 * Then initializes the scheduler, the xnuproxy layer, exclaves resources,
 * and the panic thread, in that order. Any failure aborts the sequence.
 */
kern_return_t
exclaves_boot_early(void)
{
	kern_return_t kr = KERN_FAILURE;
	uint64_t boot_info = 0;
	bool early_enter = false;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	kr = exclaves_bootinfo(&boot_info, &early_enter);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Get bootinfo failed\n");
		return kr;
	}

	if (early_enter) {
		thread_t thread = current_thread();
		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

		bind_to_boot_core();

		/* Enter with preemption off and the scheduler-call state set. */
		disable_preemption_without_measurements();
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

		kr = exclaves_enter();

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
		enable_preemption();

		unbind_from_boot_core();

		if (kr != KERN_SUCCESS) {
			exclaves_debug_printf(show_errors,
			    "exclaves: early exclaves enter failed\n");
			/* KERN_ABORTED here means another core paniced. */
			if (kr == KERN_ABORTED) {
				panic("Unexpected ringgate panic status");
			}
			return kr;
		}
	}

	uint64_t xnuproxy_boot_info = 0;
	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Init scheduler failed\n");
		return kr;
	}

	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy setup failed\n");
		return KERN_FAILURE;
	}

	kr = exclaves_resource_init();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: failed to initialize resources\n");
		return kr;
	}

	kr = exclaves_panic_thread_setup();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy panic thread setup failed\n");
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
984 #endif /* CONFIG_EXCLAVES */
985 
986 #if CONFIG_EXCLAVES
987 static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
988 #endif /* CONFIG_EXCLAVES */
989 
/*
 * Register the XrtHosted callback table. The first registration wins;
 * subsequent calls are silently ignored.
 */
void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
	if (exclaves_callbacks != NULL) {
		return;
	}
	exclaves_callbacks = callbacks;
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}
1001 
/*
 * Publish a new (monotonically increasing) offset for the given exclaves
 * clock. Lock-free: a CAS loop ensures only strictly larger offsets
 * replace the stored latest_offset, so concurrent stale updates lose.
 */
void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
	assert(
		type == EXCLAVES_CLOCK_CONTINUOUS ||
		type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
	exclaves_clock_t *clock = &exclaves_clock[type];
	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
	while (latest_offset < offset) {
		/* Update the latest offset with the new offset. If this fails, then a
		 * concurrent update occurred and our offset may be stale. */
		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
		    offset, &latest_offset, relaxed)) {
			break;
		}
	}
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}
1023 
1024 /* -------------------------------------------------------------------------- */
1025 
1026 #pragma mark exclaves ipc internals
1027 
1028 #if CONFIG_EXCLAVES
1029 
1030 static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,exclaves_id_t endpoint_id)1031 exclaves_endpoint_call_internal(__unused ipc_port_t port,
1032     exclaves_id_t endpoint_id)
1033 {
1034 	kern_return_t kr = KERN_SUCCESS;
1035 
1036 	assert(port == IPC_PORT_NULL);
1037 
1038 	kr = exclaves_xnuproxy_endpoint_call(endpoint_id);
1039 
1040 	return kr;
1041 }
1042 
1043 /* -------------------------------------------------------------------------- */
1044 #pragma mark secure kernel communication
1045 
1046 /* ringgate entry endpoints */
1047 enum {
1048 	RINGGATE_EP_ENTER,
1049 	RINGGATE_EP_INFO
1050 };
1051 
1052 /* ringgate entry status codes */
1053 enum {
1054 	RINGGATE_STATUS_SUCCESS,
1055 	RINGGATE_STATUS_ERROR,
1056 	RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core paniced */
1057 };
1058 
/*
 * Enter the exclaves world via the RINGGATE_EP_ENTER ringgate endpoint and
 * return once control comes back to xnu.
 *
 * Preconditions: the calling thread must have exactly one of the
 * TH_EXCLAVES_{RPC,XNUPROXY,SCHEDULER_CALL,RESUME_PANIC_THREAD} flags set and
 * must not already be executing in exclaves.
 *
 * Returns KERN_SUCCESS, KERN_FAILURE on a ringgate error, or KERN_ABORTED if
 * another core panicked inside exclaves.
 */
OS_NOINLINE
static kern_return_t
exclaves_enter(void)
{
	uint32_t endpoint = RINGGATE_EP_ENTER;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	__assert_only thread_t thread = current_thread();

	/*
	 * Should never re-enter exclaves.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
	    (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
		panic("attempt to re-enter exclaves");
	}

	/*
	 * Must have one (and only one) of the flags set to enter exclaves.
	 */
	__assert_only const thread_exclaves_state_flags_t mask = (
		TH_EXCLAVES_RPC |
		TH_EXCLAVES_XNUPROXY |
		TH_EXCLAVES_SCHEDULER_CALL |
		TH_EXCLAVES_RESUME_PANIC_THREAD);
	assert3u(thread->th_exclaves_state & mask, !=, 0);
	assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);

#if MACH_ASSERT
	/*
	 * Set the ast to check that the thread doesn't return to userspace
	 * while in an RPC or XNUPROXY call.
	 */
	act_set_debug_assert();
#endif /* MACH_ASSERT */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_START);

	/* Account time spent in exclaves as secure execution. */
	recount_enter_secure();

	/* xnu_return_to_gl2 relies on this flag being present to correctly return
	 * to SK from interrupts xnu handles on behalf of SK. */
	thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;

	/*
	 * Bracket with labels so stackshot can determine where exclaves are
	 * entered from xnu.
	 */
	__asm__ volatile (
            "EXCLAVES_ENTRY_START:\n\t"
        );
	result = sk_enter(endpoint, &regs);
	__asm__ volatile (
            "EXCLAVES_ENTRY_END:\n\t"
        );

	thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;

	recount_leave_secure();

#if CONFIG_SPTM
	/**
	 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
	 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
	 * debug state forcing a reload on the next return to user mode.
	 */
	if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
		arm_debug_set(NULL);
	}
#endif /* CONFIG_SPTM */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_END);

	/* Map the ringgate status onto a kern_return_t. */
	switch (result) {
	case RINGGATE_STATUS_SUCCESS:
		return KERN_SUCCESS;
	case RINGGATE_STATUS_ERROR:
		return KERN_FAILURE;
	case RINGGATE_STATUS_PANIC:
		return KERN_ABORTED;
	default:
		assertf(false, "Unknown ringgate status %llu", result);
		__builtin_trap();
	}
}
1148 
1149 
1150 /*
1151  * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
1152  * it in indicates that we should immediately enter the ringgate once in order
1153  * to allow the scheduler to perform early boot initialisation.
1154  */
1155 #define EARLY_RINGGATE_ENTER 2
1156 
1157 OS_NOINLINE
1158 static kern_return_t
exclaves_bootinfo(uint64_t * out_boot_info,bool * early_enter)1159 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
1160 {
1161 	uint32_t endpoint = RINGGATE_EP_INFO;
1162 	uint64_t result = RINGGATE_STATUS_ERROR;
1163 
1164 	sptm_call_regs_t regs = { };
1165 
1166 	recount_enter_secure();
1167 	result = sk_enter(endpoint, &regs);
1168 	recount_leave_secure();
1169 	if (result == RINGGATE_STATUS_ERROR) {
1170 		return KERN_FAILURE;
1171 	}
1172 
1173 	*early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
1174 	*out_boot_info = result & ~EARLY_RINGGATE_ENTER;
1175 
1176 	return KERN_SUCCESS;
1177 }
1178 
1179 /* -------------------------------------------------------------------------- */
1180 
1181 #pragma mark exclaves scheduler communication
1182 
1183 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
1184 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);
1185 
/*
 * Bind each exclaves scheduler core to its xnu CPU by matching the
 * scheduler-reported MPIDR hardware ID against each CPU's physical ID, and
 * cache the per-core request/response buffer pointers in per-cpu data so the
 * scheduler request path can find them without a lookup.
 */
static void
exclaves_init_multicore(void)
{
	XrtHosted_Buffer_t **req, **res;

	exclaves_wait_for_cpu_init();

	exclaves_debug_printf(show_progress,
	    "Using MPIDR for exclave scheduler core IDs\n");

	/*
	 * Match the hardwareID to the physical ID and stash the pointers to the
	 * request/response buffers in per-cpu data for quick access.
	 */
	size_t core_count = exclaves_callbacks->v1.cores();
	for (size_t i = 0; i < core_count; i++) {
		const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
		uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;

		percpu_foreach(cpu_data, cpu_data) {
			if (cpu_data->cpu_phys_id != dt_phys_id) {
				continue;
			}
			req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
			*req = exclaves_callbacks->v1.Core.request(i);

			res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
			*res = exclaves_callbacks->v1.Core.response(i);

			/* Each scheduler core matches at most one CPU. */
			break;
		}
	}
}
1219 
1220 static kern_return_t
exclaves_scheduler_init(uint64_t boot_info,uint64_t * xnuproxy_boot_info)1221 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
1222 {
1223 	kern_return_t kr = KERN_SUCCESS;
1224 	XrtHosted_Error_t hosted_error;
1225 
1226 	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);
1227 
1228 	if (!pmap_valid_address(boot_info)) {
1229 		exclaves_debug_printf(show_errors,
1230 		    "exclaves: %s: 0x%012llx\n",
1231 		    "Invalid root physical address",
1232 		    boot_info);
1233 		return KERN_FAILURE;
1234 	}
1235 
1236 	if (exclaves_callbacks == NULL) {
1237 		exclaves_debug_printf(show_errors,
1238 		    "exclaves: Callbacks not registered\n");
1239 		return KERN_FAILURE;
1240 	}
1241 
1242 	/* Initialise XrtHostedXnu kext */
1243 	kr = exclaves_hosted_error(
1244 		exclaves_callbacks->v1.init(
1245 			XrtHosted_Version_current,
1246 			phystokv(boot_info),
1247 			&hosted_error),
1248 		&hosted_error);
1249 	if (kr != KERN_SUCCESS) {
1250 		return kr;
1251 	}
1252 
1253 	/* Record aperture addresses in buffer */
1254 	size_t frames = exclaves_callbacks->v1.frames();
1255 	XrtHosted_Mapped_t **pages = zalloc_permanent(
1256 		frames * sizeof(XrtHosted_Mapped_t *),
1257 		ZALIGN(XrtHosted_Mapped_t *));
1258 	size_t index = 0;
1259 	uint64_t phys = boot_info;
1260 	while (index < frames) {
1261 		if (!pmap_valid_address(phys)) {
1262 			exclaves_debug_printf(show_errors,
1263 			    "exclaves: %s: 0x%012llx\n",
1264 			    "Invalid shared physical address",
1265 			    phys);
1266 			return KERN_FAILURE;
1267 		}
1268 		pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
1269 		kr = exclaves_hosted_error(
1270 			exclaves_callbacks->v1.nextPhys(
1271 				pages[index],
1272 				&index,
1273 				&phys,
1274 				&hosted_error),
1275 			&hosted_error);
1276 		if (kr != KERN_SUCCESS) {
1277 			return kr;
1278 		}
1279 	}
1280 
1281 	/* Initialise the mapped region */
1282 	exclaves_callbacks->v1.setMapping(
1283 		XrtHosted_Region_scattered(frames, pages));
1284 
1285 	/* Boot the scheduler. */
1286 	kr = exclaves_scheduler_boot();
1287 	if (kr != KERN_SUCCESS) {
1288 		return kr;
1289 	}
1290 
1291 	XrtHosted_Global_t *global = exclaves_callbacks->v1.global();
1292 
1293 	/* Only support MPIDR multicore. */
1294 	if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
1295 		exclaves_debug_printf(show_errors,
1296 		    "exclaves: exclaves scheduler doesn't support multicore");
1297 		return KERN_FAILURE;
1298 	}
1299 	exclaves_init_multicore();
1300 
1301 	/* Initialise the XNU proxy */
1302 	if (!pmap_valid_address(global->v1.proxyInit)) {
1303 		exclaves_debug_printf(show_errors,
1304 		    "exclaves: %s: 0x%012llx\n",
1305 		    "Invalid xnu prpoxy physical address",
1306 		    phys);
1307 		return KERN_FAILURE;
1308 	}
1309 	*xnuproxy_boot_info = global->v1.proxyInit;
1310 
1311 	return kr;
1312 }
1313 
1314 #if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1315 #define exclaves_scheduler_debug_save_buffer(_buf) \
1316 	XrtHosted_Buffer_t _buf##_copy = *(_buf)
1317 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1318 	    _response_buf) ({ \
1319 	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
1320 	        printf("exclaves: Scheduler request = %p\n", _request_buf); \
1321 	        printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
1322 	            _request_buf##_copy.tag); \
1323 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1324 	                printf("exclaves: Scheduler request.arguments[%02zu] = " \
1325 	                    "0x%04llx\n", arg, \
1326 	                    _request_buf##_copy.arguments[arg]); \
1327 	        } \
1328 	        printf("exclaves: Scheduler response = %p\n", _response_buf); \
1329 	        printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
1330 	                _response_buf##_copy.tag); \
1331 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1332 	                printf("exclaves: Scheduler response.arguments[%02zu] = " \
1333 	                    "0x%04llx\n", arg, \
1334 	                    _response_buf##_copy.arguments[arg]); \
1335 	        } \
1336 	}})
#else // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1338 #define exclaves_scheduler_debug_save_buffer(_buf) ({ })
1339 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1340 	    _response_buf) ({ })
#endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1342 
/*
 * Emit the DBG_FUNC_START tracepoint matching the request about to be sent to
 * the exclaves scheduler. The traced arguments vary per request type.
 * Panics on a request tag this code doesn't know how to issue.
 */
static void
request_trace_start(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
		    request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
		    request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
		    request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
		    request->PmmEarlyAllocResponse.a);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}
1385 
1386 static void
request_trace_end(const XrtHosted_Request_t * request)1387 request_trace_end(const XrtHosted_Request_t *request)
1388 {
1389 	switch (request->tag) {
1390 	case XrtHosted_Request_ResumeWithHostId:
1391 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1392 		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
1393 		break;
1394 
1395 	case XrtHosted_Request_InterruptWithHostId:
1396 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1397 		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
1398 		break;
1399 
1400 	case XrtHosted_Request_UpdateTimerOffset:
1401 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1402 		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
1403 		break;
1404 
1405 	case XrtHosted_Request_BootExclaves:
1406 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1407 		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
1408 		break;
1409 
1410 	case XrtHosted_Request_PmmEarlyAllocResponse:
1411 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1412 		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
1413 		break;
1414 
1415 	case XrtHosted_Request_WatchdogPanic:
1416 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1417 		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
1418 		break;
1419 
1420 	default:
1421 		panic("Unsupported exclaves scheduler request: %d", request->tag);
1422 	}
1423 }
1424 
/*
 * Send a single request to the exclaves scheduler and decode its response.
 *
 * The whole exchange runs with interrupts and preemption disabled because the
 * request/response buffers are per-core. Returns the status of the ringgate
 * entry (KERN_SUCCESS / KERN_FAILURE / KERN_ABORTED), or KERN_POLICY_LIMIT if
 * the exclaves clocks are stale and must be updated before entering.
 */
__attribute__((always_inline))
static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t *request,
    XrtHosted_Response_t *response)
{
	assert3u(request->tag, >, XrtHosted_Request_Invalid);
	assert3u(request->tag, <, XrtHosted_Request_Limit);

	kern_return_t kr = KERN_SUCCESS;
	bool istate;

	/*
	 * Disable preemption and interrupts as the xrt hosted scheduler data
	 * structures are per-core.
	 * Preemption disabled and interrupt disabled timeouts are disabled for
	 * now until we can co-ordinate the measurements with the exclaves side
	 * of things.
	 */
	istate = ml_set_interrupts_enabled_with_debug(false, false);

	/* Interrupts should have been enabled entering this function. */
	assert(istate);

	/*
	 * This needs to be done with interrupts disabled, otherwise stackshot
	 * could mark the thread blocked just after this function exits and a
	 * thread marked as AST blocked would go into exclaves.
	 */

	while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state,
	    relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		/* Enable interrupts */
		(void) ml_set_interrupts_enabled_with_debug(true, false);

		/* Wait until the thread is collected on exclaves side */
		exclaves_inspection_check_ast();

		/* Disable interrupts and preemption before next AST check */
		ml_set_interrupts_enabled_with_debug(false, false);
	}
	/* Interrupts are disabled and exclaves_stackshot_ast is clean */

	disable_preemption_without_measurements();

	/*
	 * Don't enter with a stale clock (unless updating the clock or
	 * panicking).
	 */
	if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
	    request->tag != XrtHosted_Request_WatchdogPanic &&
	    exclaves_clocks_need_update()) {
		enable_preemption();
		(void) ml_set_interrupts_enabled_with_debug(istate, false);
		/* Caller is expected to refresh the clocks and retry. */
		return KERN_POLICY_LIMIT;
	}

	XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
	assert3p(request_buf, !=, NULL);

	request_trace_start(request);

	exclaves_callbacks->v1.Request.encode(request_buf, request);
	exclaves_scheduler_debug_save_buffer(request_buf);

	kr = exclaves_enter();

	/* The response may have come back on a different core. */
	XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
	assert3p(response_buf, !=, NULL);

	exclaves_scheduler_debug_save_buffer(response_buf);
	exclaves_callbacks->v1.Response.decode(response_buf, response);

	request_trace_end(request);

	enable_preemption();
	(void) ml_set_interrupts_enabled_with_debug(istate, false);

	exclaves_scheduler_debug_show_request_response(request_buf, response_buf);

	if (kr == KERN_ABORTED) {
		/* RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC indicating that
		 * another core has paniced in exclaves and is on the way to call xnu
		 * panic() via SPTM, so wait here for that to happen. */
		exclaves_wait_for_panic();
	}

	return kr;
}
1514 
1515 OS_NORETURN OS_NOINLINE
1516 static void
exclaves_wait_for_panic(void)1517 exclaves_wait_for_panic(void)
1518 {
1519 	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
1520 	    NSEC_PER_SEC);
1521 	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
1522 	panic("Unexpected wait for panic result: %d", wr);
1523 }
1524 
1525 static kern_return_t
handle_response_yield(bool early,__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Yield_t * yield)1526 handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
1527     const XrtHosted_Yield_t *yield)
1528 {
1529 	Exclaves_L4_Word_t responding_scid = yield->thread;
1530 	Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
1531 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1532 
1533 	exclaves_debug_printf(show_progress,
1534 	    "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
1535 	    early ? "(early yield)" : "", responding_scid, yielded_to_scid);
1536 	/* TODO: 1. remember yielding scid if it isn't the xnu proxy's
1537 	 * th_exclaves_scheduling_context_id so we know to resume it later
1538 	 * 2. translate yield_to to thread_switch()-style handoff.
1539 	 */
1540 	if (!early) {
1541 		assert3u(responding_scid, ==, scid);
1542 		assert3u(yield->threadHostId, ==, ctid);
1543 	}
1544 
1545 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1546 	    MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);
1547 
1548 	return KERN_SUCCESS;
1549 }
1550 
1551 static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Spawned_t * spawned)1552 handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
1553     const XrtHosted_Spawned_t *spawned)
1554 {
1555 	Exclaves_L4_Word_t responding_scid = spawned->thread;
1556 	thread_t thread = current_thread();
1557 	__assert_only ctid_t ctid = thread_get_ctid(thread);
1558 
1559 	/*
1560 	 * There are only a few places an exclaves thread is expected to be
1561 	 * spawned. Any other cases are considered errors.
1562 	 */
1563 	if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
1564 		exclaves_debug_printf(show_errors,
1565 		    "exclaves: Scheduler: Unexpected thread spawn: "
1566 		    "scid 0x%lx spawned scid 0x%llx\n",
1567 		    responding_scid, spawned->spawned);
1568 		return KERN_FAILURE;
1569 	}
1570 
1571 	exclaves_debug_printf(show_progress,
1572 	    "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
1573 	    responding_scid, (unsigned long)spawned->spawned);
1574 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1575 	    MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);
1576 
1577 	assert3u(responding_scid, ==, scid);
1578 	assert3u(spawned->threadHostId, ==, ctid);
1579 
1580 	return KERN_SUCCESS;
1581 }
1582 
1583 static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t * terminated)1584 handle_response_terminated(const XrtHosted_Terminated_t *terminated)
1585 {
1586 	Exclaves_L4_Word_t responding_scid = terminated->thread;
1587 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1588 
1589 	exclaves_debug_printf(show_errors,
1590 	    "exclaves: Scheduler: Unexpected thread terminate: "
1591 	    "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
1592 	    terminated->terminated);
1593 	assert3u(terminated->threadHostId, ==, ctid);
1594 
1595 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1596 	    MACH_EXCLAVES_SCHEDULER_TERMINATED),
1597 	    terminated->terminated);
1598 
1599 	return KERN_TERMINATED;
1600 }
1601 
/*
 * Handle a Wait response: the exclaves scheduler asks this thread to block on
 * an epoch-sync queue until it is woken, with the interruptibility and wake
 * policy chosen by the scheduler.
 *
 * Returns KERN_SUCCESS when woken (or not waiting), KERN_ABORTED when the
 * wait was interrupted.
 */
static kern_return_t
handle_response_wait(const XrtHosted_Wait_t *wait)
{
	Exclaves_L4_Word_t responding_scid = wait->waiter;
	thread_t thread = current_thread();
	__assert_only ctid_t ctid = thread_get_ctid(thread);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Wait: "
	    "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
	    "epoch 0x%llx\n", responding_scid, wait->owner,
	    wait->queueId, wait->epoch);
	assert3u(wait->waiterHostId, ==, ctid);

	/* The exclaves inspection thread should never wait. */
	if ((thread->th_exclaves_state & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		panic("Exclaves inspection thread tried to wait\n");
	}

	/*
	 * Note, "owner" may not be safe to access directly, for example
	 * the thread may have exited and been freed. esync_wait will
	 * only access it under a lock if the epoch is fresh thus
	 * ensuring safety.
	 */
	const ctid_t owner = (ctid_t)wait->ownerHostId;
	const XrtHosted_Word_t id = wait->queueId;
	const uint64_t epoch = wait->epoch;

	wait_interrupt_t interruptible;
	esync_policy_t policy;

	/* Map the scheduler's interruptibility onto xnu wait semantics. */
	switch (wait->interruptible) {
	case XrtHosted_Interruptibility_None:
		interruptible = THREAD_UNINT;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_Voluntary:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_DynamicQueue:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_USER;
		break;

	default:
		panic("Unknown exclaves interruptibility: %llu",
		    wait->interruptible);
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
	    wait->interruptible);
	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), owner, policy, interruptible);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}
1675 
1676 static kern_return_t
handle_response_wake(const XrtHosted_Wake_t * wake)1677 handle_response_wake(const XrtHosted_Wake_t *wake)
1678 {
1679 	Exclaves_L4_Word_t responding_scid = wake->waker;
1680 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1681 
1682 	exclaves_debug_printf(show_progress,
1683 	    "exclaves: Scheduler: Wake: "
1684 	    "scid 0x%lx wake of queue id 0x%llx, "
1685 	    "epoch 0x%llx, all 0x%llx\n", responding_scid,
1686 	    wake->queueId, wake->epoch, wake->all);
1687 	assert3u(wake->wakerHostId, ==, ctid);
1688 
1689 	const XrtHosted_Word_t id = wake->queueId;
1690 	const uint64_t epoch = wake->epoch;
1691 	const esync_wake_mode_t mode = wake->all != 0 ?
1692 	    ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;
1693 
1694 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1695 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);
1696 
1697 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1698 	    exclaves_get_queue_counter(id), mode, 0);
1699 
1700 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1701 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1702 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1703 
1704 	return KERN_SUCCESS;
1705 }
1706 
1707 static kern_return_t
handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t * wake)1708 handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
1709 {
1710 	Exclaves_L4_Word_t responding_scid = wake->waker;
1711 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1712 
1713 	exclaves_debug_printf(show_progress,
1714 	    "exclaves: Scheduler: WakeWithOwner: "
1715 	    "scid 0x%lx wake of queue id 0x%llx, "
1716 	    "epoch 0x%llx, owner 0x%llx\n", responding_scid,
1717 	    wake->queueId, wake->epoch,
1718 	    wake->owner);
1719 
1720 	assert3u(wake->wakerHostId, ==, ctid);
1721 
1722 	const ctid_t owner = (ctid_t)wake->ownerHostId;
1723 	const XrtHosted_Word_t id = wake->queueId;
1724 	const uint64_t epoch = wake->epoch;
1725 
1726 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1727 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
1728 	    ESYNC_WAKE_ONE_WITH_OWNER);
1729 
1730 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1731 	    exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);
1732 
1733 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1734 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1735 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1736 
1737 	return KERN_SUCCESS;
1738 }
1739 
/*
 * Handle a PanicWait response: park the dedicated exclaves panic thread until
 * an exclaves panic actually occurs. exclaves_panic_thread_wait() does not
 * return.
 */
static kern_return_t
handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
{
	Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
	__assert_only thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: PanicWait: "
	    "Panic thread SCID %lx\n",
	    panic_thread_scid);

	/* Only the panic thread's own context should see this response. */
	assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);

	exclaves_panic_thread_wait();

	/* NOT REACHABLE */
	return KERN_SUCCESS;
}
1758 
/*
 * Handle a Suspended response: the calling thread's exclaves context was
 * suspended; block uninterruptibly in the thread wait space until a matching
 * Resumed wakes it.
 *
 * Returns KERN_SUCCESS when woken (or not waiting), KERN_ABORTED on
 * interruption.
 */
static kern_return_t
handle_response_suspended(const XrtHosted_Suspended_t *suspended)
{
	Exclaves_L4_Word_t responding_scid = suspended->suspended;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Suspended: "
	    "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
	assert3u(suspended->suspendedHostId, ==, ctid);

	const uint64_t id = suspended->suspended;
	const uint64_t epoch = suspended->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);

	/* No owner handoff here (owner 0); the wait is uninterruptible. */
	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
	    exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}
1794 
1795 static kern_return_t
handle_response_resumed(const XrtHosted_Resumed_t * resumed)1796 handle_response_resumed(const XrtHosted_Resumed_t *resumed)
1797 {
1798 	Exclaves_L4_Word_t responding_scid = resumed->thread;
1799 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1800 
1801 	exclaves_debug_printf(show_progress,
1802 	    "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
1803 	    "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
1804 	    resumed->resumedHostId, resumed->epoch);
1805 	assert3u(resumed->threadHostId, ==, ctid);
1806 
1807 	const ctid_t target = (ctid_t)resumed->resumedHostId;
1808 	const XrtHosted_Word_t id = resumed->resumed;
1809 	const uint64_t epoch = resumed->epoch;
1810 
1811 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1812 	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
1813 	    target);
1814 
1815 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1816 	    exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);
1817 
1818 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1819 	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
1820 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1821 
1822 	return KERN_SUCCESS;
1823 }
1824 
1825 static kern_return_t
handle_response_interrupted(const XrtHosted_Interrupted_t * interrupted)1826 handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
1827 {
1828 	Exclaves_L4_Word_t responding_scid = interrupted->thread;
1829 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1830 
1831 	exclaves_debug_printf(show_progress,
1832 	    "exclaves: Scheduler: Interrupted: "
1833 	    "scid 0x%lx interrupt on queue id 0x%llx, "
1834 	    "epoch 0x%llx, target 0x%llx\n", responding_scid,
1835 	    interrupted->queueId, interrupted->epoch,
1836 	    interrupted->interruptedHostId);
1837 	assert3u(interrupted->threadHostId, ==, ctid);
1838 
1839 	const ctid_t target = (ctid_t)interrupted->interruptedHostId;
1840 	const XrtHosted_Word_t id = interrupted->queueId;
1841 	const uint64_t epoch = interrupted->epoch;
1842 
1843 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1844 	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
1845 	    target);
1846 
1847 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1848 	    exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);
1849 
1850 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1851 	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
1852 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1853 
1854 	return KERN_SUCCESS;
1855 }
1856 
1857 static kern_return_t
handle_response_nothing_scheduled(__unused const XrtHosted_NothingScheduled_t * nothing_scheduled)1858 handle_response_nothing_scheduled(
1859 	__unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
1860 {
1861 	exclaves_debug_printf(show_progress,
1862 	    "exclaves: Scheduler: nothing scheduled\n");
1863 
1864 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1865 	    MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));
1866 
1867 	return KERN_SUCCESS;
1868 }
1869 
/*
 * Handle an "AllExclavesBooted" response from the exclaves scheduler: exclaves
 * boot has completed. Trace the event only; the caller
 * (exclaves_scheduler_request_boot) uses this response tag to exit its boot
 * loop. Always returns KERN_SUCCESS.
 */
static kern_return_t
handle_response_all_exclaves_booted(
	__unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: all exclaves booted\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));

	return KERN_SUCCESS;
}
1882 
/*
 * The Early Alloc response asks for npages to be allocated. The list of
 * allocated pages is written into the first allocated page in the form of 32bit
 * page numbers. The physical address of the first page is passed back to the
 * exclaves scheduler as part of the next request.
 *
 * Returns KERN_SUCCESS (with *pagelist_pa set to the physical address of the
 * first allocated page) on success, KERN_SUCCESS with *pagelist_pa untouched
 * when zero pages were requested, or KERN_NO_SPACE when the request exceeds
 * EXCLAVES_MEMORY_MAX_REQUEST.
 */
static kern_return_t
handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
    uint64_t *pagelist_pa)
{
	/* Field 'a' carries the requested page count, 'b' carries flags. */
	const uint32_t npages = (uint32_t)pmm_early_alloc->a;
	const uint64_t flags = pmm_early_alloc->b;

	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
	    npages, flags);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);

	/* Nothing to do for an empty request. */
	if (npages == 0) {
		return KERN_SUCCESS;
	}

	/* Bound the request so the stack-allocated page array below is safe. */
	if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
		exclaves_debug_printf(show_errors,
		    "exclaves: request to allocate too many pages: %u\n",
		    npages);
		return KERN_NO_SPACE;
	}

	/*
	 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
	 * stack allocation is sufficient and fast. If
	 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
	 * to the heap.
	 */
	uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
	exclaves_memory_alloc(npages, page, EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN);

	/* Now copy the list of pages into the first page. */
	uint64_t first_page_pa = ptoa(page[0]);
#if 0
	// move to before sptm retype
	uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
	for (int i = 0; i < npages; i++) {
		first_page[i] = page[i];
	}
#endif

	*pagelist_pa = first_page_pa;
	return KERN_SUCCESS;
}
1936 
/*
 * Handle a "WatchdogPanicComplete" response from the exclaves scheduler: the
 * watchdog panic request has been fully processed. Trace the event only.
 */
static void
handle_response_watchdog_panic_complete(
	__unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
}
1944 
/*
 * Handle a "Panicking" response from the exclaves scheduler: the secure world
 * is panicking. Trace the event and then park this thread waiting for the
 * panic to take the system down. Never returns.
 */
OS_NORETURN
static void
handle_response_panicking(
	__unused const XrtHosted_Panicking_t *panicking)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_PANICKING));

	exclaves_wait_for_panic();

	/* Not reached. */
}
1957 
1958 static inline bool
exclaves_clocks_need_update(void)1959 exclaves_clocks_need_update(void)
1960 {
1961 	const exclaves_clock_type_t clocks[] = {
1962 		EXCLAVES_CLOCK_ABSOLUTE,
1963 		EXCLAVES_CLOCK_CONTINUOUS
1964 	};
1965 
1966 	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
1967 		const exclaves_clock_t *clock = &exclaves_clock[i];
1968 		exclaves_clock_t local = {
1969 			.u128 = os_atomic_load(&clock->a_u128, relaxed),
1970 		};
1971 
1972 		if (local.u64.sent_offset != local.u64.latest_offset) {
1973 			return true;
1974 		}
1975 	}
1976 
1977 	return false;
1978 }
1979 
/*
 * Push any pending clock offset updates (absolute and continuous) to the
 * exclaves scheduler. For each clock, loop until the sent offset catches up
 * with the latest offset, sending an UpdateTimerOffset request each pass.
 * Returns the first failing kern_return_t from the scheduler request, or
 * KERN_SUCCESS once both clocks are up to date.
 */
OS_NOINLINE
static kern_return_t
exclaves_clocks_update(void)
{
	/*
	 * NOTE(review): the loop below indexes exclaves_clock[] by i, assuming
	 * EXCLAVES_CLOCK_ABSOLUTE/CONTINUOUS are 0/1 — confirm against the
	 * enum definition.
	 */
	const exclaves_clock_type_t clocks[] = {
		EXCLAVES_CLOCK_ABSOLUTE,
		EXCLAVES_CLOCK_CONTINUOUS
	};

	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
		exclaves_clock_t local;
		exclaves_clock_t *clock = &exclaves_clock[i];

		local.u128 = os_atomic_load(&clock->a_u128, relaxed);
		while (local.u64.sent_offset != local.u64.latest_offset) {
			XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
			    XrtHosted_Timer_Absolute :
			    XrtHosted_Timer_Continuous;

			/* Tell the scheduler about the new offset. */
			kern_return_t kr =
			    exclaves_scheduler_request_update_timer(timer,
			    local.u64.latest_offset);
			if (kr != KERN_SUCCESS) {
				return kr;
			}

			/*
			 * Swap the sent offset with the local latest offset. If
			 * it fails, the sent offset will be reloaded.
			 */
			os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
			    local.u64.sent_offset, local.u64.latest_offset,
			    &local.u64.sent_offset, relaxed);

			/*
			 * Fetch the latest offset again, in case we are stale.
			 */
			local.u64.latest_offset = os_atomic_load(
				&clock->a_u64.latest_offset, relaxed);
		}
	}

	return KERN_SUCCESS;
}
2024 
/*
 * Boot the exclaves scheduler. Binds the calling thread to the boot CPU for
 * the duration, points this CPU's request/response buffers at the boot-core
 * slots, and issues the boot request. Returns the result of the boot request.
 */
static kern_return_t
exclaves_scheduler_boot(void)
{
	/* This must happen on the boot CPU - bind the thread. */
	bind_to_boot_core();

	/*
	 * Set the request/response buffers. These may be overridden later when
	 * doing multicore setup.
	 */
	*PERCPU_GET(exclaves_request) =
	    exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
	*PERCPU_GET(exclaves_response) =
	    exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);

	kern_return_t kr = exclaves_scheduler_request_boot();

	unbind_from_boot_core();

	return kr;
}
2046 
/*
 * Send an UpdateTimerOffset request to the exclaves scheduler for the given
 * timer (absolute or continuous) with the new offset. The only expected
 * response is NothingScheduled; any other response produces KERN_FAILURE.
 * Panics if the scheduler reports a pending timer update (KERN_POLICY_LIMIT)
 * while we are already updating a timer.
 */
static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset)
{
	thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to update timer\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_NothingScheduled,
	};

	const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
		.timer = timer,
		.offset = offset,
		);

	/* Flag the thread as inside a scheduler call for the duration. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was pending
		 * (and thus should never happen when trying to update a
		 * timer).
		 */
		panic("exclaves: timer update requested when updating timer");

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		return kr;
	}

	/* Flag that a scheduler response is being handled. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_NothingScheduled:
		kr = handle_response_nothing_scheduled(&response.NothingScheduled);
		break;

	default:
		exclaves_debug_printf(show_errors, "exclaves: "
		    "unexpected scheduler response when updating timer\n");
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}
2104 
/*
 * Drive the exclaves scheduler through boot. Repeatedly sends BootExclaves
 * requests (or a PmmEarlyAllocResponse when the previous response asked for
 * early page allocation) and dispatches each response until the scheduler
 * reports AllExclavesBooted or an error occurs.
 * Must be called with no exclaves thread state set on the calling thread.
 */
static kern_return_t
exclaves_scheduler_request_boot(void)
{
	kern_return_t kr = KERN_FAILURE;
	thread_t thread = current_thread();

	assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to boot exclave\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_Invalid,
	};
	uint64_t pagelist_pa = 0;

	while (response.tag != XrtHosted_Response_AllExclavesBooted) {
		/*
		 * If the previous iteration allocated pages, answer with the
		 * page list's physical address; otherwise (re-)issue the boot
		 * request.
		 */
		const XrtHosted_Request_t request = pagelist_pa != 0 ?
		    XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa):
		    XrtHosted_Request_BootExclavesMsg();
		pagelist_pa = 0;

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
		kr = exclaves_scheduler_request(&request, &response);
		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

		switch (kr) {
		case KERN_SUCCESS:
			break;

		case KERN_POLICY_LIMIT:
			/* A timer update is pending - service it first. */
			kr = exclaves_clocks_update();
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			/*
			 * Don't try to process the response - we just updated
			 * the clock so continue with the boot request.
			 */
			continue;

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: scheduler request failed\n");
			return KERN_FAILURE;
		}

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

		switch (response.tag) {
		case XrtHosted_Response_Yield:
			kr = handle_response_yield(true, 0, &response.Yield);
			break;

		case XrtHosted_Response_NothingScheduled:
			kr = handle_response_nothing_scheduled(&response.NothingScheduled);
			break;

		case XrtHosted_Response_AllExclavesBooted:
			kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
			break;

		case XrtHosted_Response_PmmEarlyAlloc:
			kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
			break;

		case XrtHosted_Response_PanicBufferAddress:
			handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
			break;

		case XrtHosted_Response_Panicking:
			handle_response_panicking(&response.Panicking);
		/* Not reached. */

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
			    response.tag);
			kr = KERN_FAILURE;
			break;
		}

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	return kr;
}
2196 
/*
 * Resume (or, when 'interrupted' is set, interrupt) the exclaves thread
 * identified by ctx->scid via the exclaves scheduler, tagging the request
 * with the current thread's ctid, then dispatch the scheduler's response to
 * the matching handler.
 *
 * On KERN_POLICY_LIMIT there is no response to handle; pending clock updates
 * are pushed instead and the caller re-enters as needed.
 *
 * The EXCLAVES_SCHEDULER_REQUEST_START/END asm labels bracket the
 * response-handling region; their unslid addresses delimit the range used by
 * exclaves_scheduler_request_in_range() for stackshot.
 */
OS_INLINE
kern_return_t
exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
{
	thread_t thread = current_thread();
	const ctid_t ctid = thread_get_ctid(thread);

	/* Only valid for threads in RPC or resuming a panic thread. */
	assert3u(thread->th_exclaves_state &
	    (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = interrupted ?
	    XrtHosted_Request_InterruptWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		) :
	    XrtHosted_Request_ResumeWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		);

	kern_return_t kr = exclaves_scheduler_request(&request, &response);

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * Don't try to handle any response (as there isn't one), just
		 * return to the caller which will check MSG STATUS and re-enter
		 * if necessary.
		 */
		return exclaves_clocks_update();

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		break;
	}

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_Wait:
		kr = handle_response_wait(&response.Wait);
		break;

	case XrtHosted_Response_Wake:
		kr = handle_response_wake(&response.Wake);
		break;

	case XrtHosted_Response_Yield:
		kr = handle_response_yield(false, ctx->scid, &response.Yield);
		break;

	case XrtHosted_Response_Spawned:
		kr = handle_response_spawned(ctx->scid, &response.Spawned);
		break;

	case XrtHosted_Response_Terminated:
		kr = handle_response_terminated(&response.Terminated);
		break;

	case XrtHosted_Response_WakeWithOwner:
		kr = handle_response_wake_with_owner(&response.WakeWithOwner);
		break;

	case XrtHosted_Response_PanicWait:
		kr = handle_response_panic_wait(&response.PanicWait);
		break;

	case XrtHosted_Response_Suspended:
		kr = handle_response_suspended(&response.Suspended);
		break;

	case XrtHosted_Response_Resumed:
		kr = handle_response_resumed(&response.Resumed);
		break;

	case XrtHosted_Response_Interrupted:
		kr = handle_response_interrupted(&response.Interrupted);
		break;

	case XrtHosted_Response_Panicking:
		handle_response_panicking(&response.Panicking);
	/* Not reached. */

	case XrtHosted_Response_Invalid:
	case XrtHosted_Response_Failure:
	case XrtHosted_Response_Pong:
	case XrtHosted_Response_SleepUntil:
	case XrtHosted_Response_Awaken:
	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
		    response.tag);
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");

	return kr;
}
2311 
/* A friendly name to show up in backtraces. */
OS_NOINLINE
kern_return_t
exclaves_run(thread_t thread, bool interrupted)
{
	/* Thin, non-inlined wrapper around the resume request. */
	return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
	           interrupted);
}
2320 
/*
 * Note: this is called from a thread with RT priority which is on the way to
 * panicking and thus doesn't log.
 *
 * Send a WatchdogPanic request to the exclaves scheduler. Returns
 * KERN_FAILURE without sending anything if the thread's exclaves state is
 * inconsistent (to avoid blocking on the way to a panic); panics on an
 * unexpected response.
 */
kern_return_t
exclaves_scheduler_request_watchdog_panic(void)
{
	thread_t thread = current_thread();

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();

	/*
	 * Check for consistent exclaves thread state to make sure we don't
	 * accidentally block. This should normally never happen but if it does,
	 * just return and allow the caller to panic without gathering an
	 * exclaves stackshot.
	 */
	if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
	    thread->th_exclaves_state != 0) {
		return KERN_FAILURE;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was pending
		 * (and thus should never happen when trying to send a watchdog
		 * panic message).
		 */
		panic("exclaves: "
		    "timer update requested when calling watchdog panic");

	default:
		return kr;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_WatchdogPanicComplete:
		handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
		break;

	case XrtHosted_Response_Panicking:
		handle_response_panicking(&response.Panicking);
	/* Not Reached. */

	default:
		panic("exclaves: unexpected scheduler response "
		    "when sending watchdog panic request: %d", response.tag);
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}
2385 
2386 /* -------------------------------------------------------------------------- */
2387 
2388 #pragma mark exclaves xnu proxy communication
2389 
2390 static kern_return_t
exclaves_hosted_error(bool success,XrtHosted_Error_t * error)2391 exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
2392 {
2393 	if (success) {
2394 		return KERN_SUCCESS;
2395 	} else {
2396 		exclaves_debug_printf(show_errors,
2397 		    "exclaves: XrtHosted: %s[%d] (%s): %s\n",
2398 		    error->file,
2399 		    error->line,
2400 		    error->function,
2401 		    error->expression
2402 		    );
2403 		return KERN_FAILURE;
2404 	}
2405 }
2406 
2407 
2408 #pragma mark exclaves privilege management
2409 
2410 /*
2411  * All entitlement checking enabled by default.
2412  */
2413 #define DEFAULT_ENTITLEMENT_FLAGS (~0)
2414 
2415 /*
2416  * boot-arg to control the use of entitlements.
2417  * Eventually this should be removed and entitlement checking should be gated on
2418  * the EXCLAVES_R_ENTITLEMENTS requirement.
2419  * This will be addressed with rdar://125153460.
2420  */
2421 TUNABLE(unsigned int, exclaves_entitlement_flags,
2422     "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);
2423 
2424 static bool
has_entitlement(task_t task,const exclaves_priv_t priv,const char * entitlement)2425 has_entitlement(task_t task, const exclaves_priv_t priv,
2426     const char *entitlement)
2427 {
2428 	/* Skip the entitlement if not enabled. */
2429 	if ((exclaves_entitlement_flags & priv) == 0) {
2430 		return true;
2431 	}
2432 
2433 	return IOTaskHasEntitlement(task, entitlement);
2434 }
2435 
2436 static bool
has_entitlement_vnode(void * vnode,const int64_t off,const exclaves_priv_t priv,const char * entitlement)2437 has_entitlement_vnode(void *vnode, const int64_t off,
2438     const exclaves_priv_t priv, const char *entitlement)
2439 {
2440 	/* Skip the entitlement if not enabled. */
2441 	if ((exclaves_entitlement_flags & priv) == 0) {
2442 		return true;
2443 	}
2444 
2445 	return IOVnodeHasEntitlement(vnode, off, entitlement);
2446 }
2447 
/*
 * Return true if 'task' holds the given exclaves privilege.
 * Launchd implicitly holds CONCLAVE_SPAWN and BOOT; the kernel implicitly
 * holds KERNEL_DOMAIN. Other tasks are checked for the corresponding
 * entitlement. CONCLAVE_HOST is only ever checked by vnode (see
 * exclaves_has_priv_vnode()) and panics here.
 */
bool
exclaves_has_priv(task_t task, exclaves_priv_t priv)
{
	const bool is_kernel = task == kernel_task;
	const bool is_launchd = task_pid(task) == 1;

	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		/* Both launchd and entitled tasks can spawn new conclaves. */
		if (is_launchd) {
			return true;
		}
		return has_entitlement(task, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	case EXCLAVES_PRIV_KERNEL_DOMAIN:
		/*
		 * Both the kernel itself and user tasks with the right
		 * privilege can access exclaves resources in the kernel domain.
		 */
		if (is_kernel) {
			return true;
		}

		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
			return true;
		}

		/* Cache a successful entitlement check in the RO task flags. */
		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.kernel-domain")) {
			task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
			return true;
		}

		return false;

	case EXCLAVES_PRIV_BOOT:
		/* Both launchd and entitled tasks can boot exclaves. */
		if (is_launchd) {
			return true;
		}
		/* BEGIN IGNORE CODESTYLE */
		return has_entitlement(task, priv,
		    "com.apple.private.exclaves.boot");
		/* END IGNORE CODESTYLE */

	/* The CONCLAVE HOST priv is always checked by vnode. */
	case EXCLAVES_PRIV_CONCLAVE_HOST:
	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2504 
/*
 * Vnode-based privilege check (used when there is no live task, e.g. at exec
 * time). Supports CONCLAVE_HOST and CONCLAVE_SPAWN; any other privilege
 * panics. For CONCLAVE_HOST, additionally rejects binaries that hold both
 * the conclave-host and kernel-domain entitlements (mutually exclusive).
 */
bool
exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
{
	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_HOST: {
		const bool has_conclave_host = has_entitlement_vnode(vnode,
		    off, priv, "com.apple.private.exclaves.conclave-host");

		/*
		 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
		 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
		 */

		/* Don't check if neither entitlement is being enforced. */
		if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
		    (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
			return has_conclave_host;
		}

		const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
		    EXCLAVES_PRIV_KERNEL_DOMAIN,
		    "com.apple.private.exclaves.kernel-domain");

		/* See if it has both. */
		if (has_conclave_host && has_domain_kernel) {
			exclaves_debug_printf(show_errors,
			    "exclaves: task has both conclave-host and "
			    "kernel-domain entitlements which is forbidden\n");
			return false;
		}

		return has_conclave_host;
	}

	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		return has_entitlement_vnode(vnode, off, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2547 
2548 
2549 #pragma mark exclaves stackshot range
2550 
2551 /* Unslid pointers defining the range of code which switches threads into
2552  * secure world */
2553 uintptr_t exclaves_enter_range_start;
2554 uintptr_t exclaves_enter_range_end;
2555 
2556 /* Unslid pointers defining the range of code which handles exclaves scheduler request */
2557 uintptr_t exclaves_scheduler_request_range_start;
2558 uintptr_t exclaves_scheduler_request_range_end;
2559 
2560 
/*
 * Capture the unslid addresses of the exclaves-enter and scheduler-request
 * code ranges (delimited by asm labels) for later use by stackshot range
 * checks. Runs once at early boot (see the STARTUP registration below).
 */
__startup_func
static void
initialize_exclaves_ranges(void)
{
	exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
	assert3u(exclaves_enter_range_start, !=, 0);
	exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
	assert3u(exclaves_enter_range_end, !=, 0);

	exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
	assert3u(exclaves_scheduler_request_range_start, !=, 0);
	exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
	assert3u(exclaves_scheduler_request_range_end, !=, 0);
}
2575 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);
2576 
2577 /*
2578  * Return true if the specified address is in exclaves_enter.
2579  */
2580 static bool
exclaves_enter_in_range(uintptr_t addr,bool slid)2581 exclaves_enter_in_range(uintptr_t addr, bool slid)
2582 {
2583 	return slid ?
2584 	       exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
2585 	       exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
2586 }
2587 
2588 /*
2589  * Return true if the specified address is in scheduler request handlers.
2590  */
2591 static bool
exclaves_scheduler_request_in_range(uintptr_t addr,bool slid)2592 exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
2593 {
2594 	return slid ?
2595 	       exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
2596 	       exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
2597 }
2598 
2599 uint32_t
exclaves_stack_offset(const uintptr_t * addr,size_t nframes,bool slid)2600 exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
2601 {
2602 	size_t i = 0;
2603 
2604 	// Check for a frame matching scheduler request range
2605 	for (i = 0; i < nframes; i++) {
2606 		if (exclaves_scheduler_request_in_range(addr[i], slid)) {
2607 			break;
2608 		}
2609 	}
2610 
2611 	// Insert exclaves stacks before the scheduler request frame
2612 	if (i < nframes) {
2613 		return (uint32_t)(i + 1);
2614 	}
2615 
2616 	// Check for a frame matching upcall code range
2617 	for (i = 0; i < nframes; i++) {
2618 		if (exclaves_upcall_in_range(addr[i], slid)) {
2619 			break;
2620 		}
2621 	}
2622 
2623 	// Insert exclaves stacks before the upcall frame when found
2624 	if (i < nframes) {
2625 		return (uint32_t)(i + 1);
2626 	}
2627 
2628 	// Check for a frame matching exclaves enter range
2629 	for (i = 0; i < nframes; i++) {
2630 		if (exclaves_enter_in_range(addr[i], slid)) {
2631 			break;
2632 		}
2633 	}
2634 
2635 	// Put exclaves stacks on top of kernel stacks by default
2636 	if (i == nframes) {
2637 		i = 0;
2638 	}
2639 	return (uint32_t)i;
2640 }
2641 
2642 #if DEVELOPMENT || DEBUG
2643 
/* Tweak the set of relaxed requirements on startup. */
__startup_func
static void
exclaves_requirement_startup(void)
{
	/*
	 * The medium-term plan is that the boot-arg controlling entitlements
	 * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
	 * Until that happens, for historical reasons, if the entitlement
	 * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
	 * EXCLAVES_R_CONCLAVE_RESOURCES here too.
	 */
	if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
		exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
	}

	/* DEVELOPMENT/DEBUG only: the EIC requirement is always relaxed. */
	exclaves_requirement_relax(EXCLAVES_R_EIC);
}
2662 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);
2663 
2664 #endif /* DEVELOPMENT || DEBUG */
2665 
2666 #endif /* CONFIG_EXCLAVES */
2667 
2668 
2669 #ifndef CONFIG_EXCLAVES
2670 /* stubs for sensor functions which are not compiled in from exclaves.c when
2671  * CONFIG_EXCLAVE is disabled */
2672 
2673 kern_return_t
exclaves_sensor_start(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2674 exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
2675     exclaves_sensor_status_t *status)
2676 {
2677 #pragma unused(sensor_type, flags, status)
2678 	return KERN_NOT_SUPPORTED;
2679 }
2680 
2681 kern_return_t
exclaves_sensor_stop(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2682 exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
2683     exclaves_sensor_status_t *status)
2684 {
2685 #pragma unused(sensor_type, flags, status)
2686 	return KERN_NOT_SUPPORTED;
2687 }
2688 
2689 kern_return_t
exclaves_sensor_status(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2690 exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
2691     exclaves_sensor_status_t *status)
2692 {
2693 #pragma unused(sensor_type, flags, status)
2694 	return KERN_NOT_SUPPORTED;
2695 }
2696 
2697 #endif /* ! CONFIG_EXCLAVES */
2698