xref: /xnu-11417.121.6/osfmk/kern/exclaves.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/exclaves.h>
30 #include <mach/mach_traps.h>
31 #include <kern/misc_protos.h>
32 #include <kern/assert.h>
33 #include <kern/recount.h>
34 #include <kern/startup.h>
35 
36 #if CONFIG_EXCLAVES
37 
38 #if CONFIG_SPTM
39 #include <arm64/sptm/sptm.h>
40 #include <arm64/hv/hv_vm.h>
41 #include <arm64/hv/hv_vcpu.h>
42 #else
43 #error Invalid configuration
44 #endif /* CONFIG_SPTM */
45 
46 #include <arm/cpu_data_internal.h>
47 #include <arm/misc_protos.h>
48 #include <kern/epoch_sync.h>
49 #include <kern/ipc_kobject.h>
50 #include <kern/kalloc.h>
51 #include <kern/locks.h>
52 #include <kern/percpu.h>
53 #include <kern/task.h>
54 #include <kern/thread.h>
55 #include <kern/zalloc.h>
56 #include <kern/exclaves_stackshot.h>
57 #include <kern/exclaves_test_stackshot.h>
58 #include <vm/pmap.h>
59 #include <pexpert/pexpert.h>
60 
61 #include <mach/exclaves_l4.h>
62 #include <mach/mach_port.h>
63 
64 #include <Exclaves/Exclaves.h>
65 
66 #include <IOKit/IOBSD.h>
67 
68 #include <xnuproxy/messages.h>
69 
70 #include "exclaves_debug.h"
71 #include "exclaves_panic.h"
72 #include "exclaves_xnuproxy.h"
73 
74 /* External & generated headers */
75 #include <xrt_hosted_types/types.h>
76 
77 #if __has_include(<Tightbeam/tightbeam.h>)
78 #include <Tightbeam/tightbeam.h>
79 #include <Tightbeam/tightbeam_private.h>
80 #endif
81 
82 #include "exclaves_resource.h"
83 #include "exclaves_upcalls.h"
84 #include "exclaves_boot.h"
85 #include "exclaves_inspection.h"
86 #include "exclaves_memory.h"
87 #include "exclaves_internal.h"
88 #include "exclaves_sensor.h"
89 
/* Lock group shared by all exclaves-related locks in this subsystem. */
LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");

/* Boot lock - only used here for assertions. */
extern lck_mtx_t exclaves_boot_lock;
94 
/*
 * Sent/latest offset pair used when updating exclaves clocks.
 *
 * The pair can be read/written either as two independent 64-bit atomics or
 * as a single 128-bit value (for a consistent snapshot of both fields).
 */
typedef struct {
	union {
		/* atomic fields are used via atomic primitives */
		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
		_Atomic unsigned __int128 a_u128;
		/* non-atomic fields are used via local variable. this is needed
		 * to avoid undefined behavior with an atomic struct or
		 * accessing atomic fields non-atomically */
		struct { uint64_t sent_offset, latest_offset; } u64;
		unsigned __int128 u128;
	};
} exclaves_clock_t;
110 
111 
/*
 * Two clocks indexed by their type (exclaves_clock_type_t).
 * This makes things easy to look up directly by type value.
 */
static exclaves_clock_t exclaves_clock[] = {
	[EXCLAVES_CLOCK_ABSOLUTE] = {},
	[EXCLAVES_CLOCK_CONTINUOUS] = {},
};
120 
/* Forward declarations of file-local helpers defined further below. */
static kern_return_t
exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);

static kern_return_t
exclaves_enter(void);
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void);

static inline bool
exclaves_clocks_need_update(void);

static kern_return_t
exclaves_scheduler_boot(void);

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error);

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset);

static kern_return_t
exclaves_scheduler_request_boot(void);
150 
151 
/*
 * A static set of exclave epoch counters.
 * Slots are indexed by XrtHosted_Counter_* IDs (see accessors below).
 */
static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};
156 
/* Map a scheduler queue ID to its epoch counter slot. */
static inline os_atomic(uint64_t) *
exclaves_get_queue_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
}
162 
/* Map an exclaves thread ID to its epoch counter slot. */
static inline os_atomic(uint64_t) *
exclaves_get_thread_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
}
168 
169 
170 /* -------------------------------------------------------------------------- */
171 #pragma mark exclaves debug configuration
172 
#if DEVELOPMENT || DEBUG
/* Debug verbosity mask, settable via the "exclaves_debug" boot-arg. */
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

/* Mask of requirements that may be relaxed, from boot-arg or device tree. */
TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
/* On RELEASE builds requirements can never be relaxed. */
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif
183 
184 #endif /* CONFIG_EXCLAVES */
185 
186 /* -------------------------------------------------------------------------- */
187 #pragma mark userspace entry point
188 
189 #if CONFIG_EXCLAVES
190 static kern_return_t
operation_boot(mach_port_name_t name,exclaves_boot_stage_t stage)191 operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
192 {
193 	if (name != MACH_PORT_NULL) {
194 		/* Only accept MACH_PORT_NULL for now */
195 		return KERN_INVALID_CAPABILITY;
196 	}
197 
198 	/*
199 	 * As the boot operation itself happens outside the context of any
200 	 * conclave, it requires special privilege.
201 	 */
202 	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
203 		return KERN_DENIED;
204 	}
205 
206 	return exclaves_boot(stage);
207 }
208 #endif /* CONFIG_EXCLAVES */
209 
/*
 * Userspace entry point for the exclaves control trap.
 *
 * Decodes the operation from uap->operation_and_flags, performs the
 * privilege/boot-stage checks common to all operations, then dispatches
 * to the per-operation handling in the switch below. Returns a
 * kern_return_t to userspace; copyin/copyout errno-style results are
 * returned directly where the original code does so.
 */
kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	int error = 0;

	mach_port_name_t name = uap->name;
	exclaves_id_t identifier = uap->identifier;
	mach_vm_address_t ubuffer = uap->buffer;
	mach_vm_size_t usize = uap->size;
	/* NOTE: uoffset aliases uap->identifier — the identifier field is
	 * reused as a buffer offset by the copyin/copyout operations. */
	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
	mach_vm_size_t usize2 = uap->size2;
	mach_vm_size_t uoffset2 = uap->offset;
	mach_vm_address_t ustatus = uap->status;

	task_t task = current_task();

	/*
	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
	 * with xnu. That header is not shared with userspace. Make sure that
	 * the retval userspace picks up is the same as the one
	 * xnu/ExclavePlatform thinks it is.
	 */
	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));

	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
	if (flags != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Deal with OP_BOOT up-front as it has slightly different restrictions
	 * than the other operations.
	 */
	if (operation == EXCLAVES_CTL_OP_BOOT) {
		return operation_boot(name, (uint32_t)identifier);
	}

	/*
	 * All other operations are restricted to properly entitled tasks which
	 * can operate in the kernel domain, or those which have joined
	 * conclaves (which has its own entitlement check).
	 * If requirements are relaxed during development, tasks with no
	 * conclaves are also allowed.
	 */
	if (operation == EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME) {
		if (!exclaves_has_priv(task, EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME)) {
			return KERN_DENIED;
		}
	} else if (task_get_conclave(task) == NULL &&
	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
		return KERN_DENIED;
	}

	/*
	 * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
	 * return immediately.
	 */
	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task_get_conclave(task) != NULL) {
		/*
		 * For calls from tasks that have joined conclaves, now wait until
		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
		 * panic). This is a separate call to the one above because we
		 * need to distinguish EXCLAVECORE being not supported and
		 * still wait for EXCLAVEKIT to boot if it *is* supported.
		 */
		kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	switch (operation) {
	/* Synchronous IPC call to a conclave service endpoint. */
	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != Exclaves_L4_IpcBuffer_Size) {
			return KERN_INVALID_ARGUMENT;
		}


		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
			return error;
		}
		assert(ipcb != NULL);
		if ((error = copyin(ubuffer, ipcb, usize))) {
			return error;
		}

		if (identifier >= CONCLAVE_SERVICE_MAX) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Verify that the service actually exists in the current
		 * domain.
		 */
		if (!exclaves_conclave_has_service(task_get_conclave(task),
		    identifier)) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
		error = copyout(ipcb, ubuffer, usize);
		/*
		 * The endpoint call to the conclave may have triggered a stop
		 * upcall; check if the stop upcall completion handler needs to
		 * run.
		 */
		task_stop_conclave_upcall_complete();
		if (error) {
			return error;
		}
		break;
	}

	/* Map a named shared-memory buffer and return a port for it. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		size_t len = 0;
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
		    &len) != 0 || id_name[0] == '\0') {
			return KERN_INVALID_ARGUMENT;
		}

		/* Permissions must contain at least one supported bit and
		 * nothing else. */
		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
		const exclaves_buffer_perm_t supported =
		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
		    perm, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource,
		    current_space(), &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* Hand the new port name back to userspace; on failure drop
		 * the port so the resource reference isn't leaked. */
		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	/* Copy user data into a previously created shared-memory buffer. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyin(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	/* Copy data out of a previously created shared-memory buffer. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	/* Launch the conclave associated with the current task. */
	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		kr = task_launch_conclave(name);

		/*
		 * The conclave launch call may have triggered a stop upcall;
		 * check if the stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		break;

	/* Look up a service by name in the caller's domain (with optional
	 * fallbacks to the DARWIN and KERNEL domains). */
	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		struct exclaves_resource_user uresource = {};

		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
			return KERN_INVALID_ARGUMENT;
		}

		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		/* For the moment we only ever have to deal with one request. */
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}
		error = copyin(ubuffer, &uresource, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		/* The user-supplied name must be NUL-terminated within the
		 * buffer. */
		const size_t name_buf_len = sizeof(uresource.r_name);
		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Do the regular lookup first. If that fails, fallback to the
		 * DARWIN domain, finally fallback to the KERNEL domain.
		 */
		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
				    uresource.r_name);
			}
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
				    uresource.r_name);
			}
		}

		if (id == EXCLAVES_INVALID_ID) {
			return KERN_NOT_FOUND;
		}

		uresource.r_id = id;
		uresource.r_port = MACH_PORT_NULL;

		error = copyout(&uresource, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		kr = KERN_SUCCESS;
		break;
	}

	/* Map a named arbitrated-audio buffer and return a port for it. */
	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* On copyout failure drop the port so the resource reference
		 * isn't leaked. */
		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	/* Copy data out of an arbitrated-audio buffer. */
	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
		exclaves_resource_t *resource;

		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type !=
		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_audio_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		break;
	}

	/* Open a named sensor resource and return a port for it. */
	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* No senders drops the reference. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	/* Start a sensor and copy its status back to userspace. */
	case EXCLAVES_CTL_OP_SENSOR_START: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_start(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	/* Stop a sensor and copy its status back to userspace. */
	case EXCLAVES_CTL_OP_SENSOR_STOP: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_stop(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	/* Query a sensor's status and copy it back to userspace. */
	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}


		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_status(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
		break;
	}
	/* Look up (or create) a notification resource by name and return
	 * both the resource description and a port for it. */
	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
		exclaves_resource_t *notification_resource = NULL;
		mach_port_name_t port_name = MACH_PORT_NULL;

		struct exclaves_resource_user *notification_resource_user = NULL;
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		notification_resource_user = (struct exclaves_resource_user *)
		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		error = copyin(ubuffer, notification_resource_user, usize);
		if (error) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		/* The user-supplied name must be NUL-terminated within the
		 * buffer. */
		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
		if (strnlen(notification_resource_user->r_name, name_buf_len)
		    == name_buf_len) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		kr = exclaves_notification_create(domain,
		    notification_resource_user->r_name, &notification_resource);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}

		kr = exclaves_resource_create_port_name(notification_resource,
		    current_space(), &port_name);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}
		notification_resource_user->r_type = notification_resource->r_type;
		notification_resource_user->r_id = notification_resource->r_id;
		notification_resource_user->r_port = port_name;
		error = copyout(notification_resource_user, ubuffer, usize);
		if (error) {
			kr = KERN_INVALID_ADDRESS;
			goto notification_resource_lookup_out;
		}

		/* Common cleanup: free the temporary kernel copy and, on
		 * failure, drop the port created above. */
notification_resource_lookup_out:
		if (notification_resource_user != NULL) {
			kfree_data(notification_resource_user, usize);
		}
		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
			mach_port_deallocate(current_space(), port_name);
		}
		break;
	}


	/* Exchange indicator minimum-on-time deadlines with userspace. */
	case EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != sizeof(struct exclaves_indicator_deadlines)) {
			return KERN_INVALID_ARGUMENT;
		}

		struct exclaves_indicator_deadlines udurations;
		error = copyin(ubuffer, &udurations, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_indicator_min_on_time_deadlines(&udurations);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		error = copyout(&udurations, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		break;
	}

	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
788 
789 /* -------------------------------------------------------------------------- */
790 #pragma mark kernel entry points
791 
/*
 * Kernel-internal endpoint call. The message payload travels in the
 * calling thread's L4 IPC buffer; the tag is passed in/out via *tag and
 * the exclaves-side return value is delivered via *error.
 * The caller must already hold an allocated IPC buffer; port must be
 * IPC_PORT_NULL (ports are not yet supported).
 */
kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	assert(port == IPC_PORT_NULL);

	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
	assert(ipcb != NULL);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
	    endpoint_id, *tag);

	/* Place the tag in the message-register area, make the call, then
	 * read back the (possibly updated) tag and the call's return value. */
	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
	kr = exclaves_endpoint_call_internal(port, endpoint_id);
	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
	*error = XNUPROXY_CR_RETVAL(ipcb);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
	    "error 0x%llx\n", endpoint_id, *tag, *error);

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
822 
/*
 * Ensure the current thread has an exclaves IPC buffer, allocating one on
 * first use, and take a use-count reference on it. Optionally returns the
 * buffer pointer via out_ipc_buffer. Paired with exclaves_free_ipc_buffer().
 */
kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		/* First allocation for this thread: usecnt must be zero both
		 * before and after the context is set up. */
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
	}
	/* Take a reference for this caller. */
	thread->th_exclaves_ipc_ctx.usecnt++;

	if (out_ipc_buffer != NULL) {
		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
	}
	return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
849 
/*
 * Drop a use-count reference on the current thread's exclaves IPC buffer,
 * freeing the underlying context when the count reaches zero. Threads
 * marked NOINSPECT keep their cached buffer alive (the early return below
 * intentionally skips the usecnt decrement for them).
 */
kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES

	/* The inspection thread's cached buffer should never be freed */
	thread_t thread = current_thread();

	/* Don't try to free unallocated contexts. */
	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		return KERN_SUCCESS;
	}

	const thread_exclaves_inspection_flags_t iflags =
	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		return KERN_SUCCESS;
	}

	/* Only the last reference actually frees the context. */
	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
		return KERN_SUCCESS;
	}

	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
879 
/*
 * Thread-teardown hook: release any exclaves IPC context the thread still
 * holds, regardless of its use count. Must be called on the terminating
 * thread itself, with no exclaves state outstanding.
 */
kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
	kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
	assert(thread == current_thread());
	assert(thread->th_exclaves_intstate == 0);
	assert(thread->th_exclaves_state == 0);
	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
		exclaves_debug_printf(show_progress,
		    "exclaves: thread_terminate freeing abandoned exclaves "
		    "ipc buffer\n");
		/* Unconditionally free context irrespective of usecount */
		thread->th_exclaves_ipc_ctx.usecnt = 0;
		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
		assert(kr == KERN_SUCCESS);
	}
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

	return kr;
}
904 
905 OS_CONST
906 void*
exclaves_get_ipc_buffer(void)907 exclaves_get_ipc_buffer(void)
908 {
909 #if CONFIG_EXCLAVES
910 	thread_t thread = current_thread();
911 	Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;
912 
913 	return ipcb;
914 #else /* CONFIG_EXCLAVES */
915 	return NULL;
916 #endif /* CONFIG_EXCLAVES */
917 }
918 
919 #if CONFIG_EXCLAVES
920 
/*
 * Pin the calling thread to the boot CPU. The cluster-powerdown
 * suspension must happen before binding, and the thread_block() is what
 * actually migrates the thread onto the bound processor.
 * Paired with unbind_from_boot_core().
 */
static void
bind_to_boot_core(void)
{
	/*
	 * First ensure the boot cluster isn't powered down preventing the
	 * thread from running at all.
	 */
	suspend_cluster_powerdown();
	const int cpu = ml_get_boot_cpu_number();
	processor_t processor = cpu_to_processor(cpu);
	assert3p(processor, !=, NULL);
	__assert_only processor_t old = thread_bind(processor);
	assert3p(old, ==, PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
}
936 
/*
 * Undo bind_to_boot_core(): unbind first (thread_block makes it take
 * effect), then allow the cluster to power down again.
 */
static void
unbind_from_boot_core(void)
{
	/* Unbind the thread from the boot CPU. */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
	resume_cluster_powerdown();
}
945 
extern kern_return_t exclaves_boot_early(void);
/*
 * Early exclaves boot: fetch boot info, optionally perform an "early
 * enter" into the exclaves scheduler pinned to the boot CPU, then bring
 * up the scheduler, xnuproxy, resources and the panic thread in order.
 * Caller must hold exclaves_boot_lock.
 */
kern_return_t
exclaves_boot_early(void)
{
	kern_return_t kr = KERN_FAILURE;
	uint64_t boot_info = 0;
	bool early_enter = false;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	kr = exclaves_bootinfo(&boot_info, &early_enter);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Get bootinfo failed\n");
		return kr;
	}

	if (early_enter) {
		thread_t thread = current_thread();
		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

		/* The early enter must run on the boot CPU with preemption
		 * disabled and the scheduler-call state flagged. */
		bind_to_boot_core();

		disable_preemption_without_measurements();
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

		kr = exclaves_enter();

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
		enable_preemption();

		unbind_from_boot_core();

		if (kr != KERN_SUCCESS) {
			exclaves_debug_printf(show_errors,
			    "exclaves: early exclaves enter failed\n");
			if (kr == KERN_ABORTED) {
				panic("Unexpected ringgate panic status");
			}
			return kr;
		}
	}

	uint64_t xnuproxy_boot_info = 0;
	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Init scheduler failed\n");
		return kr;
	}

	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy setup failed\n");
		return KERN_FAILURE;
	}

	kr = exclaves_resource_init();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: failed to initialize resources\n");
		return kr;
	}

	kr = exclaves_panic_thread_setup();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy panic thread setup failed\n");
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
1020 #endif /* CONFIG_EXCLAVES */
1021 
1022 #if CONFIG_EXCLAVES
1023 static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
1024 #endif /* CONFIG_EXCLAVES */
1025 
/*
 * Record the XrtHosted callback table supplied by the exclaves support
 * kext. Only the first registration sticks; subsequent calls are ignored.
 * On !CONFIG_EXCLAVES builds this is a no-op.
 */
void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
	/* First registration wins; never overwrite an existing table. */
	if (exclaves_callbacks != NULL) {
		return;
	}
	exclaves_callbacks = callbacks;
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}
1037 
/*
 * Publish a new timebase offset for the given exclaves clock
 * (continuous or absolute).
 *
 * The store uses a compare-exchange loop so concurrent updaters cannot
 * silently lose each other's writes: on cmpxchg failure, latest_offset is
 * refreshed with the competing value and the loop re-checks whether our
 * offset still needs to be published.
 */
void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
	assert(
		type == EXCLAVES_CLOCK_CONTINUOUS ||
		type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
	exclaves_clock_t *clock = &exclaves_clock[type];
	/* Snapshot the currently published offset. */
	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
	while (latest_offset != offset) {
		/* Update the latest offset with the new offset. If this fails, then a
		 * concurrent update occurred and our offset may be stale. */
		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
		    offset, &latest_offset, relaxed)) {
			break;
		}
	}
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}
1059 
1060 /* -------------------------------------------------------------------------- */
1061 
1062 #pragma mark exclaves ipc internals
1063 
1064 #if CONFIG_EXCLAVES
1065 
1066 static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,exclaves_id_t endpoint_id)1067 exclaves_endpoint_call_internal(__unused ipc_port_t port,
1068     exclaves_id_t endpoint_id)
1069 {
1070 	kern_return_t kr = KERN_SUCCESS;
1071 
1072 	assert(port == IPC_PORT_NULL);
1073 
1074 	kr = exclaves_xnuproxy_endpoint_call(endpoint_id);
1075 
1076 	return kr;
1077 }
1078 
1079 /* -------------------------------------------------------------------------- */
1080 #pragma mark secure kernel communication
1081 
/* ringgate entry endpoints */
enum {
	RINGGATE_EP_ENTER, /* enter the exclaves scheduler (see exclaves_enter) */
	RINGGATE_EP_INFO   /* query boot info (see exclaves_bootinfo) */
};

/* ringgate entry status codes */
enum {
	RINGGATE_STATUS_SUCCESS, /* call completed normally */
	RINGGATE_STATUS_ERROR,   /* call failed; mapped to KERN_FAILURE */
	RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core paniced */
};
1094 
/*
 * Enter the exclaves world via the RINGGATE_EP_ENTER endpoint and run
 * until the secure side returns control to xnu.
 *
 * Preconditions (asserted): the calling thread has exactly one of the
 * exclaves call-type flags set (RPC / XNUPROXY / SCHEDULER_CALL /
 * RESUME_PANIC_THREAD) and is not already executing in exclaves.
 *
 * Returns: KERN_SUCCESS on normal completion, KERN_FAILURE on a ringgate
 * error, KERN_ABORTED when another core paniced inside exclaves.
 */
OS_NOINLINE
static kern_return_t
exclaves_enter(void)
{
	uint32_t endpoint = RINGGATE_EP_ENTER;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	__assert_only thread_t thread = current_thread();

	/*
	 * Should never re-enter exclaves.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
	    (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
		panic("attempt to re-enter exclaves");
	}

	/*
	 * Must have one (and only one) of the flags set to enter exclaves.
	 */
	__assert_only const thread_exclaves_state_flags_t mask = (
		TH_EXCLAVES_RPC |
		TH_EXCLAVES_XNUPROXY |
		TH_EXCLAVES_SCHEDULER_CALL |
		TH_EXCLAVES_RESUME_PANIC_THREAD);
	assert3u(thread->th_exclaves_state & mask, !=, 0);
	assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);

#if MACH_ASSERT
	/*
	 * Set the ast to check that the thread doesn't return to userspace
	 * while in an RPC or XNUPROXY call.
	 */
	act_set_debug_assert();
#endif /* MACH_ASSERT */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_START);

	/* Attribute CPU time spent on the secure side separately. */
	recount_enter_secure();

	/* xnu_return_to_gl2 relies on this flag being present to correctly return
	 * to SK from interrupts xnu handles on behalf of SK. */
	thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;

	/*
	 * Bracket with labels so stackshot can determine where exclaves are
	 * entered from xnu.
	 */
	__asm__ volatile (
		"EXCLAVES_ENTRY_START:\n\t"
	);
	result = sk_enter(endpoint, &regs);
	__asm__ volatile (
		"EXCLAVES_ENTRY_END:\n\t"
	);

	thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;

	recount_leave_secure();

#if CONFIG_SPTM
	/**
	 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
	 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
	 * debug state forcing a reload on the next return to user mode.
	 */
	if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
		arm_debug_set(NULL);
	}
#endif /* CONFIG_SPTM */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_END);

	/* Map the ringgate status onto kern_return_t for callers. */
	switch (result) {
	case RINGGATE_STATUS_SUCCESS:
		return KERN_SUCCESS;
	case RINGGATE_STATUS_ERROR:
		return KERN_FAILURE;
	case RINGGATE_STATUS_PANIC:
		return KERN_ABORTED;
	default:
		assertf(false, "Unknown ringgate status %llu", result);
		__builtin_trap();
	}
}
1184 
1185 
1186 /*
1187  * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
 * it indicates that we should immediately enter the ringgate once in order
1189  * to allow the scheduler to perform early boot initialisation.
1190  */
1191 #define EARLY_RINGGATE_ENTER 2
1192 
1193 OS_NOINLINE
1194 static kern_return_t
exclaves_bootinfo(uint64_t * out_boot_info,bool * early_enter)1195 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
1196 {
1197 	uint32_t endpoint = RINGGATE_EP_INFO;
1198 	uint64_t result = RINGGATE_STATUS_ERROR;
1199 
1200 	sptm_call_regs_t regs = { };
1201 
1202 	recount_enter_secure();
1203 	result = sk_enter(endpoint, &regs);
1204 	recount_leave_secure();
1205 	if (result == RINGGATE_STATUS_ERROR) {
1206 		return KERN_FAILURE;
1207 	}
1208 
1209 	*early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
1210 	*out_boot_info = result & ~EARLY_RINGGATE_ENTER;
1211 
1212 	return KERN_SUCCESS;
1213 }
1214 
1215 /* -------------------------------------------------------------------------- */
1216 
1217 #pragma mark exclaves scheduler communication
1218 
1219 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
1220 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);
1221 
/*
 * Wire up the per-CPU scheduler request/response buffer pointers.
 *
 * The exclaves scheduler identifies cores by MPIDR. For each scheduler
 * core, match its hardwareId against every xnu CPU's cpu_phys_id and, on a
 * match, cache pointers to that core's request and response buffers in
 * per-cpu data for quick access on the request path.
 */
static void
exclaves_init_multicore(void)
{
	XrtHosted_Buffer_t **req, **res;

	exclaves_wait_for_cpu_init();

	exclaves_debug_printf(show_progress,
	    "Using MPIDR for exclave scheduler core IDs\n");

	/*
	 * Match the hardwareID to the physical ID and stash the pointers to the
	 * request/response buffers in per-cpu data for quick access.
	 */
	size_t core_count = exclaves_callbacks->v1.cores();
	for (size_t i = 0; i < core_count; i++) {
		const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
		uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;

		percpu_foreach(cpu_data, cpu_data) {
			if (cpu_data->cpu_phys_id != dt_phys_id) {
				continue;
			}
			req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
			*req = exclaves_callbacks->v1.Core.request(i);

			res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
			*res = exclaves_callbacks->v1.Core.response(i);

			/* Each scheduler core maps to at most one CPU. */
			break;
		}
	}
}
1255 
1256 static kern_return_t
exclaves_scheduler_init(uint64_t boot_info,uint64_t * xnuproxy_boot_info)1257 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
1258 {
1259 	kern_return_t kr = KERN_SUCCESS;
1260 	XrtHosted_Error_t hosted_error;
1261 
1262 	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);
1263 
1264 	if (!pmap_valid_address(boot_info)) {
1265 		exclaves_debug_printf(show_errors,
1266 		    "exclaves: %s: 0x%012llx\n",
1267 		    "Invalid root physical address",
1268 		    boot_info);
1269 		return KERN_FAILURE;
1270 	}
1271 
1272 	if (exclaves_callbacks == NULL) {
1273 		exclaves_debug_printf(show_errors,
1274 		    "exclaves: Callbacks not registered\n");
1275 		return KERN_FAILURE;
1276 	}
1277 
1278 	/* Initialise XrtHostedXnu kext */
1279 	kr = exclaves_hosted_error(
1280 		exclaves_callbacks->v1.init(
1281 			XrtHosted_Version_current,
1282 			phystokv(boot_info),
1283 			&hosted_error),
1284 		&hosted_error);
1285 	if (kr != KERN_SUCCESS) {
1286 		return kr;
1287 	}
1288 
1289 	/* Record aperture addresses in buffer */
1290 	size_t frames = exclaves_callbacks->v1.frames();
1291 	XrtHosted_Mapped_t **pages = zalloc_permanent(
1292 		frames * sizeof(XrtHosted_Mapped_t *),
1293 		ZALIGN(XrtHosted_Mapped_t *));
1294 	size_t index = 0;
1295 	uint64_t phys = boot_info;
1296 	while (index < frames) {
1297 		if (!pmap_valid_address(phys)) {
1298 			exclaves_debug_printf(show_errors,
1299 			    "exclaves: %s: 0x%012llx\n",
1300 			    "Invalid shared physical address",
1301 			    phys);
1302 			return KERN_FAILURE;
1303 		}
1304 		pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
1305 		kr = exclaves_hosted_error(
1306 			exclaves_callbacks->v1.nextPhys(
1307 				pages[index],
1308 				&index,
1309 				&phys,
1310 				&hosted_error),
1311 			&hosted_error);
1312 		if (kr != KERN_SUCCESS) {
1313 			return kr;
1314 		}
1315 	}
1316 
1317 	/* Initialise the mapped region */
1318 	exclaves_callbacks->v1.setMapping(
1319 		XrtHosted_Region_scattered(frames, pages));
1320 
1321 	/* Boot the scheduler. */
1322 	kr = exclaves_scheduler_boot();
1323 	if (kr != KERN_SUCCESS) {
1324 		return kr;
1325 	}
1326 
1327 	XrtHosted_Global_t *global = exclaves_callbacks->v1.global();
1328 
1329 	/* Only support MPIDR multicore. */
1330 	if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
1331 		exclaves_debug_printf(show_errors,
1332 		    "exclaves: exclaves scheduler doesn't support multicore");
1333 		return KERN_FAILURE;
1334 	}
1335 	exclaves_init_multicore();
1336 
1337 	/* Initialise the XNU proxy */
1338 	if (!pmap_valid_address(global->v1.proxyInit)) {
1339 		exclaves_debug_printf(show_errors,
1340 		    "exclaves: %s: 0x%012llx\n",
1341 		    "Invalid xnu prpoxy physical address",
1342 		    phys);
1343 		return KERN_FAILURE;
1344 	}
1345 	*xnuproxy_boot_info = global->v1.proxyInit;
1346 
1347 	return kr;
1348 }
1349 
1350 #if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1351 #define exclaves_scheduler_debug_save_buffer(_buf) \
1352 	XrtHosted_Buffer_t _buf##_copy = *(_buf)
1353 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1354 	    _response_buf) ({ \
1355 	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
1356 	        printf("exclaves: Scheduler request = %p\n", _request_buf); \
1357 	        printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
1358 	            _request_buf##_copy.tag); \
1359 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1360 	                printf("exclaves: Scheduler request.arguments[%02zu] = " \
1361 	                    "0x%04llx\n", arg, \
1362 	                    _request_buf##_copy.arguments[arg]); \
1363 	        } \
1364 	        printf("exclaves: Scheduler response = %p\n", _response_buf); \
1365 	        printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
1366 	                _response_buf##_copy.tag); \
1367 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1368 	                printf("exclaves: Scheduler response.arguments[%02zu] = " \
1369 	                    "0x%04llx\n", arg, \
1370 	                    _response_buf##_copy.arguments[arg]); \
1371 	        } \
1372 	}})
#else // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1374 #define exclaves_scheduler_debug_save_buffer(_buf) ({ })
1375 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1376 	    _response_buf) ({ })
#endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1378 
/*
 * Emit the DBG_FUNC_START tracepoint for an outgoing scheduler request.
 *
 * One case per supported request tag; the traced arguments mirror the
 * request payload. Panics on a tag outside the supported set (must stay in
 * sync with request_trace_end below).
 */
static void
request_trace_start(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
		    request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
		    request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
		    request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
		    request->PmmEarlyAllocResponse.a);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}
1421 
/*
 * Emit the DBG_FUNC_END tracepoint matching request_trace_start for the
 * same request. No payload arguments are traced on the end event. Panics
 * on an unsupported tag.
 */
static void
request_trace_end(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}
1460 
/*
 * Send one request to the exclaves scheduler and decode its response.
 *
 * Encodes the request into this core's per-CPU request buffer, enters
 * exclaves, then decodes from the (possibly different) core's response
 * buffer. Runs with interrupts and preemption disabled around the entry
 * because the buffers are per-core.
 *
 * Returns the result of exclaves_enter(), or KERN_POLICY_LIMIT when the
 * request was refused because the exclaves clocks are stale.
 */
__attribute__((always_inline))
static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t *request,
    XrtHosted_Response_t *response)
{
	assert3u(request->tag, >, XrtHosted_Request_Invalid);
	assert3u(request->tag, <, XrtHosted_Request_Limit);

	kern_return_t kr = KERN_SUCCESS;
	bool istate;

	/*
	 * Disable preemption and interrupts as the xrt hosted scheduler data
	 * structures are per-core.
	 * Preemption disabled and interrupt disabled timeouts are disabled for
	 * now until we can co-ordinate the measurements with the exclaves side
	 * of things.
	 */
	istate = ml_set_interrupts_enabled_with_debug(false, false);

	/* Interrupts should have been enabled entering this function. */
	assert(istate);

	/*
	 * This needs to be done with interrupts disabled, otherwise stackshot
	 * could mark the thread blocked just after this function exits and a
	 * thread marked as AST blocked would go into exclaves.
	 */

	while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state,
	    relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		/* Enable interrupts */
		(void) ml_set_interrupts_enabled_with_debug(true, false);

		/* Wait until the thread is collected on exclaves side */
		exclaves_inspection_check_ast();

		/* Disable interrupts and preemption before next AST check */
		ml_set_interrupts_enabled_with_debug(false, false);
	}
	/* Interrupts are disabled and exclaves_stackshot_ast is clean */

	disable_preemption_without_measurements();

	/*
	 * Don't enter with a stale clock (unless updating the clock or
	 * panicking).
	 */
	if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
	    request->tag != XrtHosted_Request_WatchdogPanic &&
	    exclaves_clocks_need_update()) {
		enable_preemption();
		(void) ml_set_interrupts_enabled_with_debug(istate, false);
		return KERN_POLICY_LIMIT;
	}

	XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
	assert3p(request_buf, !=, NULL);

	request_trace_start(request);

	/* Encode into this core's request buffer before entering. */
	exclaves_callbacks->v1.Request.encode(request_buf, request);
	exclaves_scheduler_debug_save_buffer(request_buf);

	kr = exclaves_enter();

	/* The response may have come back on a different core. */
	XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
	assert3p(response_buf, !=, NULL);

	exclaves_scheduler_debug_save_buffer(response_buf);
	exclaves_callbacks->v1.Response.decode(response_buf, response);

	request_trace_end(request);

	enable_preemption();
	(void) ml_set_interrupts_enabled_with_debug(istate, false);

	exclaves_scheduler_debug_show_request_response(request_buf, response_buf);

	if (kr == KERN_ABORTED) {
		/* RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC indicating that
		 * another core has paniced in exclaves and is on the way to call xnu
		 * panic() via SPTM, so wait here for that to happen. */
		exclaves_wait_for_panic();
	}

	return kr;
}
1550 
/*
 * Park the current thread while another core (which paniced inside
 * exclaves) reaches xnu panic() via SPTM. The block is expected never to
 * return; if the 1-second timeout fires or the thread otherwise wakes,
 * panic here instead.
 */
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void)
{
	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
	    NSEC_PER_SEC);
	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
	panic("Unexpected wait for panic result: %d", wr);
}
1560 
/*
 * Handle a Yield response: the exclave thread `yield->thread` yielded to
 * `yield->yieldTo`. Currently only logged and traced; actual handoff is
 * still TODO (see below). When not an early-boot yield, the responder is
 * asserted to be this thread's scheduling context.
 */
static kern_return_t
handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Yield_t *yield)
{
	Exclaves_L4_Word_t responding_scid = yield->thread;
	Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
	    early ? "(early yield)" : "", responding_scid, yielded_to_scid);
	/* TODO: 1. remember yielding scid if it isn't the xnu proxy's
	 * th_exclaves_scheduling_context_id so we know to resume it later
	 * 2. translate yield_to to thread_switch()-style handoff.
	 */
	if (!early) {
		assert3u(responding_scid, ==, scid);
		assert3u(yield->threadHostId, ==, ctid);
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);

	return KERN_SUCCESS;
}
1586 
/*
 * Handle a Spawned response: a new exclave thread was created.
 *
 * A spawn is only legal when the current thread carries
 * TH_EXCLAVES_SPAWN_EXPECTED; any other spawn is an error
 * (KERN_FAILURE). On success the responder is asserted to match this
 * thread's scid/ctid.
 */
static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Spawned_t *spawned)
{
	Exclaves_L4_Word_t responding_scid = spawned->thread;
	thread_t thread = current_thread();
	__assert_only ctid_t ctid = thread_get_ctid(thread);

	/*
	 * There are only a few places an exclaves thread is expected to be
	 * spawned. Any other cases are considered errors.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected thread spawn: "
		    "scid 0x%lx spawned scid 0x%llx\n",
		    responding_scid, spawned->spawned);
		return KERN_FAILURE;
	}

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
	    responding_scid, (unsigned long)spawned->spawned);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);

	assert3u(responding_scid, ==, scid);
	assert3u(spawned->threadHostId, ==, ctid);

	return KERN_SUCCESS;
}
1618 
/*
 * Handle a Terminated response. Thread termination is never expected
 * here: log it as an error, trace it, and return KERN_TERMINATED to the
 * caller.
 */
static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t *terminated)
{
	Exclaves_L4_Word_t responding_scid = terminated->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_errors,
	    "exclaves: Scheduler: Unexpected thread terminate: "
	    "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
	    terminated->terminated);
	assert3u(terminated->threadHostId, ==, ctid);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_TERMINATED),
	    terminated->terminated);

	return KERN_TERMINATED;
}
1637 
/*
 * Handle a Wait response: block the current thread on an epoch sync queue
 * until woken from the exclaves side.
 *
 * The scheduler's interruptibility setting maps onto the esync wait mode:
 *   None          -> THREAD_UNINT,         kernel policy
 *   Voluntary     -> THREAD_INTERRUPTIBLE, kernel policy
 *   DynamicQueue  -> THREAD_INTERRUPTIBLE, user policy
 *
 * Returns KERN_ABORTED if the wait was interrupted, KERN_SUCCESS if woken
 * (or never actually waited); panics on any other wait result.
 */
static kern_return_t
handle_response_wait(const XrtHosted_Wait_t *wait)
{
	Exclaves_L4_Word_t responding_scid = wait->waiter;
	thread_t thread = current_thread();
	__assert_only ctid_t ctid = thread_get_ctid(thread);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Wait: "
	    "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
	    "epoch 0x%llx\n", responding_scid, wait->owner,
	    wait->queueId, wait->epoch);
	assert3u(wait->waiterHostId, ==, ctid);

	/* The exclaves inspection thread should never wait. */
	if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		panic("Exclaves inspection thread tried to wait\n");
	}

	/*
	 * Note, "owner" may not be safe to access directly, for example
	 * the thread may have exited and been freed. esync_wait will
	 * only access it under a lock if the epoch is fresh thus
	 * ensuring safety.
	 */
	const ctid_t owner = (ctid_t)wait->ownerHostId;
	const XrtHosted_Word_t id = wait->queueId;
	const uint64_t epoch = wait->epoch;

	wait_interrupt_t interruptible;
	esync_policy_t policy;

	switch (wait->interruptible) {
	case XrtHosted_Interruptibility_None:
		interruptible = THREAD_UNINT;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_Voluntary:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_DynamicQueue:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_USER;
		break;

	default:
		panic("Unknown exclaves interruptibility: %llu",
		    wait->interruptible);
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
	    wait->interruptible);
	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), owner, policy, interruptible);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}
1711 
1712 static kern_return_t
handle_response_wake(const XrtHosted_Wake_t * wake)1713 handle_response_wake(const XrtHosted_Wake_t *wake)
1714 {
1715 	Exclaves_L4_Word_t responding_scid = wake->waker;
1716 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1717 
1718 	exclaves_debug_printf(show_progress,
1719 	    "exclaves: Scheduler: Wake: "
1720 	    "scid 0x%lx wake of queue id 0x%llx, "
1721 	    "epoch 0x%llx, all 0x%llx\n", responding_scid,
1722 	    wake->queueId, wake->epoch, wake->all);
1723 	assert3u(wake->wakerHostId, ==, ctid);
1724 
1725 	const XrtHosted_Word_t id = wake->queueId;
1726 	const uint64_t epoch = wake->epoch;
1727 	const esync_wake_mode_t mode = wake->all != 0 ?
1728 	    ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;
1729 
1730 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1731 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);
1732 
1733 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1734 	    exclaves_get_queue_counter(id), mode, 0);
1735 
1736 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1737 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1738 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1739 
1740 	return KERN_SUCCESS;
1741 }
1742 
/*
 * Handle a WakeWithOwner response: wake a single waiter on the queue and
 * hand queue ownership to the given thread. Always returns KERN_SUCCESS;
 * the esync result only feeds the trace event.
 */
static kern_return_t
handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
{
	Exclaves_L4_Word_t responding_scid = wake->waker;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: WakeWithOwner: "
	    "scid 0x%lx wake of queue id 0x%llx, "
	    "epoch 0x%llx, owner 0x%llx\n", responding_scid,
	    wake->queueId, wake->epoch,
	    wake->owner);

	assert3u(wake->wakerHostId, ==, ctid);

	const ctid_t owner = (ctid_t)wake->ownerHostId;
	const XrtHosted_Word_t id = wake->queueId;
	const uint64_t epoch = wake->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
	    ESYNC_WAKE_ONE_WITH_OWNER);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}
1775 
/*
 * Handle a PanicWait response: this (the panic thread, asserted via its
 * scid) parks in exclaves_panic_thread_wait() until needed. The wait does
 * not return; the trailing return only satisfies the signature.
 */
static kern_return_t
handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
{
	Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
	__assert_only thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: PanicWait: "
	    "Panic thread SCID %lx\n",
	    panic_thread_scid);

	assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);

	exclaves_panic_thread_wait();

	/* NOT REACHABLE */
	return KERN_SUCCESS;
}
1794 
/*
 * Handle a Suspended response: the current thread's exclave context was
 * suspended; block (uninterruptibly) on the thread esync space until a
 * matching Resumed wakes it. Returns KERN_SUCCESS when woken or not
 * actually waiting, KERN_ABORTED if interrupted; panics otherwise.
 */
static kern_return_t
handle_response_suspended(const XrtHosted_Suspended_t *suspended)
{
	Exclaves_L4_Word_t responding_scid = suspended->suspended;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Suspended: "
	    "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
	assert3u(suspended->suspendedHostId, ==, ctid);

	const uint64_t id = suspended->suspended;
	const uint64_t epoch = suspended->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);

	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
	    exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}
1830 
/*
 * Handle a Resumed response: wake the specific thread (identified by its
 * ctid) that is blocked in handle_response_suspended on the thread esync
 * space. Always returns KERN_SUCCESS; the esync result only feeds the
 * trace event.
 */
static kern_return_t
handle_response_resumed(const XrtHosted_Resumed_t *resumed)
{
	Exclaves_L4_Word_t responding_scid = resumed->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
	    "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
	    resumed->resumedHostId, resumed->epoch);
	assert3u(resumed->threadHostId, ==, ctid);

	const ctid_t target = (ctid_t)resumed->resumedHostId;
	const XrtHosted_Word_t id = resumed->resumed;
	const uint64_t epoch = resumed->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
	    target);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
	    exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}
1860 
/*
 * Handle an Interrupted response: wake a specific target thread (by ctid)
 * out of its wait on the given queue. Always returns KERN_SUCCESS; the
 * esync result only feeds the trace event.
 */
static kern_return_t
handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
{
	Exclaves_L4_Word_t responding_scid = interrupted->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Interrupted: "
	    "scid 0x%lx interrupt on queue id 0x%llx, "
	    "epoch 0x%llx, target 0x%llx\n", responding_scid,
	    interrupted->queueId, interrupted->epoch,
	    interrupted->interruptedHostId);
	assert3u(interrupted->threadHostId, ==, ctid);

	const ctid_t target = (ctid_t)interrupted->interruptedHostId;
	const XrtHosted_Word_t id = interrupted->queueId;
	const uint64_t epoch = interrupted->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
	    target);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}
1892 
/*
 * Handle a "NothingScheduled" response: the exclaves scheduler had no
 * runnable work. Log, emit a tracepoint and return success.
 */
static kern_return_t
handle_response_nothing_scheduled(
	__unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: nothing scheduled\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));

	return KERN_SUCCESS;
}
1905 
/*
 * Handle an "AllExclavesBooted" response: the boot protocol has completed.
 * Log, emit a tracepoint and return success (the caller's boot loop
 * terminates on this response tag).
 */
static kern_return_t
handle_response_all_exclaves_booted(
	__unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: all exclaves booted\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));

	return KERN_SUCCESS;
}
1918 
/*
 * The Early Alloc response asks for npages to be allocated. The list of
 * allocated pages is written into the first allocated page in the form of 32bit
 * page numbers. The physical address of the first page is passed back to the
 * exclaves scheduler as part of the next request.
 */
static kern_return_t
handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
    uint64_t *pagelist_pa)
{
	/* a = number of pages requested, b = request flags (logged only). */
	const uint32_t npages = (uint32_t)pmm_early_alloc->a;
	const uint64_t flags = pmm_early_alloc->b;

	exclaves_memory_pagekind_t kind = EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN;
	exclaves_memory_page_flags_t alloc_flags = EXCLAVES_MEMORY_PAGE_FLAGS_NONE;

	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
	    npages, flags);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);

	/* Nothing to do; leave *pagelist_pa untouched. */
	if (npages == 0) {
		return KERN_SUCCESS;
	}

	/* Bound the request so the stack-allocated page list below is safe. */
	if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
		exclaves_debug_printf(show_errors,
		    "exclaves: request to allocate too many pages: %u\n",
		    npages);
		return KERN_NO_SPACE;
	}


	/*
	 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
	 * stack allocation is sufficient and fast. If
	 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
	 * to the heap.
	 */
	uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
	exclaves_memory_alloc(npages, page, kind, alloc_flags);

	/* Now copy the list of pages into the first page. */
	uint64_t first_page_pa = ptoa(page[0]);
#if 0
	/*
	 * NOTE(review): disabled - the copy of the page list into the first
	 * page presumably happens elsewhere ("before sptm retype" per the
	 * original note); confirm before re-enabling.
	 */
	// move to before sptm retype
	uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
	for (int i = 0; i < npages; i++) {
		first_page[i] = page[i];
	}
#endif

	/* Hand the physical address of the first page back to the caller. */
	*pagelist_pa = first_page_pa;
	return KERN_SUCCESS;
}
1976 
/*
 * Handle a "WatchdogPanicComplete" response: the scheduler has finished
 * processing a watchdog panic request. Only emits a tracepoint.
 */
static void
handle_response_watchdog_panic_complete(
	__unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
}
1984 
/*
 * Handle a "Panicking" response: the secure side is panicking. Emit a
 * tracepoint and then park this thread waiting for the panic to land.
 * Never returns.
 */
OS_NORETURN
static void
handle_response_panicking(
	__unused const XrtHosted_Panicking_t *panicking)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_PANICKING));

	exclaves_wait_for_panic();

	/* Not reached. */
}
1997 
1998 static inline bool
exclaves_clocks_need_update(void)1999 exclaves_clocks_need_update(void)
2000 {
2001 	const exclaves_clock_type_t clocks[] = {
2002 		EXCLAVES_CLOCK_ABSOLUTE,
2003 		EXCLAVES_CLOCK_CONTINUOUS
2004 	};
2005 
2006 	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
2007 		const exclaves_clock_t *clock = &exclaves_clock[i];
2008 		exclaves_clock_t local = {
2009 			.u128 = os_atomic_load(&clock->a_u128, relaxed),
2010 		};
2011 
2012 		if (local.u64.sent_offset != local.u64.latest_offset) {
2013 			return true;
2014 		}
2015 	}
2016 
2017 	return false;
2018 }
2019 
/*
 * Push pending clock offset updates to the exclaves scheduler. For each
 * clock, keep sending the latest offset until the recorded sent offset
 * catches up with it. Returns the first request failure, or KERN_SUCCESS.
 */
OS_NOINLINE
static kern_return_t
exclaves_clocks_update(void)
{
	const exclaves_clock_type_t clocks[] = {
		EXCLAVES_CLOCK_ABSOLUTE,
		EXCLAVES_CLOCK_CONTINUOUS
	};

	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
		exclaves_clock_t local;
		exclaves_clock_t *clock = &exclaves_clock[i];

		/* Atomic 128-bit snapshot of {sent_offset, latest_offset}. */
		local.u128 = os_atomic_load(&clock->a_u128, relaxed);
		while (local.u64.sent_offset != local.u64.latest_offset) {
			XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
			    XrtHosted_Timer_Absolute :
			    XrtHosted_Timer_Continuous;

			kern_return_t kr =
			    exclaves_scheduler_request_update_timer(timer,
			    local.u64.latest_offset);
			if (kr != KERN_SUCCESS) {
				return kr;
			}

			/*
			 * Swap the sent offset with the local latest offset. If
			 * it fails, the sent offset will be reloaded.
			 */
			os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
			    local.u64.sent_offset, local.u64.latest_offset,
			    &local.u64.sent_offset, relaxed);

			/*
			 * Fetch the latest offset again, in case we are stale.
			 */
			local.u64.latest_offset = os_atomic_load(
				&clock->a_u64.latest_offset, relaxed);
		}
	}

	return KERN_SUCCESS;
}
2064 
/*
 * Boot the exclaves scheduler. The calling thread is temporarily bound to
 * the boot core, the per-CPU request/response buffers are pointed at the
 * boot core's buffers, and the boot request loop is run.
 */
static kern_return_t
exclaves_scheduler_boot(void)
{
	/* This must happen on the boot CPU - bind the thread. */
	bind_to_boot_core();

	/*
	 * Set the request/response buffers. These may be overriden later when
	 * doing multicore setup.
	 */
	*PERCPU_GET(exclaves_request) =
	    exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
	*PERCPU_GET(exclaves_response) =
	    exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);

	kern_return_t kr = exclaves_scheduler_request_boot();

	unbind_from_boot_core();

	return kr;
}
2086 
/*
 * Send an UpdateTimerOffset request publishing a new offset for the given
 * timer (absolute or continuous) to the exclaves scheduler. The only
 * expected response is NothingScheduled; anything else fails the call.
 */
static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset)
{
	thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to update timer\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_NothingScheduled,
	};

	const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
		.timer = timer,
		.offset = offset,
		);

	/* Flag the thread as inside a scheduler call for the duration. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was pending
		 * (and thus should never happen when trying to update a
		 * timer).
		 */
		panic("exclaves: timer update requested when updating timer");

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		return kr;
	}

	/* Flag the thread as handling a scheduler response. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_NothingScheduled:
		kr = handle_response_nothing_scheduled(&response.NothingScheduled);
		break;

	default:
		exclaves_debug_printf(show_errors, "exclaves: "
		    "unexpected scheduler response when updating timer\n");
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}
2144 
/*
 * Drive the exclaves boot protocol: repeatedly send a BootExclaves request
 * (or a PmmEarlyAllocResponse when the previous response asked for early
 * page allocation) until the scheduler reports AllExclavesBooted, a request
 * fails, or a response handler returns an error.
 */
static kern_return_t
exclaves_scheduler_request_boot(void)
{
	kern_return_t kr = KERN_FAILURE;
	thread_t thread = current_thread();

	/* Must not already be in any exclaves-related state. */
	assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to boot exclave\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_Invalid,
	};
	uint64_t pagelist_pa = 0;

	while (response.tag != XrtHosted_Response_AllExclavesBooted) {
		/*
		 * If the previous iteration allocated pages, answer with the
		 * physical address of the page list, otherwise (re)send the
		 * boot request.
		 */
		const XrtHosted_Request_t request = pagelist_pa != 0 ?
		    XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa):
		    XrtHosted_Request_BootExclavesMsg();
		pagelist_pa = 0;

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
		kr = exclaves_scheduler_request(&request, &response);
		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

		switch (kr) {
		case KERN_SUCCESS:
			break;

		case KERN_POLICY_LIMIT:
			kr = exclaves_clocks_update();
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			/*
			 * Don't try to process the response - we just updated
			 * the clock so continue with the boot request.
			 */
			continue;

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: scheduler request failed\n");
			return KERN_FAILURE;
		}

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

		switch (response.tag) {
		case XrtHosted_Response_Yield:
			kr = handle_response_yield(true, 0, &response.Yield);
			break;

		case XrtHosted_Response_NothingScheduled:
			kr = handle_response_nothing_scheduled(&response.NothingScheduled);
			break;

		case XrtHosted_Response_AllExclavesBooted:
			kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
			break;

		case XrtHosted_Response_PmmEarlyAlloc:
			/* May set pagelist_pa for the next iteration. */
			kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
			break;

		case XrtHosted_Response_PanicBufferAddress:
			handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
			break;

		case XrtHosted_Response_Panicking:
			/* OS_NORETURN - no break needed. */
			handle_response_panicking(&response.Panicking);
		/* Not reached. */

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
			    response.tag);
			kr = KERN_FAILURE;
			break;
		}

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	return kr;
}
2236 
/*
 * Send a Resume request (or an Interrupt request, when "interrupted" is
 * true) for the scheduling context "ctx" to the exclaves scheduler and
 * dispatch the response to the matching handler.
 * The calling thread must have TH_EXCLAVES_RESUME_PANIC_THREAD or
 * TH_EXCLAVES_RPC set.
 */
OS_INLINE
kern_return_t
exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
{
	thread_t thread = current_thread();
	const ctid_t ctid = thread_get_ctid(thread);

	assert3u(thread->th_exclaves_state &
	    (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = interrupted ?
	    XrtHosted_Request_InterruptWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		) :
	    XrtHosted_Request_ResumeWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		);

	kern_return_t kr = exclaves_scheduler_request(&request, &response);

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * Don't try to handle any response (as there isn't one), just
		 * return to the caller which will check MSG STATUS and re-enter
		 * if necessary.
		 */
		return exclaves_clocks_update();

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		break;
	}

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * NOTE(review): the asm labels below appear to delimit the
	 * scheduler-request handling code for stackshot (see
	 * exclaves_scheduler_request_range_start/_end) - confirm the label
	 * symbols match exclaves_scheduler_request_{start,end}_label.
	 */
	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	/* Dispatch on the response tag. */
	switch (response.tag) {
	case XrtHosted_Response_Wait:
		kr = handle_response_wait(&response.Wait);
		break;

	case XrtHosted_Response_Wake:
		kr = handle_response_wake(&response.Wake);
		break;

	case XrtHosted_Response_Yield:
		kr = handle_response_yield(false, ctx->scid, &response.Yield);
		break;

	case XrtHosted_Response_Spawned:
		kr = handle_response_spawned(ctx->scid, &response.Spawned);
		break;

	case XrtHosted_Response_Terminated:
		kr = handle_response_terminated(&response.Terminated);
		break;

	case XrtHosted_Response_WakeWithOwner:
		kr = handle_response_wake_with_owner(&response.WakeWithOwner);
		break;

	case XrtHosted_Response_PanicWait:
		kr = handle_response_panic_wait(&response.PanicWait);
		break;

	case XrtHosted_Response_Suspended:
		kr = handle_response_suspended(&response.Suspended);
		break;

	case XrtHosted_Response_Resumed:
		kr = handle_response_resumed(&response.Resumed);
		break;

	case XrtHosted_Response_Interrupted:
		kr = handle_response_interrupted(&response.Interrupted);
		break;

	case XrtHosted_Response_Panicking:
		/* OS_NORETURN - no break needed. */
		handle_response_panicking(&response.Panicking);
	/* Not reached. */

	case XrtHosted_Response_Invalid:
	case XrtHosted_Response_Failure:
	case XrtHosted_Response_Pong:
	case XrtHosted_Response_SleepUntil:
	case XrtHosted_Response_Awaken:
	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
		    response.tag);
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");

	return kr;
}
2351 
/*
 * Resume (or re-interrupt) the given thread's exclaves IPC context.
 * Thin wrapper around exclaves_scheduler_request_resume().
 * A friendly name to show up in backtraces.
 */
OS_NOINLINE
kern_return_t
exclaves_run(thread_t thread, bool interrupted)
{
	return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
	           interrupted);
}
2360 
/*
 * Send a WatchdogPanic request to the exclaves scheduler so the secure side
 * can participate in the panic.
 * Note: this is called from a thread with RT priority which is on the way to
 * panicking and thus doesn't log.
 */
kern_return_t
exclaves_scheduler_request_watchdog_panic(void)
{
	thread_t thread = current_thread();

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();

	/*
	 * Check for consistent exclaves thread state to make sure we don't
	 * accidentally block. This should normally never happen but if it does,
	 * just return and allow the caller to panic without gathering an
	 * exclaves stackshot.
	 */
	if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
	    thread->th_exclaves_state != 0) {
		return KERN_FAILURE;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was pending
		 * (and thus should never happen when trying to send a watchdog
		 * panic message).
		 */
		panic("exclaves: "
		    "timer update requested when calling watchdog panic");

	default:
		return kr;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_WatchdogPanicComplete:
		handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
		break;

	case XrtHosted_Response_Panicking:
		/* OS_NORETURN - no break needed. */
		handle_response_panicking(&response.Panicking);
	/* Not Reached. */

	default:
		panic("exclaves: unexpected scheduler response "
		    "when sending watchdog panic request: %d", response.tag);
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}
2425 
2426 /* -------------------------------------------------------------------------- */
2427 
2428 #pragma mark exclaves xnu proxy communication
2429 
2430 static kern_return_t
exclaves_hosted_error(bool success,XrtHosted_Error_t * error)2431 exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
2432 {
2433 	if (success) {
2434 		return KERN_SUCCESS;
2435 	} else {
2436 		exclaves_debug_printf(show_errors,
2437 		    "exclaves: XrtHosted: %s[%d] (%s): %s\n",
2438 		    error->file,
2439 		    error->line,
2440 		    error->function,
2441 		    error->expression
2442 		    );
2443 		return KERN_FAILURE;
2444 	}
2445 }
2446 
2447 
2448 #pragma mark exclaves privilege management
2449 
/*
 * All entitlement checking enabled by default.
 */
#define DEFAULT_ENTITLEMENT_FLAGS (~0)

/*
 * boot-arg to control the use of entitlements. Each bit corresponds to an
 * exclaves_priv_t value; a cleared bit disables the entitlement check for
 * that privilege (see has_entitlement()/has_entitlement_vnode()).
 * Eventually this should be removed and entitlement checking should be gated on
 * the EXCLAVES_R_ENTITLEMENTS requirement.
 * This will be addressed with rdar://125153460.
 */
TUNABLE(unsigned int, exclaves_entitlement_flags,
    "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);
2463 
2464 static bool
has_entitlement(task_t task,const exclaves_priv_t priv,const char * entitlement)2465 has_entitlement(task_t task, const exclaves_priv_t priv,
2466     const char *entitlement)
2467 {
2468 	/* Skip the entitlement if not enabled. */
2469 	if ((exclaves_entitlement_flags & priv) == 0) {
2470 		return true;
2471 	}
2472 
2473 	return IOTaskHasEntitlement(task, entitlement);
2474 }
2475 
2476 static bool
has_entitlement_vnode(void * vnode,const int64_t off,const exclaves_priv_t priv,const char * entitlement)2477 has_entitlement_vnode(void *vnode, const int64_t off,
2478     const exclaves_priv_t priv, const char *entitlement)
2479 {
2480 	/* Skip the entitlement if not enabled. */
2481 	if ((exclaves_entitlement_flags & priv) == 0) {
2482 		return true;
2483 	}
2484 
2485 	return IOVnodeHasEntitlement(vnode, off, entitlement);
2486 }
2487 
/*
 * Return true if the given task holds the specified exclaves privilege.
 * Some privileges are granted implicitly (kernel task, launchd) and some
 * positive results are cached in the task's read-only flags to avoid
 * repeated entitlement lookups.
 */
bool
exclaves_has_priv(task_t task, exclaves_priv_t priv)
{
	const bool is_kernel = task == kernel_task;
	/* launchd is always pid 1. */
	const bool is_launchd = task_pid(task) == 1;

	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		/* Both launchd and entitled tasks can spawn new conclaves. */
		if (is_launchd) {
			return true;
		}
		return has_entitlement(task, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	case EXCLAVES_PRIV_KERNEL_DOMAIN:
		/*
		 * Both the kernel itself and user tasks with the right
		 * privilege can access exclaves resources in the kernel domain.
		 */
		if (is_kernel) {
			return true;
		}

		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
			return true;
		}

		/* Cache a positive entitlement check in the RO flags. */
		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.kernel-domain")) {
			task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
			return true;
		}

		return false;

	case EXCLAVES_PRIV_BOOT:
		/* Both launchd and entitled tasks can boot exclaves. */
		if (is_launchd) {
			return true;
		}
		/* BEGIN IGNORE CODESTYLE */
		return has_entitlement(task, priv,
		    "com.apple.private.exclaves.boot");
		/* END IGNORE CODESTYLE */

	case EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME:
		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS) != 0) {
			return true;
		}

		/* Cache a positive entitlement check in the RO flags. */
		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.indicator_min_on_time")) {
			task_ro_flags_set(task, TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS);
			return true;
		}

		return false;

	/* The CONCLAVE HOST priv is always checked by vnode. */
	case EXCLAVES_PRIV_CONCLAVE_HOST:
	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2561 
/*
 * Return true if the executable backing the given vnode/offset holds the
 * specified exclaves privilege. Used for privileges which must be checked
 * against the on-disk binary rather than a live task.
 */
bool
exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
{
	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_HOST: {
		const bool has_conclave_host = has_entitlement_vnode(vnode,
		    off, priv, "com.apple.private.exclaves.conclave-host");

		/*
		 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
		 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
		 */

		/*
		 * Skip the cross-check unless both entitlements are actually
		 * being enforced.
		 */
		if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
		    (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
			return has_conclave_host;
		}

		const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
		    EXCLAVES_PRIV_KERNEL_DOMAIN,
		    "com.apple.private.exclaves.kernel-domain");

		/* See if it has both. */
		if (has_conclave_host && has_domain_kernel) {
			exclaves_debug_printf(show_errors,
			    "exclaves: task has both conclave-host and "
			    "kernel-domain entitlements which is forbidden\n");
			return false;
		}

		return has_conclave_host;
	}

	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		return has_entitlement_vnode(vnode, off, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2604 
2605 
2606 #pragma mark exclaves stackshot range
2607 
/* Unslid pointers defining the range of code which switches threads into
 * secure world (set once at startup by initialize_exclaves_ranges()). */
uintptr_t exclaves_enter_range_start;
uintptr_t exclaves_enter_range_end;

/* Unslid pointers defining the range of code which handles exclaves
 * scheduler requests (set once at startup by initialize_exclaves_ranges()). */
uintptr_t exclaves_scheduler_request_range_start;
uintptr_t exclaves_scheduler_request_range_end;
2616 
2617 
/*
 * Record the unslid start/end addresses of the exclaves enter and scheduler
 * request code ranges, for later use by the *_in_range() helpers.
 * Runs once in early boot.
 */
__startup_func
static void
initialize_exclaves_ranges(void)
{
	exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
	assert3u(exclaves_enter_range_start, !=, 0);
	exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
	assert3u(exclaves_enter_range_end, !=, 0);

	exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
	assert3u(exclaves_scheduler_request_range_start, !=, 0);
	exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
	assert3u(exclaves_scheduler_request_range_end, !=, 0);
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);
2633 
2634 /*
2635  * Return true if the specified address is in exclaves_enter.
2636  */
2637 static bool
exclaves_enter_in_range(uintptr_t addr,bool slid)2638 exclaves_enter_in_range(uintptr_t addr, bool slid)
2639 {
2640 	return slid ?
2641 	       exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
2642 	       exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
2643 }
2644 
2645 /*
2646  * Return true if the specified address is in scheduler request handlers.
2647  */
2648 static bool
exclaves_scheduler_request_in_range(uintptr_t addr,bool slid)2649 exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
2650 {
2651 	return slid ?
2652 	       exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
2653 	       exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
2654 }
2655 
2656 uint32_t
exclaves_stack_offset(const uintptr_t * addr,size_t nframes,bool slid)2657 exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
2658 {
2659 	size_t i = 0;
2660 
2661 	// Check for a frame matching scheduler request range
2662 	for (i = 0; i < nframes; i++) {
2663 		if (exclaves_scheduler_request_in_range(addr[i], slid)) {
2664 			break;
2665 		}
2666 	}
2667 
2668 	// Insert exclaves stacks before the scheduler request frame
2669 	if (i < nframes) {
2670 		return (uint32_t)(i + 1);
2671 	}
2672 
2673 	// Check for a frame matching upcall code range
2674 	for (i = 0; i < nframes; i++) {
2675 		if (exclaves_upcall_in_range(addr[i], slid)) {
2676 			break;
2677 		}
2678 	}
2679 
2680 	// Insert exclaves stacks before the upcall frame when found
2681 	if (i < nframes) {
2682 		return (uint32_t)(i + 1);
2683 	}
2684 
2685 	// Check for a frame matching exclaves enter range
2686 	for (i = 0; i < nframes; i++) {
2687 		if (exclaves_enter_in_range(addr[i], slid)) {
2688 			break;
2689 		}
2690 	}
2691 
2692 	// Put exclaves stacks on top of kernel stacks by default
2693 	if (i == nframes) {
2694 		i = 0;
2695 	}
2696 	return (uint32_t)i;
2697 }
2698 
2699 #if DEVELOPMENT || DEBUG
2700 
/* Tweak the set of relaxed requirements on startup (DEVELOPMENT/DEBUG
 * builds only). */
__startup_func
static void
exclaves_requirement_startup(void)
{
	/*
	 * The medium-term plan is that the boot-arg controlling entitlements
	 * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
	 * Until that happens, for historical reasons, if the entitlement
	 * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
	 * EXCLAVES_R_CONCLAVE_RESOURCES here too.
	 */
	if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
		exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
	}

	/* Always relax the EXCLAVES_R_EIC requirement on these builds. */
	exclaves_requirement_relax(EXCLAVES_R_EIC);
}
STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);
2720 
2721 #endif /* DEVELOPMENT || DEBUG */
2722 
2723 #endif /* CONFIG_EXCLAVES */
2724 
2725 
2726 #ifndef CONFIG_EXCLAVES
2727 /* stubs for sensor functions which are not compiled in from exclaves.c when
2728  * CONFIG_EXCLAVE is disabled */
2729 
2730 kern_return_t
exclaves_sensor_start(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2731 exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
2732     exclaves_sensor_status_t *status)
2733 {
2734 #pragma unused(sensor_type, flags, status)
2735 	return KERN_NOT_SUPPORTED;
2736 }
2737 
2738 kern_return_t
exclaves_sensor_stop(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2739 exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
2740     exclaves_sensor_status_t *status)
2741 {
2742 #pragma unused(sensor_type, flags, status)
2743 	return KERN_NOT_SUPPORTED;
2744 }
2745 
2746 kern_return_t
exclaves_sensor_status(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2747 exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
2748     exclaves_sensor_status_t *status)
2749 {
2750 #pragma unused(sensor_type, flags, status)
2751 	return KERN_NOT_SUPPORTED;
2752 }
2753 
2754 #endif /* ! CONFIG_EXCLAVES */
2755