xref: /xnu-11215.1.10/osfmk/kern/exclaves.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/exclaves.h>
30 #include <mach/mach_traps.h>
31 #include <kern/misc_protos.h>
32 #include <kern/assert.h>
33 #include <kern/recount.h>
34 #include <kern/startup.h>
35 
36 #if CONFIG_EXCLAVES
37 
38 #if CONFIG_SPTM
39 #include <arm64/sptm/sptm.h>
40 #include <arm64/hv/hv_vm.h>
41 #include <arm64/hv/hv_vcpu.h>
42 #else
43 #error Invalid configuration
44 #endif /* CONFIG_SPTM */
45 
46 #include <arm/cpu_data_internal.h>
47 #include <arm/misc_protos.h>
48 #include <kern/epoch_sync.h>
49 #include <kern/ipc_kobject.h>
50 #include <kern/kalloc.h>
51 #include <kern/locks.h>
52 #include <kern/percpu.h>
53 #include <kern/task.h>
54 #include <kern/thread.h>
55 #include <kern/zalloc.h>
56 #include <kern/exclaves_stackshot.h>
57 #include <kern/exclaves_test_stackshot.h>
58 #include <vm/pmap.h>
59 #include <pexpert/pexpert.h>
60 #include <pexpert/device_tree.h>
61 
62 #include <mach/exclaves_l4.h>
63 #include <mach/mach_port.h>
64 
65 #include <Exclaves/Exclaves.h>
66 
67 #include <IOKit/IOBSD.h>
68 
69 #include <xnuproxy/messages.h>
70 
71 #include "exclaves_debug.h"
72 #include "exclaves_panic.h"
73 #include "exclaves_xnuproxy.h"
74 
75 /* External & generated headers */
76 #include <xrt_hosted_types/types.h>
77 
78 #if __has_include(<Tightbeam/tightbeam.h>)
79 #include <Tightbeam/tightbeam.h>
80 #include <Tightbeam/tightbeam_private.h>
81 #endif
82 
83 #include "exclaves_resource.h"
84 #include "exclaves_upcalls.h"
85 #include "exclaves_boot.h"
86 #include "exclaves_inspection.h"
87 #include "exclaves_memory.h"
88 #include "exclaves_internal.h"
89 
90 LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");
91 
92 /* Boot lock - only used here for assertions. */
93 extern lck_mtx_t exclaves_boot_lock;
94 
95 /*
96  * Control access to exclaves. Multicore support is learned at runtime.
97  */
98 static LCK_MTX_DECLARE(exclaves_scheduler_lock, &exclaves_lck_grp);
99 static bool exclaves_multicore;
100 #if DEVELOPMENT || DEBUG
101 /* boot-arg to control use of the exclaves_scheduler_lock independently of
102  * whether exclaves multicore support is enabled */
103 static TUNABLE(bool, exclaves_smp_enabled, "exclaves_smp", true);
104 #else
105 #define exclaves_smp_enabled true
106 #endif
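
/*
 * When multicore support is absent, or SMP use is disabled via the
 * boot-arg above, exclaves_scheduler_lock serializes entry to the
 * exclaves scheduler (see exclaves_scheduler_request()).
 */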
107 
108 /*
109  * Sent/latest offset for updating exclaves clocks
110  */
111 typedef struct {
112 	union {
113 		/* atomic fields are used via atomic primitives */
114 		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
115 		_Atomic unsigned __int128 a_u128;
116 		/* non-atomic fields are used via a local variable; this is needed
117 		 * to avoid the undefined behavior of using an atomic struct, or of
118 		 * accessing atomic fields non-atomically */
119 		struct { uint64_t sent_offset, latest_offset; } u64;
120 		unsigned __int128 u128;
121 	};
122 } exclaves_clock_t;
123 
124 static exclaves_clock_t exclaves_absolute_clock, exclaves_continuous_clock;
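
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * A minimal sketch of how the union above is meant to be used: take one
 * atomic load of the whole 16-byte value through a_u128, then read the
 * plain u64 fields of the local copy. The helper name is hypothetical,
 * and it assumes os_atomic_load is supported for the 128-bit field on
 * this target.
 */
static inline bool
exclaves_clock_offsets_equal_sketch(exclaves_clock_t *clock)
{
	exclaves_clock_t local;

	/* One atomic snapshot of {sent_offset, latest_offset}. */
	local.u128 = os_atomic_load(&clock->a_u128, relaxed);

	/* Non-atomic reads are safe on the local copy. */
	return local.u64.sent_offset == local.u64.latest_offset;
}
#endif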
125 
126 static kern_return_t
127 exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);
128 
129 static kern_return_t
130 exclaves_enter(void);
131 static kern_return_t
132 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);
133 
134 static kern_return_t
135 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
136 OS_NORETURN OS_NOINLINE
137 static void
138 exclaves_wait_for_panic(void);
139 
140 static bool
141 exclaves_clock_needs_update(const exclaves_clock_t *clock);
142 static kern_return_t
143 exclaves_clock_update(exclaves_clock_t *clock, XrtHosted_Buffer_t *save_out_ptr, XrtHosted_Buffer_t *save_in_ptr);
144 
145 static kern_return_t
146 exclaves_scheduler_boot(void);
147 
148 static kern_return_t
149 exclaves_hosted_error(bool success, XrtHosted_Error_t *error);
150 
151 /*
152  * A static set of exclave epoch counters.
153  */
154 static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};
155 
156 static inline os_atomic(uint64_t) *
157 exclaves_get_queue_counter(const uint64_t id)
158 {
159 	return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
160 }
161 
162 static inline os_atomic(uint64_t) *
163 exclaves_get_thread_counter(const uint64_t id)
164 {
165 	return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
166 }
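
/*
 * These counters back the epoch checks in esync_wait()/esync_wake(): the
 * scheduler's Wait/Wake responses pass exclaves_get_queue_counter() for
 * the queue id named in the response (see handle_response_wait() and
 * handle_response_wake() below).
 */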
167 
168 
169 /* -------------------------------------------------------------------------- */
170 #pragma mark exclaves debug configuration
171 
172 #if DEVELOPMENT || DEBUG
173 TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
174     exclaves_debug_show_errors);
175 
176 TUNABLE_DT(exclaves_requirement_t, exclaves_relaxed_requirements, "/defaults",
177     "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements", 0,
178     TUNABLE_DT_NONE);
179 #else
180 const exclaves_requirement_t exclaves_relaxed_requirements = 0;
181 #endif
182 
183 #endif /* CONFIG_EXCLAVES */
184 
185 /* -------------------------------------------------------------------------- */
186 #pragma mark userspace entry point
187 
188 #if CONFIG_EXCLAVES
189 static kern_return_t
190 operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
191 {
192 	if (name != MACH_PORT_NULL) {
193 		/* Only accept MACH_PORT_NULL for now */
194 		return KERN_INVALID_CAPABILITY;
195 	}
196 
197 	/*
198 	 * As the boot operation itself happens outside the context of any
199 	 * conclave, it requires special privilege.
200 	 */
201 	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
202 		return KERN_DENIED;
203 	}
204 
205 	return exclaves_boot(stage);
206 }
207 #endif /* CONFIG_EXCLAVES */
208 
209 kern_return_t
210 _exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
211 {
212 #if CONFIG_EXCLAVES
213 	kern_return_t kr = KERN_SUCCESS;
214 	int error = 0;
215 
216 	mach_port_name_t name = uap->name;
217 	exclaves_id_t identifier = uap->identifier;
218 	mach_vm_address_t ubuffer = uap->buffer;
219 	mach_vm_size_t usize = uap->size;
220 	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
221 	mach_vm_size_t usize2 = uap->size2;
222 	mach_vm_size_t uoffset2 = uap->offset;
223 	task_t task = current_task();
224 
225 	/*
226 	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
227 	 * with xnu. That header is not shared with userspace. Make sure that
228 	 * the retval userspace picks up is the same as the one
229 	 * xnu/ExclavePlatform thinks it is.
230 	 */
231 	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
232 	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));
233 
234 	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
235 	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
236 	if (flags != 0) {
237 		return KERN_INVALID_ARGUMENT;
238 	}
239 
240 	/*
241 	 * Deal with OP_BOOT up-front as it has slightly different restrictions
242 	 * than the other operations.
243 	 */
244 	if (operation == EXCLAVES_CTL_OP_BOOT) {
245 		return operation_boot(name, (uint32_t)identifier);
246 	}
247 
248 	/*
249 	 * All other operations are restricted to properly entitled tasks which
250 	 * can operate in the kernel domain, or those which have joined
251 	 * conclaves (which has its own entitlement check).
252 	 * If requirements are relaxed during development, tasks with no
253 	 * conclaves are also allowed.
254 	 */
255 	if (task_get_conclave(task) == NULL &&
256 	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
257 	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
258 		return KERN_DENIED;
259 	}
260 
261 	/*
262 	 * Wait for STAGE_2 boot to complete. If exclaves are unsupported,
263 	 * return immediately.
264 	 */
265 	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_2);
266 	if (kr != KERN_SUCCESS) {
267 		return kr;
268 	}
269 
270 	if (task_get_conclave(task) != NULL) {
271 		/*
272 		 * For calls from tasks that have joined conclaves, now wait until
273 		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
274 		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would panic).
275 		 * For testing purposes, continue even if EXCLAVEKIT fails. This is a
276 		 * separate call to the one above because we need to distinguish
277 		 * STAGE_2 NOT SUPPORTED and still wait for EXCLAVEKIT to boot if it
278 		 * *is* supported.
279 		 */
280 		(void) exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
281 	}
282 
283 	switch (operation) {
284 	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
285 		if (name != MACH_PORT_NULL) {
286 			/* Only accept MACH_PORT_NULL for now */
287 			return KERN_INVALID_CAPABILITY;
288 		}
289 		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
290 		    usize != Exclaves_L4_IpcBuffer_Size) {
291 			return KERN_INVALID_ARGUMENT;
292 		}
293 
294 
295 		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
296 		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
297 		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
298 			return error;
299 		}
300 		assert(ipcb != NULL);
301 		if ((error = copyin(ubuffer, ipcb, usize))) {
302 			return error;
303 		}
304 
305 		if (identifier >= CONCLAVE_SERVICE_MAX) {
306 			return KERN_INVALID_ARGUMENT;
307 		}
308 
309 		/*
310 		 * Verify that the service actually exists in the current
311 		 * domain.
312 		 */
313 		if (!exclaves_conclave_has_service(task_get_conclave(task),
314 		    identifier)) {
315 			return KERN_INVALID_ARGUMENT;
316 		}
317 
318 		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
319 		error = copyout(ipcb, ubuffer, usize);
320 		/*
321 		 * Endpoint call to the conclave may have triggered a stop upcall;
322 		 * check if the stop upcall completion handler needs to run.
323 		 */
324 		task_stop_conclave_upcall_complete();
325 		if (error) {
326 			return error;
327 		}
328 		break;
329 	}
330 
331 	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
332 		if (name != MACH_PORT_NULL) {
333 			/* Only accept MACH_PORT_NULL for now */
334 			return KERN_INVALID_CAPABILITY;
335 		}
336 
337 		size_t len = 0;
338 		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
339 		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
340 		    &len) != 0 || id_name[0] == '\0') {
341 			return KERN_INVALID_ARGUMENT;
342 		}
343 
344 		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
345 		const exclaves_buffer_perm_t supported =
346 		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
347 		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
348 			return KERN_INVALID_ARGUMENT;
349 		}
350 
351 		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
352 		const bool new_api =
353 		    (perm == EXCLAVES_BUFFER_PERM_READ) ||
354 		    (perm == EXCLAVES_BUFFER_PERM_WRITE);
355 		const bool shared_mem_available =
356 		    exclaves_resource_lookup_by_name(domain, id_name,
357 		    XNUPROXY_RESOURCETYPE_SHAREDMEMORY) != NULL;
358 		const bool use_shared_mem = new_api && shared_mem_available;
359 
360 		exclaves_resource_t *resource = NULL;
361 		kr = use_shared_mem ?
362 		    exclaves_resource_shared_memory_map(domain, id_name, usize, perm, &resource) :
363 		    exclaves_named_buffer_map(domain, id_name, usize, perm, &resource);
364 		if (kr != KERN_SUCCESS) {
365 			return kr;
366 		}
367 
368 		kr = exclaves_resource_create_port_name(resource,
369 		    current_space(), &name);
370 		if (kr != KERN_SUCCESS) {
371 			return kr;
372 		}
373 
374 		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
375 		if (kr != KERN_SUCCESS) {
376 			mach_port_deallocate(current_space(), name);
377 			return kr;
378 		}
379 
380 		break;
381 	}
382 
383 	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
384 		exclaves_resource_t *resource = NULL;
385 		kr = exclaves_resource_from_port_name(current_space(), name,
386 		    &resource);
387 		if (kr != KERN_SUCCESS) {
388 			return kr;
389 		}
390 
391 		switch (resource->r_type) {
392 		case XNUPROXY_RESOURCETYPE_NAMEDBUFFER:
393 			kr = exclaves_named_buffer_copyin(resource, ubuffer,
394 			    usize, uoffset, usize2, uoffset2);
395 			break;
396 
397 		case XNUPROXY_RESOURCETYPE_SHAREDMEMORY:
398 			kr = exclaves_resource_shared_memory_copyin(resource,
399 			    ubuffer, usize, uoffset, usize2, uoffset2);
400 			break;
401 
402 		default:
403 			exclaves_resource_release(resource);
404 			return KERN_INVALID_CAPABILITY;
405 		}
406 
407 		exclaves_resource_release(resource);
408 
409 		if (kr != KERN_SUCCESS) {
410 			return kr;
411 		}
412 		break;
413 	}
414 
415 	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
416 		exclaves_resource_t *resource = NULL;
417 		kr = exclaves_resource_from_port_name(current_space(), name,
418 		    &resource);
419 		if (kr != KERN_SUCCESS) {
420 			return kr;
421 		}
422 
423 		switch (resource->r_type) {
424 		case XNUPROXY_RESOURCETYPE_NAMEDBUFFER:
425 			kr = exclaves_named_buffer_copyout(resource, ubuffer,
426 			    usize, uoffset, usize2, uoffset2);
427 			break;
428 
429 		case XNUPROXY_RESOURCETYPE_SHAREDMEMORY:
430 			kr = exclaves_resource_shared_memory_copyout(resource,
431 			    ubuffer, usize, uoffset, usize2, uoffset2);
432 			break;
433 
434 		default:
435 			exclaves_resource_release(resource);
436 			return KERN_INVALID_CAPABILITY;
437 		}
438 
439 		exclaves_resource_release(resource);
440 
441 		if (kr != KERN_SUCCESS) {
442 			return kr;
443 		}
444 		break;
445 	}
446 
447 	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
448 		if (name != MACH_PORT_NULL) {
449 			/* Only accept MACH_PORT_NULL for now */
450 			return KERN_INVALID_CAPABILITY;
451 		}
452 		kr = task_launch_conclave(name);
453 
454 		/*
455 		 * Conclave launch call may have triggered a stop upcall;
456 		 * check if the stop upcall completion handler needs to run.
457 		 */
458 		task_stop_conclave_upcall_complete();
459 		break;
460 
461 	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
462 		if (name != MACH_PORT_NULL) {
463 			/* Only accept MACH_PORT_NULL for now */
464 			return KERN_INVALID_CAPABILITY;
465 		}
466 		struct exclaves_resource_user uresource = {};
467 
468 		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
469 		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
470 			return KERN_INVALID_ARGUMENT;
471 		}
472 
473 		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
474 		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
475 			return KERN_INVALID_ARGUMENT;
476 		}
477 
478 		if (ubuffer == USER_ADDR_NULL) {
479 			return KERN_INVALID_ARGUMENT;
480 		}
481 
482 		/* For the moment we only ever have to deal with one request. */
483 		if (usize != sizeof(struct exclaves_resource_user)) {
484 			return KERN_INVALID_ARGUMENT;
485 		}
486 		error = copyin(ubuffer, &uresource, usize);
487 		if (error) {
488 			return KERN_INVALID_ARGUMENT;
489 		}
490 
491 		const size_t name_buf_len = sizeof(uresource.r_name);
492 		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
493 			return KERN_INVALID_ARGUMENT;
494 		}
495 
496 		/*
497 		 * Do the regular lookup first. If that fails, fallback to the
498 		 * DARWIN domain, finally fallback to the KERNEL domain.
499 		 */
500 		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
501 		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);
502 
503 		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
504 		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
505 			if (id == EXCLAVES_INVALID_ID) {
506 				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
507 				    uresource.r_name);
508 			}
509 			if (id == EXCLAVES_INVALID_ID) {
510 				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
511 				    uresource.r_name);
512 			}
513 		}
514 
515 		if (id == EXCLAVES_INVALID_ID) {
516 			return KERN_NOT_FOUND;
517 		}
518 
519 		uresource.r_id = id;
520 		uresource.r_port = MACH_PORT_NULL;
521 
522 		error = copyout(&uresource, ubuffer, usize);
523 		if (error) {
524 			return KERN_INVALID_ADDRESS;
525 		}
526 
527 		kr = KERN_SUCCESS;
528 		break;
529 	}
530 
531 	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
532 		if (identifier == 0) {
533 			return KERN_INVALID_ARGUMENT;
534 		}
535 
536 		/* copy in string name */
537 		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
538 		size_t done = 0;
539 		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
540 			return KERN_INVALID_ARGUMENT;
541 		}
542 
543 		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
544 		const bool use_audio_memory =
545 		    exclaves_resource_lookup_by_name(domain, id_name,
546 		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) != NULL;
547 		exclaves_resource_t *resource = NULL;
548 		kr = use_audio_memory ?
549 		    exclaves_resource_audio_memory_map(domain, id_name, usize, &resource) :
550 		    exclaves_audio_buffer_map(domain, id_name, usize, &resource);
551 		if (kr != KERN_SUCCESS) {
552 			return kr;
553 		}
554 
555 		kr = exclaves_resource_create_port_name(resource, current_space(),
556 		    &name);
557 		if (kr != KERN_SUCCESS) {
558 			return kr;
559 		}
560 
561 		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
562 		if (kr != KERN_SUCCESS) {
563 			mach_port_deallocate(current_space(), name);
564 			return kr;
565 		}
566 
567 		break;
568 	}
569 
570 	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
571 		exclaves_resource_t *resource;
572 
573 		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
574 		if (kr != KERN_SUCCESS) {
575 			return kr;
576 		}
577 
578 		switch (resource->r_type) {
579 		case XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOBUFFER:
580 			kr = exclaves_audio_buffer_copyout(resource, ubuffer,
581 			    usize, uoffset, usize2, uoffset2);
582 			break;
583 
584 		case XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY:
585 			kr = exclaves_resource_audio_memory_copyout(resource,
586 			    ubuffer, usize, uoffset, usize2, uoffset2);
587 			break;
588 
589 		default:
590 			exclaves_resource_release(resource);
591 			return KERN_INVALID_CAPABILITY;
592 		}
593 
594 		exclaves_resource_release(resource);
595 
596 		if (kr != KERN_SUCCESS) {
597 			return kr;
598 		}
599 
600 		break;
601 	}
602 
603 	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
604 		if (identifier == 0) {
605 			return KERN_INVALID_ARGUMENT;
606 		}
607 
608 		/* copy in string name */
609 		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
610 		size_t done = 0;
611 		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
612 			return KERN_INVALID_ARGUMENT;
613 		}
614 
615 		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
616 		exclaves_resource_t *resource = NULL;
617 		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
618 		if (kr != KERN_SUCCESS) {
619 			return kr;
620 		}
621 
622 		kr = exclaves_resource_create_port_name(resource, current_space(),
623 		    &name);
624 		if (kr != KERN_SUCCESS) {
625 			return kr;
626 		}
627 
628 		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
629 		if (kr != KERN_SUCCESS) {
630 			/* No senders drops the reference. */
631 			mach_port_deallocate(current_space(), name);
632 			return kr;
633 		}
634 
635 		break;
636 	}
637 
638 	case EXCLAVES_CTL_OP_SENSOR_START: {
639 		exclaves_resource_t *resource;
640 		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
641 		if (kr != KERN_SUCCESS) {
642 			return kr;
643 		}
644 
645 		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
646 			exclaves_resource_release(resource);
647 			return KERN_FAILURE;
648 		}
649 
650 		exclaves_sensor_status_t status;
651 		kr = exclaves_resource_sensor_start(resource, identifier, &status);
652 
653 		exclaves_resource_release(resource);
654 
655 		if (kr != KERN_SUCCESS) {
656 			return kr;
657 		}
658 
659 		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
660 
661 		break;
662 	}
663 	case EXCLAVES_CTL_OP_SENSOR_STOP: {
664 		exclaves_resource_t *resource;
665 		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
666 		if (kr != KERN_SUCCESS) {
667 			return kr;
668 		}
669 
670 		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
671 			exclaves_resource_release(resource);
672 			return KERN_FAILURE;
673 		}
674 
675 		exclaves_sensor_status_t status;
676 		kr = exclaves_resource_sensor_stop(resource, identifier, &status);
677 
678 		exclaves_resource_release(resource);
679 
680 		if (kr != KERN_SUCCESS) {
681 			return kr;
682 		}
683 
684 		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
685 
686 		break;
687 	}
688 	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
689 		exclaves_resource_t *resource;
690 		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
691 		if (kr != KERN_SUCCESS) {
692 			return kr;
693 		}
694 
695 		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
696 			exclaves_resource_release(resource);
697 			return KERN_FAILURE;
698 		}
699 
700 
701 		exclaves_sensor_status_t status;
702 		kr = exclaves_resource_sensor_status(resource, identifier, &status);
703 
704 		exclaves_resource_release(resource);
705 
706 		if (kr != KERN_SUCCESS) {
707 			return kr;
708 		}
709 
710 		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
711 		break;
712 	}
713 	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
714 		exclaves_resource_t *notification_resource = NULL;
715 		mach_port_name_t port_name = MACH_PORT_NULL;
716 
717 		struct exclaves_resource_user *notification_resource_user = NULL;
718 		if (usize != sizeof(struct exclaves_resource_user)) {
719 			return KERN_INVALID_ARGUMENT;
720 		}
721 
722 		if (ubuffer == USER_ADDR_NULL) {
723 			return KERN_INVALID_ARGUMENT;
724 		}
725 
726 		notification_resource_user = (struct exclaves_resource_user *)
727 		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);
728 
729 		error = copyin(ubuffer, notification_resource_user, usize);
730 		if (error) {
731 			kr = KERN_INVALID_ARGUMENT;
732 			goto notification_resource_lookup_out;
733 		}
734 
735 		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
736 		if (strnlen(notification_resource_user->r_name, name_buf_len)
737 		    == name_buf_len) {
738 			kr = KERN_INVALID_ARGUMENT;
739 			goto notification_resource_lookup_out;
740 		}
741 
742 		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
743 		kr = exclaves_notification_create(domain,
744 		    notification_resource_user->r_name, &notification_resource);
745 		if (kr != KERN_SUCCESS) {
746 			goto notification_resource_lookup_out;
747 		}
748 
749 		kr = exclaves_resource_create_port_name(notification_resource,
750 		    current_space(), &port_name);
751 		if (kr != KERN_SUCCESS) {
752 			goto notification_resource_lookup_out;
753 		}
754 		notification_resource_user->r_type = notification_resource->r_type;
755 		notification_resource_user->r_id = notification_resource->r_id;
756 		notification_resource_user->r_port = port_name;
757 		error = copyout(notification_resource_user, ubuffer, usize);
758 		if (error) {
759 			kr = KERN_INVALID_ADDRESS;
760 			goto notification_resource_lookup_out;
761 		}
762 
763 notification_resource_lookup_out:
764 		if (notification_resource_user != NULL) {
765 			kfree_data(notification_resource_user, usize);
766 		}
767 		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
768 			mach_port_deallocate(current_space(), port_name);
769 		}
770 		break;
771 	}
772 
773 	default:
774 		kr = KERN_INVALID_ARGUMENT;
775 		break;
776 	}
777 
778 	return kr;
779 #else /* CONFIG_EXCLAVES */
780 #pragma unused(uap)
781 	return KERN_NOT_SUPPORTED;
782 #endif /* CONFIG_EXCLAVES */
783 }
784 
785 /* -------------------------------------------------------------------------- */
786 #pragma mark kernel entry points
787 
788 kern_return_t
789 exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
790     exclaves_tag_t *tag, exclaves_error_t *error)
791 {
792 #if CONFIG_EXCLAVES
793 	kern_return_t kr = KERN_SUCCESS;
794 	assert(port == IPC_PORT_NULL);
795 
796 	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
797 	assert(ipcb != NULL);
798 
799 	exclaves_debug_printf(show_progress,
800 	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
801 	    endpoint_id, *tag);
802 
803 	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
804 	kr = exclaves_endpoint_call_internal(port, endpoint_id);
805 	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
806 	*error = XNUPROXY_CR_RETVAL(ipcb);
807 
808 	exclaves_debug_printf(show_progress,
809 	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
810 	    "error 0x%llx\n", endpoint_id, *tag, *error);
811 
812 	return kr;
813 #else /* CONFIG_EXCLAVES */
814 #pragma unused(port, endpoint_id, tag, error)
815 	return KERN_NOT_SUPPORTED;
816 #endif /* CONFIG_EXCLAVES */
817 }
818 
819 kern_return_t
820 exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
821 {
822 #if CONFIG_EXCLAVES
823 	kern_return_t kr = KERN_SUCCESS;
824 	thread_t thread = current_thread();
825 
826 	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
827 		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
828 		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
829 		if (kr != KERN_SUCCESS) {
830 			return kr;
831 		}
832 		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
833 	}
834 	thread->th_exclaves_ipc_ctx.usecnt++;
835 
836 	if (out_ipc_buffer != NULL) {
837 		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
838 	}
839 	return KERN_SUCCESS;
840 #else /* CONFIG_EXCLAVES */
841 #pragma unused(out_ipc_buffer)
842 	return KERN_NOT_SUPPORTED;
843 #endif /* CONFIG_EXCLAVES */
844 }
845 
846 kern_return_t
847 exclaves_free_ipc_buffer(void)
848 {
849 #if CONFIG_EXCLAVES
850 
851 	/* The inspection thread's cached buffer should never be freed */
852 	thread_t thread = current_thread();
853 
854 	/* Don't try to free unallocated contexts. */
855 	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
856 		return KERN_SUCCESS;
857 	}
858 
859 	const thread_exclaves_inspection_flags_t iflags =
860 	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
861 	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
862 		return KERN_SUCCESS;
863 	}
864 
865 	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
866 	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
867 		return KERN_SUCCESS;
868 	}
869 
870 	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
871 #else /* CONFIG_EXCLAVES */
872 	return KERN_NOT_SUPPORTED;
873 #endif /* CONFIG_EXCLAVES */
874 }
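
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * A hedged usage sketch: the per-thread buffer is use-counted, so each
 * successful exclaves_allocate_ipc_buffer() should be balanced by an
 * exclaves_free_ipc_buffer(); only the final free releases the xnuproxy
 * context (and exclaves_thread_terminate() cleans up anything abandoned).
 */
void *ipcb = NULL;
if (exclaves_allocate_ipc_buffer(&ipcb) == KERN_SUCCESS) {
	/* ... stage message registers in ipcb, make an endpoint call ... */
	(void) exclaves_free_ipc_buffer();
}
#endif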
875 
876 kern_return_t
877 exclaves_thread_terminate(__unused thread_t thread)
878 {
879 	kern_return_t kr = KERN_SUCCESS;
880 
881 #if CONFIG_EXCLAVES
882 	assert(thread == current_thread());
883 	assert(thread->th_exclaves_intstate == 0);
884 	assert(thread->th_exclaves_state == 0);
885 	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
886 		exclaves_debug_printf(show_progress,
887 		    "exclaves: thread_terminate freeing abandoned exclaves "
888 		    "ipc buffer\n");
889 		/* Unconditionally free context irrespective of usecount */
890 		thread->th_exclaves_ipc_ctx.usecnt = 0;
891 		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
892 		assert(kr == KERN_SUCCESS);
893 	}
894 #else
895 #pragma unused(thread)
896 #endif /* CONFIG_EXCLAVES */
897 
898 	return kr;
899 }
900 
901 OS_CONST
902 void*
903 exclaves_get_ipc_buffer(void)
904 {
905 #if CONFIG_EXCLAVES
906 	thread_t thread = current_thread();
907 	Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;
908 
909 	return ipcb;
910 #else /* CONFIG_EXCLAVES */
911 	return NULL;
912 #endif /* CONFIG_EXCLAVES */
913 }
914 
915 #if CONFIG_EXCLAVES
916 
917 static void
918 bind_to_boot_core(void)
919 {
920 	/*
921 	 * First ensure the boot cluster isn't powered down preventing the
922 	 * thread from running at all.
923 	 */
924 	suspend_cluster_powerdown();
925 	const int cpu = ml_get_boot_cpu_number();
926 	processor_t processor = cpu_to_processor(cpu);
927 	assert3p(processor, !=, NULL);
928 	__assert_only processor_t old = thread_bind(processor);
929 	assert3p(old, ==, PROCESSOR_NULL);
930 	thread_block(THREAD_CONTINUE_NULL);
931 }
932 
933 static void
934 unbind_from_boot_core(void)
935 {
936 	/* Unbind the thread from the boot CPU. */
937 	thread_bind(PROCESSOR_NULL);
938 	thread_block(THREAD_CONTINUE_NULL);
939 	resume_cluster_powerdown();
940 }
941 
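/*
 * Early boot path: fetch the ringgate boot info (optionally entering the
 * ringgate once on the boot core so the exclaves scheduler can perform
 * early initialisation), then initialise the scheduler, the xnu proxy,
 * the panic thread, and finally the exclaves resources.
 */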
942 extern kern_return_t exclaves_boot_early(void);
943 kern_return_t
944 exclaves_boot_early(void)
945 {
946 	kern_return_t kr = KERN_FAILURE;
947 	uint64_t boot_info = 0;
948 	bool early_enter = false;
949 
950 	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);
951 
952 	kr = exclaves_bootinfo(&boot_info, &early_enter);
953 	if (kr != KERN_SUCCESS) {
954 		exclaves_debug_printf(show_errors,
955 		    "exclaves: Get bootinfo failed\n");
956 		return kr;
957 	}
958 
959 	if (early_enter) {
960 		thread_t thread = current_thread();
961 		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);
962 
963 		bind_to_boot_core();
964 
965 		disable_preemption_without_measurements();
966 		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
967 
968 		kr = exclaves_enter();
969 
970 		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
971 		enable_preemption();
972 
973 		unbind_from_boot_core();
974 
975 		if (kr != KERN_SUCCESS) {
976 			exclaves_debug_printf(show_errors,
977 			    "exclaves: early exclaves enter failed\n");
978 			if (kr == KERN_ABORTED) {
979 				panic("Unexpected ringgate panic status");
980 			}
981 			return kr;
982 		}
983 	}
984 
985 	uint64_t xnuproxy_boot_info = 0;
986 	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
987 	if (kr != KERN_SUCCESS) {
988 		exclaves_debug_printf(show_errors,
989 		    "exclaves: Init scheduler failed\n");
990 		return kr;
991 	}
992 
993 	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
994 	if (kr != KERN_SUCCESS) {
995 		exclaves_debug_printf(show_errors,
996 		    "XNU proxy setup failed\n");
997 		return KERN_FAILURE;
998 	}
999 
1000 	kr = exclaves_panic_thread_setup();
1001 	if (kr != KERN_SUCCESS) {
1002 		exclaves_debug_printf(show_errors,
1003 		    "XNU proxy panic thread setup failed\n");
1004 		return KERN_FAILURE;
1005 	}
1006 
1007 	kr = exclaves_resource_init();
1008 	if (kr != KERN_SUCCESS) {
1009 		exclaves_debug_printf(show_errors,
1010 		    "exclaves: failed to initialize resources\n");
1011 		return kr;
1012 	}
1013 
1014 	return KERN_SUCCESS;
1015 }
1016 #endif /* CONFIG_EXCLAVES */
1017 
1018 #if CONFIG_EXCLAVES
1019 static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
1020 #endif /* CONFIG_EXCLAVES */
1021 
1022 void
1023 exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
1024 {
1025 #if CONFIG_EXCLAVES
1026 	if (exclaves_callbacks == NULL) {
1027 		exclaves_callbacks = callbacks;
1028 	}
1029 #else /* CONFIG_EXCLAVES */
1030 #pragma unused(callbacks)
1031 #endif /* CONFIG_EXCLAVES */
1032 }
1033 
1034 void
1035 exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
1036 {
1037 #if CONFIG_EXCLAVES
1038 	exclaves_clock_t *clock = (type == EXCLAVES_CLOCK_ABSOLUTE ?
1039 	    &exclaves_absolute_clock : &exclaves_continuous_clock);
1040 	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
1041 	while (latest_offset < offset) {
1042 		/* Update the latest offset with the new offset. If this fails, then a
1043 		 * concurrent update occurred and our offset may be stale. */
1044 		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
1045 		    offset, &latest_offset, relaxed)) {
1046 			break;
1047 		}
1048 	}
1049 #else
1050 #pragma unused(type, offset)
1051 #endif /* CONFIG_EXCLAVES */
1052 }
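
/*
 * Worked example for the update above: with latest_offset == 100, an
 * update with offset == 90 is a no-op (the while condition fails); an
 * update with offset == 110 either installs 110, or stops retrying if a
 * racing core already published a value >= 110. latest_offset is thus
 * monotonically non-decreasing.
 */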
1053 
1054 /* -------------------------------------------------------------------------- */
1055 
1056 #pragma mark exclaves ipc internals
1057 
1058 #if CONFIG_EXCLAVES
1059 
1060 static kern_return_t
1061 exclaves_endpoint_call_internal(__unused ipc_port_t port,
1062     exclaves_id_t endpoint_id)
1063 {
1064 	kern_return_t kr = KERN_SUCCESS;
1065 
1066 	assert(port == IPC_PORT_NULL);
1067 
1068 	kr = exclaves_xnuproxy_endpoint_call(endpoint_id);
1069 
1070 	return kr;
1071 }
1072 
1073 /* -------------------------------------------------------------------------- */
1074 #pragma mark secure kernel communication
1075 
1076 /* ringgate entry endpoints */
1077 enum {
1078 	RINGGATE_EP_ENTER,
1079 	RINGGATE_EP_INFO
1080 };
1081 
1082 /* ringgate entry status codes */
1083 enum {
1084 	RINGGATE_STATUS_SUCCESS,
1085 	RINGGATE_STATUS_ERROR,
1086 	RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core panicked */
1087 };
1088 
1089 OS_NOINLINE
1090 static kern_return_t
1091 exclaves_enter(void)
1092 {
1093 	uint32_t endpoint = RINGGATE_EP_ENTER;
1094 	uint64_t result = RINGGATE_STATUS_ERROR;
1095 
1096 	sptm_call_regs_t regs = { };
1097 
1098 	__assert_only thread_t thread = current_thread();
1099 
1100 	/*
1101 	 * Should never re-enter exclaves.
1102 	 */
1103 	if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
1104 	    (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
1105 		panic("attempt to re-enter exclaves");
1106 	}
1107 
1108 	/*
1109 	 * Must have one (and only one) of the flags set to enter exclaves.
1110 	 */
1111 	__assert_only const thread_exclaves_state_flags_t mask = (
1112 		TH_EXCLAVES_RPC |
1113 		TH_EXCLAVES_XNUPROXY |
1114 		TH_EXCLAVES_SCHEDULER_CALL);
1115 	assert3u(thread->th_exclaves_state & mask, !=, 0);
1116 	assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);
1117 
1118 #if MACH_ASSERT
1119 	/*
1120 	 * Set the ast to check that the thread doesn't return to userspace
1121 	 * while in an RPC or XNUPROXY call.
1122 	 */
1123 	act_set_debug_assert();
1124 #endif /* MACH_ASSERT */
1125 
1126 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
1127 	    | DBG_FUNC_START);
1128 
1129 	recount_enter_secure();
1130 
1131 	/* xnu_return_to_gl2 relies on this flag being present to correctly return
1132 	 * to SK from interrupts xnu handles on behalf of SK. */
1133 	thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;
1134 
1135 	/*
1136 	 * Bracket with labels so stackshot can determine where exclaves are
1137 	 * entered from xnu.
1138 	 */
1139 	__asm__ volatile (
1140             "EXCLAVES_ENTRY_START: nop\n\t"
1141         );
1142 	result = sk_enter(endpoint, &regs);
1143 	__asm__ volatile (
1144             "EXCLAVES_ENTRY_END: nop\n\t"
1145         );
1146 
1147 	thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;
1148 
1149 	recount_leave_secure();
1150 
1151 #if CONFIG_SPTM
1152 	/**
1153 	 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
1154 	 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
1155 	 * debug state forcing a reload on the next return to user mode.
1156 	 */
1157 	if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
1158 		arm_debug_set(NULL);
1159 	}
1160 #endif /* CONFIG_SPTM */
1161 
1162 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
1163 	    | DBG_FUNC_END);
1164 
1165 	switch (result) {
1166 	case RINGGATE_STATUS_SUCCESS:
1167 		return KERN_SUCCESS;
1168 	case RINGGATE_STATUS_ERROR:
1169 		return KERN_FAILURE;
1170 	case RINGGATE_STATUS_PANIC:
1171 		return KERN_ABORTED;
1172 	default:
1173 		assertf(false, "Unknown ringgate status %llu", result);
1174 		__builtin_trap();
1175 	}
1176 }
1177 
1178 
1179 /*
1180  * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
1181  * it indicates that we should immediately enter the ringgate once in order
1182  * to allow the scheduler to perform early boot initialisation.
1183  */
1184 #define EARLY_RINGGATE_ENTER 2
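
/*
 * Hypothetical decode example: if RINGGATE_EP_INFO returned 0x80004002,
 * exclaves_bootinfo() below yields early_enter == true (bit 1 set) and
 * boot_info == 0x80004000 (the same value with EARLY_RINGGATE_ENTER
 * masked off).
 */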
1185 
1186 OS_NOINLINE
1187 static kern_return_t
1188 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
1189 {
1190 	uint32_t endpoint = RINGGATE_EP_INFO;
1191 	uint64_t result = RINGGATE_STATUS_ERROR;
1192 
1193 	sptm_call_regs_t regs = { };
1194 
1195 	recount_enter_secure();
1196 	result = sk_enter(endpoint, &regs);
1197 	recount_leave_secure();
1198 	if (result == RINGGATE_STATUS_ERROR) {
1199 		return KERN_FAILURE;
1200 	}
1201 
1202 	*early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
1203 	*out_boot_info = result & ~EARLY_RINGGATE_ENTER;
1204 
1205 	return KERN_SUCCESS;
1206 }
1207 
1208 /* -------------------------------------------------------------------------- */
1209 
1210 #pragma mark exclaves scheduler communication
1211 
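/*
 * Per-CPU pointers into the shared XrtHosted request/response buffers.
 * They are populated by exclaves_init_multicore() or
 * exclaves_init_unicore() and fetched with PERCPU_GET at send time in
 * exclaves_scheduler_send().
 */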
1212 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
1213 static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);
1214 
1215 static void
1216 exclaves_init_multicore(void)
1217 {
1218 	assert(exclaves_multicore);
1219 
1220 	XrtHosted_Buffer_t **req, **res;
1221 
1222 	exclaves_wait_for_cpu_init();
1223 
1224 	DTEntry entry, child;
1225 	OpaqueDTEntryIterator iter;
1226 	int err = SecureDTLookupEntry(NULL, "/cpus", &entry);
1227 	assert(err == kSuccess);
1228 	err = SecureDTInitEntryIterator(entry, &iter);
1229 	assert(err == kSuccess);
1230 
1231 	bool exclaves_uses_mpidr = (exclaves_callbacks->v1.global()->v2.smpStatus == XrtHosted_SmpStatus_MulticoreMpidr);
1232 	if (exclaves_uses_mpidr) {
1233 		exclaves_debug_printf(show_progress, "Using MPIDR for exclave scheduler core IDs\n");
1234 	} else {
1235 		// TODO(rdar://120679733) - clean up non-MPIDR identification logic.
1236 		exclaves_debug_printf(show_progress, "Not using MPIDR for exclave scheduler core IDs\n");
1237 	}
1238 
1239 	/*
1240 	 * Match the hardwareID to the physical ID and stash the pointers to the
1241 	 * request/response buffers in per-cpu data for quick access.
1242 	 */
1243 	size_t core_count = exclaves_callbacks->v1.cores();
1244 	for (size_t i = 0; i < core_count; i++) {
1245 		const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
1246 		uint32_t dt_phys_id = 0;
1247 		if (exclaves_uses_mpidr) {
1248 			dt_phys_id = (uint32_t)core->v2.hardwareId;
1249 		} else {
1250 			/* Find the physical ID of the entry at position hardwareId in the
1251 			 * DeviceTree "cpus" array */
1252 			uint32_t dt_index = 0;
1253 			bool dt_entry_found = false;
1254 			err = SecureDTRestartEntryIteration(&iter);
1255 			assert(err == kSuccess);
1256 			while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
1257 				if (core->v2.hardwareId == dt_index) {
1258 					void const *dt_prop;
1259 					unsigned int dt_prop_sz;
1260 					err = SecureDTGetProperty(child, "reg", &dt_prop, &dt_prop_sz);
1261 					assert(err == kSuccess);
1262 					assert(dt_prop_sz == sizeof(uint32_t));
1263 					dt_phys_id = *((uint32_t const *)dt_prop);
1264 					dt_entry_found = true;
1265 					break;
1266 				}
1267 				dt_index++;
1268 			}
1269 			if (!dt_entry_found) {
1270 				continue;
1271 			}
1272 		}
1273 		percpu_foreach(cpu_data, cpu_data) {
1274 			if (cpu_data->cpu_phys_id != dt_phys_id) {
1275 				continue;
1276 			}
1277 			req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
1278 			*req = exclaves_callbacks->v1.Core.request(i);
1279 
1280 			res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
1281 			*res = exclaves_callbacks->v1.Core.response(i);
1282 
1283 			break;
1284 		}
1285 	}
1286 }
1287 
1288 static void
1289 exclaves_init_unicore(void)
1290 {
1291 	assert(!exclaves_multicore);
1292 
1293 	XrtHosted_Buffer_t *breq, *bres, **req, **res;
1294 
1295 	exclaves_wait_for_cpu_init();
1296 
1297 	breq = exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
1298 	bres = exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);
1299 
1300 	/* Always use the boot request/response buffers. */
1301 	percpu_foreach(cpu_data, cpu_data) {
1302 		req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
1303 		*req = breq;
1304 
1305 		res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
1306 		*res = bres;
1307 	}
1308 }
1309 
1310 static kern_return_t
1311 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
1312 {
1313 	kern_return_t kr = KERN_SUCCESS;
1314 	XrtHosted_Error_t hosted_error;
1315 
1316 	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);
1317 
1318 	if (!pmap_valid_address(boot_info)) {
1319 		exclaves_debug_printf(show_errors,
1320 		    "exclaves: %s: 0x%012llx\n",
1321 		    "Invalid root physical address",
1322 		    boot_info);
1323 		return KERN_FAILURE;
1324 	}
1325 
1326 	if (exclaves_callbacks == NULL) {
1327 		exclaves_debug_printf(show_errors,
1328 		    "exclaves: Callbacks not registered\n");
1329 		return KERN_FAILURE;
1330 	}
1331 
1332 	/* Initialise XrtHostedXnu kext */
1333 	kr = exclaves_hosted_error(
1334 		exclaves_callbacks->v1.init(
1335 			XrtHosted_Version_current,
1336 			phystokv(boot_info),
1337 			&hosted_error),
1338 		&hosted_error);
1339 	if (kr != KERN_SUCCESS) {
1340 		return kr;
1341 	}
1342 
1343 	/* Record aperture addresses in buffer */
1344 	size_t frames = exclaves_callbacks->v1.frames();
1345 	XrtHosted_Mapped_t **pages = zalloc_permanent(
1346 		frames * sizeof(XrtHosted_Mapped_t *),
1347 		ZALIGN(XrtHosted_Mapped_t *));
1348 	size_t index = 0;
1349 	uint64_t phys = boot_info;
1350 	while (index < frames) {
1351 		if (!pmap_valid_address(phys)) {
1352 			exclaves_debug_printf(show_errors,
1353 			    "exclaves: %s: 0x%012llx\n",
1354 			    "Invalid shared physical address",
1355 			    phys);
1356 			return KERN_FAILURE;
1357 		}
1358 		pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
1359 		kr = exclaves_hosted_error(
1360 			exclaves_callbacks->v1.nextPhys(
1361 				pages[index],
1362 				&index,
1363 				&phys,
1364 				&hosted_error),
1365 			&hosted_error);
1366 		if (kr != KERN_SUCCESS) {
1367 			return kr;
1368 		}
1369 	}
1370 
1371 	/* Initialise the mapped region */
1372 	exclaves_callbacks->v1.setMapping(
1373 		XrtHosted_Region_scattered(frames, pages));
1374 
1375 	/* Boot the scheduler. */
1376 	kr = exclaves_scheduler_boot();
1377 	if (kr != KERN_SUCCESS) {
1378 		return kr;
1379 	}
1380 
1381 	XrtHosted_Global_t *global = exclaves_callbacks->v1.global();
1382 
1383 	exclaves_multicore = (global->v2.smpStatus ==
1384 	    XrtHosted_SmpStatus_Multicore ||
1385 	    global->v2.smpStatus == XrtHosted_SmpStatus_MulticoreMpidr);
1386 	exclaves_multicore ? exclaves_init_multicore() : exclaves_init_unicore();
1387 
1388 	/* Initialise the XNU proxy */
1389 	if (!pmap_valid_address(global->v1.proxyInit)) {
1390 		exclaves_debug_printf(show_errors,
1391 		    "exclaves: %s: 0x%012llx\n",
1392 		    "Invalid xnu proxy physical address",
1393 		    global->v1.proxyInit);
1394 		return KERN_FAILURE;
1395 	}
1396 	*xnuproxy_boot_info = global->v1.proxyInit;
1397 
1398 	return kr;
1399 }
1400 
1401 #if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1402 #define exclaves_scheduler_debug_save_buffer(_buf_in, _buf_out) \
1403 	*(_buf_out) = *(_buf_in)
1404 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1405 	    _response_buf) ({ \
1406 	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
1407 	        printf("exclaves: Scheduler request = %p\n", _request_buf); \
1408 	        printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
1409 	            (_request_buf)->tag); \
1410 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1411 	                printf("exclaves: Scheduler request.arguments[%02zu] = " \
1412 	                    "0x%04llx\n", arg, \
1413 	                    (_request_buf)->arguments[arg]); \
1414 	        } \
1415 	        printf("exclaves: Scheduler response = %p\n", _response_buf); \
1416 	        printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
1417 	                (_response_buf)->tag); \
1418 	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
1419 	                printf("exclaves: Scheduler response.arguments[%02zu] = " \
1420 	                    "0x%04llx\n", arg, \
1421 	                    (_response_buf)->arguments[arg]); \
1422 	        } \
1423 	}})
1424 #else // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1425 #define exclaves_scheduler_debug_save_buffer(_buf_in, _buf_out) (void)_buf_out
1426 #define exclaves_scheduler_debug_show_request_response(_request_buf, \
1427 	    _response_buf) ({ })
1428 #endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1429 
1430 __attribute__((always_inline))
1431 static kern_return_t
1432 exclaves_scheduler_send(const XrtHosted_Request_t *request,
1433     XrtHosted_Response_t *response, XrtHosted_Buffer_t *save_out_ptr, XrtHosted_Buffer_t *save_in_ptr)
1434 {
1435 	/* Must be called with preemption and interrupts disabled */
1436 	kern_return_t kr;
1437 
1438 	XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
1439 	assert3p(request_buf, !=, NULL);
1440 
1441 	exclaves_callbacks->v1.Request.encode(request_buf, request);
1442 	exclaves_scheduler_debug_save_buffer(request_buf, save_out_ptr);
1443 
1444 	kr = exclaves_enter();
1445 
1446 	/* The response may have come back on a different core. */
1447 	XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
1448 	assert3p(response_buf, !=, NULL);
1449 
1450 	exclaves_scheduler_debug_save_buffer(response_buf, save_in_ptr);
1451 	exclaves_callbacks->v1.Response.decode(response_buf, response);
1452 
1453 	return kr;
1454 }
1455 
1456 __attribute__((always_inline))
1457 static kern_return_t
1458 exclaves_scheduler_request(const XrtHosted_Request_t *request,
1459     XrtHosted_Response_t *response)
1460 {
1461 #if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1462 	XrtHosted_Buffer_t save_in[3], save_out[3] = {{ .tag = XrtHosted_Message_Invalid }, { .tag = XrtHosted_Message_Invalid }, { .tag = XrtHosted_Message_Invalid }};
1463 	XrtHosted_Buffer_t *save_out_ptr = save_out, *save_in_ptr = save_in;
1464 #else
1465 	XrtHosted_Buffer_t *save_out_ptr = NULL, *save_in_ptr = NULL;
1466 #endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1467 
1468 	assert3u(request->tag, >, XrtHosted_Request_Invalid);
1469 	assert3u(request->tag, <, XrtHosted_Request_Limit);
1470 
1471 	kern_return_t kr = KERN_SUCCESS;
1472 	bool istate;
1473 
1474 	if (!exclaves_multicore || !exclaves_smp_enabled) {
1475 		lck_mtx_lock(&exclaves_scheduler_lock);
1476 	}
1477 
1478 	/*
1479 	 * Disable preemption and interrupts as the xrt hosted scheduler data
1480 	 * structures are per-core.
1481 	 * Preemption-disabled and interrupt-disabled timeouts are disabled for
1482 	 * now until we can co-ordinate the measurements with the exclaves side of
1483 	 * things.
1484 	 */
1485 	istate = ml_set_interrupts_enabled_with_debug(false, false);
1486 
1487 	/*
1488 	 * This needs to be done with interrupts disabled, otherwise stackshot could
1489 	 * mark the thread blocked just after this function exits and a thread marked
1490 	 * as AST blocked would go into exclaves.
1491 	 */
1492 
1493 	while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state, relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
1494 		/* Enable interrupts */
1495 		(void) ml_set_interrupts_enabled_with_debug(true, false);
1496 
1497 		if (!exclaves_multicore || !exclaves_smp_enabled) {
1498 			lck_mtx_unlock(&exclaves_scheduler_lock);
1499 		}
1500 
1501 		/* Wait until the thread is collected on exclaves side */
1502 		exclaves_inspection_check_ast();
1503 
1504 		if (!exclaves_multicore || !exclaves_smp_enabled) {
1505 			lck_mtx_lock(&exclaves_scheduler_lock);
1506 		}
1507 
1508 		/* Disable interrupts and preemption before next AST check */
1509 		ml_set_interrupts_enabled_with_debug(false, false);
1510 	}
1511 	/* Interrupts are disabled and exclaves_stackshot_ast is clean */
1512 
1513 	disable_preemption_without_measurements();
1514 
1515 	/* Update clock offsets before any other scheduler operation */
1516 	exclaves_clock_t *clocks[] = { &exclaves_absolute_clock,
1517 		                       &exclaves_continuous_clock };
1518 	for (unsigned i = 0; i < ARRAY_COUNT(clocks); ++i) {
1519 		if (exclaves_clock_needs_update(clocks[i])) {
1520 			kr = exclaves_clock_update(clocks[i], &save_out_ptr[i], &save_in_ptr[i]);
1521 			if (kr != KERN_SUCCESS) {
1522 				break;
1523 			}
1524 		}
1525 	}
1526 
1527 	if (kr == KERN_SUCCESS) {
1528 		kr = exclaves_scheduler_send(request, response, &save_out_ptr[2], &save_in_ptr[2]);
1529 	}
1530 
1531 	enable_preemption();
1532 	(void) ml_set_interrupts_enabled_with_debug(istate, false);
1533 
1534 #if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1535 	for (unsigned i = 0; i < ARRAY_COUNT(save_out); ++i) {
1536 		if (save_out_ptr[i].tag != XrtHosted_Message_Invalid) {
1537 			exclaves_scheduler_debug_show_request_response(&save_out_ptr[i], &save_in_ptr[i]);
1538 		}
1539 	}
1540 #endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1541 
1542 	if (!exclaves_multicore || !exclaves_smp_enabled) {
1543 		lck_mtx_unlock(&exclaves_scheduler_lock);
1544 	}
1545 
1546 	if (kr == KERN_ABORTED) {
1547 		/* RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC indicating that
1548 		 * another core has paniced in exclaves and is on the way to call xnu
1549 		 * another core has panicked in exclaves and is on the way to call xnu
1550 		exclaves_wait_for_panic();
1551 	}
1552 
1553 	return kr;
1554 }
1555 
1556 OS_NORETURN OS_NOINLINE
1557 static void
1558 exclaves_wait_for_panic(void)
1559 {
1560 	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
1561 	    NSEC_PER_SEC);
1562 	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
1563 	panic("Unexpected wait for panic result: %d", wr);
1564 }
1565 
1566 static kern_return_t
1567 handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
1568     const XrtHosted_Yield_t *yield)
1569 {
1570 	Exclaves_L4_Word_t responding_scid = yield->thread;
1571 	Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
1572 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1573 
1574 	exclaves_debug_printf(show_progress,
1575 	    "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
1576 	    early ? "(early yield)" : "", responding_scid, yielded_to_scid);
1577 	/* TODO: 1. remember yielding scid if it isn't the xnu proxy's
1578 	 * th_exclaves_scheduling_context_id so we know to resume it later
1579 	 * 2. translate yield_to to thread_switch()-style handoff.
1580 	 */
1581 	if (!early) {
1582 		assert3u(responding_scid, ==, scid);
1583 		assert3u(yield->threadHostId, ==, ctid);
1584 	}
1585 
1586 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1587 	    MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);
1588 
1589 	return KERN_SUCCESS;
1590 }
1591 
1592 static kern_return_t
1593 handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
1594     const XrtHosted_Spawned_t *spawned)
1595 {
1596 	Exclaves_L4_Word_t responding_scid = spawned->thread;
1597 	thread_t thread = current_thread();
1598 	__assert_only ctid_t ctid = thread_get_ctid(thread);
1599 
1600 	/*
1601 	 * There are only a few places an exclaves thread is expected to be
1602 	 * spawned. Any other cases are considered errors.
1603 	 */
1604 	if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
1605 		exclaves_debug_printf(show_errors,
1606 		    "exclaves: Scheduler: Unexpected thread spawn: "
1607 		    "scid 0x%lx spawned scid 0x%llx\n",
1608 		    responding_scid, spawned->spawned);
1609 		return KERN_FAILURE;
1610 	}
1611 
1612 	exclaves_debug_printf(show_progress,
1613 	    "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
1614 	    responding_scid, (unsigned long)spawned->spawned);
1615 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1616 	    MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);
1617 
1618 	assert3u(responding_scid, ==, scid);
1619 	assert3u(spawned->threadHostId, ==, ctid);
1620 
1621 	return KERN_SUCCESS;
1622 }
1623 
1624 static kern_return_t
1625 handle_response_terminated(const XrtHosted_Terminated_t *terminated)
1626 {
1627 	Exclaves_L4_Word_t responding_scid = terminated->thread;
1628 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1629 
1630 	exclaves_debug_printf(show_errors,
1631 	    "exclaves: Scheduler: Unexpected thread terminate: "
1632 	    "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
1633 	    terminated->terminated);
1634 	assert3u(terminated->threadHostId, ==, ctid);
1635 
1636 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1637 	    MACH_EXCLAVES_SCHEDULER_TERMINATED),
1638 	    terminated->terminated);
1639 
1640 	return KERN_TERMINATED;
1641 }
1642 
1643 static kern_return_t
1644 handle_response_wait(const XrtHosted_Wait_t *wait)
1645 {
1646 	Exclaves_L4_Word_t responding_scid = wait->waiter;
1647 	thread_t thread = current_thread();
1648 	__assert_only ctid_t ctid = thread_get_ctid(thread);
1649 
1650 	exclaves_debug_printf(show_progress,
1651 	    "exclaves: Scheduler: Wait: "
1652 	    "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
1653 	    "epoch 0x%llx\n", responding_scid, wait->owner,
1654 	    wait->queueId, wait->epoch);
1655 	assert3u(wait->waiterHostId, ==, ctid);
1656 
1657 	/* The exclaves inspection thread should never wait. */
1658 	if ((thread->th_exclaves_state & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
1659 		panic("Exclaves inspection thread tried to wait\n");
1660 	}
1661 
1662 	/*
1663 	 * Note, "owner" may not be safe to access directly, for example
1664 	 * the thread may have exited and been freed. esync_wait will
1665 	 * only access it under a lock if the epoch is fresh thus
1666 	 * ensuring safety.
1667 	 */
1668 	const ctid_t owner = (ctid_t)wait->ownerHostId;
1669 	const XrtHosted_Word_t id = wait->queueId;
1670 	const uint64_t epoch = wait->epoch;
1671 
1672 	wait_interrupt_t interruptible;
1673 	esync_policy_t policy;
1674 
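	/*
	 * Map the scheduler's interruptibility to esync_wait() parameters:
	 *   None         -> THREAD_UNINT,          ESYNC_POLICY_KERNEL
	 *   Voluntary    -> THREAD_INTERRUPTIBLE,  ESYNC_POLICY_KERNEL
	 *   DynamicQueue -> THREAD_INTERRUPTIBLE,  ESYNC_POLICY_USER
	 */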
1675 	switch (wait->interruptible) {
1676 	case XrtHosted_Interruptibility_None:
1677 		interruptible = THREAD_UNINT;
1678 		policy = ESYNC_POLICY_KERNEL;
1679 		break;
1680 
1681 	case XrtHosted_Interruptibility_Voluntary:
1682 		interruptible = THREAD_INTERRUPTIBLE;
1683 		policy = ESYNC_POLICY_KERNEL;
1684 		break;
1685 
1686 	case XrtHosted_Interruptibility_DynamicQueue:
1687 		interruptible = THREAD_INTERRUPTIBLE;
1688 		policy = ESYNC_POLICY_USER;
1689 		break;
1690 
1691 	default:
1692 		panic("Unknown exclaves interruptibility: %llu",
1693 		    wait->interruptible);
1694 	}
1695 
1696 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1697 	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
1698 	    wait->interruptible);
1699 	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1700 	    exclaves_get_queue_counter(id), owner, policy, interruptible);
1701 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1702 	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);
1703 
1704 	switch (wr) {
1705 	case THREAD_INTERRUPTED:
1706 		return KERN_ABORTED;
1707 
1708 	case THREAD_NOT_WAITING:
1709 	case THREAD_AWAKENED:
1710 		return KERN_SUCCESS;
1711 
1712 	default:
1713 		panic("Unexpected wait result from esync_wait: %d", wr);
1714 	}
1715 }
1716 
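/*
 * In outline, the wait/wake handlers pair up through the queue's epoch
 * counter so that wakeups cannot be lost (a sketch of the idea only, not
 * the esync implementation itself):
 *
 *	waiter:	wr = esync_wait(..., wait->epoch, ...);
 *		// Blocks only while the epoch is still fresh; a stale
 *		// epoch returns THREAD_NOT_WAITING instead of sleeping.
 *	waker:	esync_wake(..., wake->epoch, ...);
 *		// Advances the epoch, so a racing waiter observes a stale
 *		// epoch rather than sleeping through the wake.
 *
 * This is why THREAD_NOT_WAITING is treated the same as THREAD_AWAKENED
 * above: either way the corresponding wake has already happened.
 */
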
1717 static kern_return_t
1718 handle_response_wake(const XrtHosted_Wake_t *wake)
1719 {
1720 	Exclaves_L4_Word_t responding_scid = wake->waker;
1721 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1722 
1723 	exclaves_debug_printf(show_progress,
1724 	    "exclaves: Scheduler: Wake: "
1725 	    "scid 0x%lx wake of queue id 0x%llx, "
1726 	    "epoch 0x%llx, all 0x%llx\n", responding_scid,
1727 	    wake->queueId, wake->epoch, wake->all);
1728 	assert3u(wake->wakerHostId, ==, ctid);
1729 
1730 	const XrtHosted_Word_t id = wake->queueId;
1731 	const uint64_t epoch = wake->epoch;
1732 	const esync_wake_mode_t mode = wake->all != 0 ?
1733 	    ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;
1734 
1735 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1736 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);
1737 
1738 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1739 	    exclaves_get_queue_counter(id), mode, 0);
1740 
1741 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1742 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1743 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1744 
1745 	return KERN_SUCCESS;
1746 }
1747 
1748 static kern_return_t
1749 handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
1750 {
1751 	Exclaves_L4_Word_t responding_scid = wake->waker;
1752 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1753 
1754 	exclaves_debug_printf(show_progress,
1755 	    "exclaves: Scheduler: WakeWithOwner: "
1756 	    "scid 0x%lx wake of queue id 0x%llx, "
1757 	    "epoch 0x%llx, owner 0x%llx\n", responding_scid,
1758 	    wake->queueId, wake->epoch,
1759 	    wake->owner);
1760 
1761 	assert3u(wake->wakerHostId, ==, ctid);
1762 
1763 	const ctid_t owner = (ctid_t)wake->ownerHostId;
1764 	const XrtHosted_Word_t id = wake->queueId;
1765 	const uint64_t epoch = wake->epoch;
1766 
1767 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1768 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
1769 	    ESYNC_WAKE_ONE_WITH_OWNER);
1770 
1771 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1772 	    exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);
1773 
1774 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1775 	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1776 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1777 
1778 	return KERN_SUCCESS;
1779 }
1780 
1781 static kern_return_t
1782 handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
1783 {
1784 	Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
1785 	__assert_only thread_t thread = current_thread();
1786 
1787 	exclaves_debug_printf(show_progress,
1788 	    "exclaves: Scheduler: PanicWait: "
1789 	    "Panic thread SCID %lx\n",
1790 	    panic_thread_scid);
1791 
1792 	assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);
1793 
1794 	exclaves_panic_thread_wait();
1795 
1796 	/* NOT REACHABLE */
1797 	return KERN_SUCCESS;
1798 }
1799 
1800 static kern_return_t
1801 handle_response_suspended(const XrtHosted_Suspended_t *suspended)
1802 {
1803 	Exclaves_L4_Word_t responding_scid = suspended->suspended;
1804 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1805 
1806 	exclaves_debug_printf(show_progress,
1807 	    "exclaves: Scheduler: Suspended: "
1808 	    "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
1809 	assert3u(suspended->suspendedHostId, ==, ctid);
1810 
1811 	const uint64_t id = suspended->suspended;
1812 	const uint64_t epoch = suspended->epoch;
1813 
1814 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1815 	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);
1816 
1817 	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1818 	    exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);
1819 
1820 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1821 	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);
1822 
1823 	switch (wr) {
1824 	case THREAD_INTERRUPTED:
1825 		return KERN_ABORTED;
1826 
1827 	case THREAD_NOT_WAITING:
1828 	case THREAD_AWAKENED:
1829 		return KERN_SUCCESS;
1830 
1831 	default:
1832 		panic("Unexpected wait result from esync_wait: %d", wr);
1833 	}
1834 }
1835 
1836 static kern_return_t
1837 handle_response_resumed(const XrtHosted_Resumed_t *resumed)
1838 {
1839 	Exclaves_L4_Word_t responding_scid = resumed->thread;
1840 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1841 
1842 	exclaves_debug_printf(show_progress,
1843 	    "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
1844 	    "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
1845 	    resumed->resumedHostId, resumed->epoch);
1846 	assert3u(resumed->threadHostId, ==, ctid);
1847 
1848 	const ctid_t target = (ctid_t)resumed->resumedHostId;
1849 	const XrtHosted_Word_t id = resumed->resumed;
1850 	const uint64_t epoch = resumed->epoch;
1851 
1852 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1853 	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
1854 	    target);
1855 
1856 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1857 	    exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);
1858 
1859 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1860 	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
1861 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1862 
1863 	return KERN_SUCCESS;
1864 }
1865 
1866 static kern_return_t
1867 handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
1868 {
1869 	Exclaves_L4_Word_t responding_scid = interrupted->thread;
1870 	__assert_only ctid_t ctid = thread_get_ctid(current_thread());
1871 
1872 	exclaves_debug_printf(show_progress,
1873 	    "exclaves: Scheduler: Interrupted: "
1874 	    "scid 0x%lx interrupt on queue id 0x%llx, "
1875 	    "epoch 0x%llx, target 0x%llx\n", responding_scid,
1876 	    interrupted->queueId, interrupted->epoch,
1877 	    interrupted->interruptedHostId);
1878 	assert3u(interrupted->threadHostId, ==, ctid);
1879 
1880 	const ctid_t target = (ctid_t)interrupted->interruptedHostId;
1881 	const XrtHosted_Word_t id = interrupted->queueId;
1882 	const uint64_t epoch = interrupted->epoch;
1883 
1884 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1885 	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
1886 	    target);
1887 
1888 	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1889 	    exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);
1890 
1891 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1892 	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
1893 	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1894 
1895 	return KERN_SUCCESS;
1896 }
1897 
1898 static kern_return_t
1899 handle_response_nothing_scheduled(
1900 	__unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
1901 {
1902 	exclaves_debug_printf(show_progress,
1903 	    "exclaves: Scheduler: nothing scheduled\n");
1904 
1905 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1906 	    MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));
1907 
1908 	return KERN_SUCCESS;
1909 }
1910 
1911 static kern_return_t
1912 handle_response_all_exclaves_booted(
1913 	__unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
1914 {
1915 	exclaves_debug_printf(show_progress,
1916 	    "exclaves: scheduler: all exclaves booted\n");
1917 
1918 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1919 	    MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));
1920 
1921 	return KERN_SUCCESS;
1922 }
1923 
1924 /*
1925  * The Early Alloc response asks for npages to be allocated. The list of
1926  * allocated pages is written into the first allocated page in the form of 32-bit
1927  * page numbers. The physical address of the first page is passed back to the
1928  * exclaves scheduler as part of the next request.
1929  */
1930 static kern_return_t
1931 handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
1932     uint64_t *pagelist_pa)
1933 {
1934 	const uint32_t npages = (uint32_t)pmm_early_alloc->a;
1935 	const uint64_t flags = pmm_early_alloc->b;
1936 
1937 	exclaves_debug_printf(show_progress,
1938 	    "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
1939 	    npages, flags);
1940 
1941 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1942 	    MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);
1943 
1944 	if (npages == 0) {
1945 		return KERN_SUCCESS;
1946 	}
1947 
1948 	if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
1949 		exclaves_debug_printf(show_errors,
1950 		    "exclaves: request to allocate too many pages: %u\n",
1951 		    npages);
1952 		return KERN_NO_SPACE;
1953 	}
1954 
1955 	/*
1956 	 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
1957 	 * stack allocation is sufficient and fast. If
1958 	 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
1959 	 * to the heap.
1960 	 */
1961 	uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
1962 	exclaves_memory_alloc(npages, page, XNUUPCALLS_PAGEKIND_ROOTDOMAIN);
1963 
1964 	/* Now copy the list of pages into the first page. */
1965 	uint64_t first_page_pa = ptoa(page[0]);
1966 #if 0
1967 	// TODO: move this copy to before the SPTM retype.
1968 	uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
1969 	for (int i = 0; i < npages; i++) {
1970 		first_page[i] = page[i];
1971 	}
1972 #endif
1973 
1974 	*pagelist_pa = first_page_pa;
1975 	return KERN_SUCCESS;
1976 }
1977 
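/*
 * For illustration, the resulting page list has this layout (the
 * consumer-side parsing lives in the exclaves scheduler, not here; the
 * in-kernel copy is currently disabled above pending the SPTM retype):
 *
 *	*pagelist_pa -> first allocated page:
 *		uint32_t page[0];          // page number of this page
 *		uint32_t page[1];          // page number of the 2nd page
 *		...
 *		uint32_t page[npages - 1]; // page number of the last page
 *
 * ptoa(page[0]) both addresses the first page and, via *pagelist_pa,
 * tells the scheduler where to find the rest of the list.
 */
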
1978 static inline bool
1979 exclaves_clock_needs_update(const exclaves_clock_t *clock)
1980 {
1981 	exclaves_clock_t local = {
1982 		.u128 = os_atomic_load(&clock->a_u128, relaxed),
1983 	};
1984 
1985 	return local.u64.sent_offset != local.u64.latest_offset;
1986 }
1987 
1988 OS_NOINLINE
1989 static kern_return_t
1990 exclaves_clock_update(exclaves_clock_t *clock, XrtHosted_Buffer_t *save_out_ptr, XrtHosted_Buffer_t *save_in_ptr)
1991 {
1992 	XrtHosted_Response_t response = { .tag = XrtHosted_Response_NothingScheduled, };
1993 	kern_return_t kr = KERN_SUCCESS;
1994 	exclaves_clock_t local;
1995 
1996 	local.u128 = os_atomic_load(&clock->a_u128, relaxed);
1997 	while (local.u64.sent_offset != local.u64.latest_offset) {
1998 		XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
1999 			.timer =
2000 			(clock == &exclaves_absolute_clock ?
2001 			XrtHosted_Timer_Absolute : XrtHosted_Timer_Continuous),
2002 			.offset = local.u64.latest_offset,
2003 			);
2004 
2005 		kr = exclaves_scheduler_send(&request, &response, save_out_ptr, save_in_ptr);
2006 		if (kr) {
2007 			return kr;
2008 		}
2009 
2010 		/* Swap the sent offset with the local latest offset. If it fails,
2011 		 * the sent offset will be reloaded. */
2012 		os_atomic_cmpxchgv(&clock->a_u64.sent_offset, local.u64.sent_offset,
2013 		    local.u64.latest_offset, &local.u64.sent_offset, relaxed);
2014 
2015 		/* Fetch the latest offset again, in case we are stale. */
2016 		local.u64.latest_offset = os_atomic_load(&clock->a_u64.latest_offset,
2017 		    relaxed);
2018 	}
2019 
2020 	if (response.tag != XrtHosted_Response_NothingScheduled) {
2021 		kr = KERN_FAILURE;
2022 	}
2023 
2024 	return kr;
2025 }
2026 
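/*
 * The loop above is the usual lock-free publish pattern: send the latest
 * offset, then try to record it as sent with a compare-and-swap, and
 * retry while a newer offset has appeared in the meantime. In outline
 * (illustrative pseudocode, not the os_atomic API):
 *
 *	while (sent != latest) {
 *		send(latest);
 *		cmpxchg(&sent, sent, latest);	// on failure, sent reloads
 *		latest = load(&latest);		// pick up newer offsets
 *	}
 *
 * No lock is needed: a failed cmpxchg just means another thread published
 * first, and the reloaded values decide whether to go around again.
 */
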
2027 static kern_return_t
2028 exclaves_scheduler_boot(void)
2029 {
2030 	kern_return_t kr = KERN_FAILURE;
2031 	thread_t thread = current_thread();
2032 
2033 	exclaves_debug_printf(show_progress,
2034 	    "exclaves: Scheduler: Request to boot exclave\n");
2035 
2036 	/* This must happen on the boot CPU - bind the thread. */
2037 	bind_to_boot_core();
2038 
2039 	assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);
2040 	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
2041 
2042 	/*
2043 	 * Set the request/response buffers. These may be overridden later when
2044 	 * doing multicore setup.
2045 	 */
2046 	*PERCPU_GET(exclaves_request) =
2047 	    exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
2048 	*PERCPU_GET(exclaves_response) =
2049 	    exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);
2050 
2051 	XrtHosted_Response_t response = {.tag = XrtHosted_Response_Invalid};
2052 	uint64_t pagelist_pa = 0;
2053 
2054 	while (response.tag != XrtHosted_Response_AllExclavesBooted) {
2055 		const XrtHosted_Request_t request = pagelist_pa != 0 ?
2056 		    XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa):
2057 		    XrtHosted_Request_BootExclavesMsg();
2058 		pagelist_pa = 0;
2059 
2060 		kr = exclaves_scheduler_request(&request, &response);
2061 		if (kr != KERN_SUCCESS) {
2062 			exclaves_debug_printf(show_errors,
2063 			    "exclaves: Enter failed\n");
2064 			break;
2065 		}
2066 
2067 		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2068 
2069 		switch (response.tag) {
2070 		case XrtHosted_Response_Yield:
2071 			kr = handle_response_yield(true, 0, &response.Yield);
2072 			break;
2073 
2074 		case XrtHosted_Response_NothingScheduled:
2075 			kr = handle_response_nothing_scheduled(&response.NothingScheduled);
2076 			break;
2077 
2078 		case XrtHosted_Response_AllExclavesBooted:
2079 			kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
2080 			break;
2081 
2082 		case XrtHosted_Response_PmmEarlyAlloc:
2083 			kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
2084 			break;
2085 
2086 		case XrtHosted_Response_PanicBufferAddress:
2087 			handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
2088 			break;
2089 
2090 		default:
2091 			exclaves_debug_printf(show_errors,
2092 			    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
2093 			    response.tag);
2094 			kr = KERN_FAILURE;
2095 			break;
2096 		}
2097 
2098 		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2099 
2100 		/* Bail out if an error is hit. */
2101 		if (kr != KERN_SUCCESS) {
2102 			break;
2103 		}
2104 	}
2105 
2106 	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
2107 
2108 	unbind_from_boot_core();
2109 
2110 	return kr;
2111 }
2112 
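/*
 * To summarize the boot protocol: the loop above keeps sending
 * BootExclaves requests (or, when the previous response asked for pages,
 * a PmmEarlyAllocResponse carrying the physical address of the page list)
 * and dispatches each response until the scheduler reports
 * AllExclavesBooted. Any error or unexpected response tag aborts the boot.
 */
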
2113 kern_return_t
2114 exclaves_scheduler_resume_scheduling_context(const exclaves_ctx_t *ctx,
2115     bool interrupted)
2116 {
2117 	kern_return_t kr = KERN_SUCCESS;
2118 	thread_t thread = current_thread();
2119 	const ctid_t ctid = thread_get_ctid(thread);
2120 
2121 	exclaves_debug_printf(show_progress,
2122 	    "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);
2123 
2124 	XrtHosted_Response_t response = {};
2125 	const XrtHosted_Request_t request = interrupted ?
2126 	    XrtHosted_Request_InterruptWithHostIdMsg(
2127 		.thread = ctx->scid,
2128 		.hostId = ctid,
2129 		) :
2130 	    XrtHosted_Request_ResumeWithHostIdMsg(
2131 		.thread = ctx->scid,
2132 		.hostId = ctid,
2133 		);
2134 	kr = exclaves_scheduler_request(&request, &response);
2135 	if (kr) {
2136 		exclaves_debug_printf(show_errors, "exclaves: Enter failed\n");
2137 		return kr;
2138 	}
2139 
2140 	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2141 
2142 	switch (response.tag) {
2143 	case XrtHosted_Response_Wait:
2144 		kr = handle_response_wait(&response.Wait);
2145 		goto out;
2146 
2147 	case XrtHosted_Response_Wake:
2148 		kr = handle_response_wake(&response.Wake);
2149 		goto out;
2150 
2151 	case XrtHosted_Response_Yield:
2152 		kr = handle_response_yield(false, ctx->scid, &response.Yield);
2153 		goto out;
2154 
2155 	case XrtHosted_Response_Spawned:
2156 		kr = handle_response_spawned(ctx->scid, &response.Spawned);
2157 		goto out;
2158 
2159 	case XrtHosted_Response_Terminated:
2160 		kr = handle_response_terminated(&response.Terminated);
2161 		goto out;
2162 
2163 	case XrtHosted_Response_WakeWithOwner:
2164 		kr = handle_response_wake_with_owner(&response.WakeWithOwner);
2165 		goto out;
2166 
2167 	case XrtHosted_Response_PanicWait:
2168 		kr = handle_response_panic_wait(&response.PanicWait);
2169 		goto out;
2170 
2171 	case XrtHosted_Response_Suspended:
2172 		kr = handle_response_suspended(&response.Suspended);
2173 		goto out;
2174 
2175 	case XrtHosted_Response_Resumed:
2176 		kr = handle_response_resumed(&response.Resumed);
2177 		goto out;
2178 
2179 	case XrtHosted_Response_Interrupted:
2180 		kr = handle_response_interrupted(&response.Interrupted);
2181 		goto out;
2182 
2183 	case XrtHosted_Response_Invalid:
2184 	case XrtHosted_Response_Failure:
2185 	case XrtHosted_Response_Pong:
2186 	case XrtHosted_Response_SleepUntil:
2187 	case XrtHosted_Response_Awaken:
2188 	default:
2189 		exclaves_debug_printf(show_errors,
2190 		    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
2191 		    response.tag);
2192 		kr = KERN_FAILURE;
2193 		goto out;
2194 	}
2195 
2196 out:
2197 	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2198 	return kr;
2199 }
2200 
2201 /* -------------------------------------------------------------------------- */
2202 
2203 #pragma mark exclaves xnu proxy communication
2204 
2205 static kern_return_t
2206 exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
2207 {
2208 	if (success) {
2209 		return KERN_SUCCESS;
2210 	} else {
2211 		exclaves_debug_printf(show_errors,
2212 		    "exclaves: XrtHosted: %s[%d] (%s): %s\n",
2213 		    error->file,
2214 		    error->line,
2215 		    error->function,
2216 		    error->expression
2217 		    );
2218 		return KERN_FAILURE;
2219 	}
2220 }
2221 
2222 
2223 #pragma mark exclaves privilege management
2224 
2225 /*
2226  * All entitlement checking is enabled by default.
2227  */
2228 #define DEFAULT_ENTITLEMENT_FLAGS (~0)
2229 
2230 /*
2231  * boot-arg to control the use of entitlements.
2232  * Eventually this should be removed and entitlement checking should be gated on
2233  * the EXCLAVES_R_ENTITLEMENTS requirement.
2234  * This will be addressed with rdar://125153460.
2235  */
2236 TUNABLE(unsigned int, exclaves_entitlement_flags,
2237     "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);
2238 
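/*
 * Each exclaves_priv_t value is treated as a bit in
 * exclaves_entitlement_flags; with the default of ~0 every privilege is
 * entitlement-checked. As an illustrative (development-only) example,
 * booting with:
 *
 *	exclaves_entitlement_flags=0
 *
 * clears every bit, making has_entitlement()/has_entitlement_vnode()
 * short-circuit to true for all privileges.
 */
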
2239 static bool
2240 has_entitlement(task_t task, const exclaves_priv_t priv,
2241     const char *entitlement)
2242 {
2243 	/* Skip the entitlement if not enabled. */
2244 	if ((exclaves_entitlement_flags & priv) == 0) {
2245 		return true;
2246 	}
2247 
2248 	return IOTaskHasEntitlement(task, entitlement);
2249 }
2250 
2251 static bool
2252 has_entitlement_vnode(void *vnode, const int64_t off,
2253     const exclaves_priv_t priv, const char *entitlement)
2254 {
2255 	/* Skip the entitlement if not enabled. */
2256 	if ((exclaves_entitlement_flags & priv) == 0) {
2257 		return true;
2258 	}
2259 
2260 	return IOVnodeHasEntitlement(vnode, off, entitlement);
2261 }
2262 
2263 bool
2264 exclaves_has_priv(task_t task, exclaves_priv_t priv)
2265 {
2266 	const bool is_kernel = task == kernel_task;
2267 	const bool is_launchd = task_pid(task) == 1;
2268 
2269 	switch (priv) {
2270 	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
2271 		/* Both launchd and entitled tasks can spawn new conclaves. */
2272 		if (is_launchd) {
2273 			return true;
2274 		}
2275 		return has_entitlement(task, priv,
2276 		           "com.apple.private.exclaves.conclave-spawn");
2277 
2278 	case EXCLAVES_PRIV_KERNEL_DOMAIN:
2279 		/*
2280 		 * Both the kernel itself and user tasks with the right
2281 		 * privilege can access exclaves resources in the kernel domain.
2282 		 */
2283 		if (is_kernel) {
2284 			return true;
2285 		}
2286 
2287 		/*
2288 		 * If the task was entitled and has been through this path
2289 		 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
2290 		 */
2291 		if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
2292 			return true;
2293 		}
2294 
2295 		if (has_entitlement(task, priv,
2296 		    "com.apple.private.exclaves.kernel-domain")) {
2297 			task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
2298 			return true;
2299 		}
2300 
2301 		return false;
2302 
2303 	case EXCLAVES_PRIV_BOOT:
2304 		/* Both launchd and entitled tasks can boot exclaves. */
2305 		if (is_launchd) {
2306 			return true;
2307 		}
2308 		/* BEGIN IGNORE CODESTYLE */
2309 		return has_entitlement(task, priv,
2310 		    "com.apple.private.exclaves.boot");
2311 		/* END IGNORE CODESTYLE */
2312 
2313 	/* The CONCLAVE HOST priv is always checked by vnode. */
2314 	case EXCLAVES_PRIV_CONCLAVE_HOST:
2315 	default:
2316 		panic("bad exclaves privilege (%u)", priv);
2317 	}
2318 }
2319 
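/*
 * A typical call site (illustrative only) gates an operation on the
 * requesting task's privilege:
 *
 *	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
 *		return KERN_DENIED;
 *	}
 *
 * Note that EXCLAVES_PRIV_CONCLAVE_HOST must not be passed here; that
 * privilege is always checked against a vnode via
 * exclaves_has_priv_vnode() below.
 */
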
2320 bool
2321 exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
2322 {
2323 	switch (priv) {
2324 	case EXCLAVES_PRIV_CONCLAVE_HOST: {
2325 		const bool has_conclave_host = has_entitlement_vnode(vnode,
2326 		    off, priv, "com.apple.private.exclaves.conclave-host");
2327 
2328 		/*
2329 		 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
2330 		 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
2331 		 */
2332 
2333 		/* Only cross-check when both entitlements are being enforced. */
2334 		if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
2335 		    (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
2336 			return has_conclave_host;
2337 		}
2338 
2339 		const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
2340 		    EXCLAVES_PRIV_KERNEL_DOMAIN,
2341 		    "com.apple.private.exclaves.kernel-domain");
2342 
2343 		/* See if it has both. */
2344 		if (has_conclave_host && has_domain_kernel) {
2345 			exclaves_debug_printf(show_errors,
2346 			    "exclaves: task has both conclave-host and "
2347 			    "kernel-domain entitlements which is forbidden\n");
2348 			return false;
2349 		}
2350 
2351 		return has_conclave_host;
2352 	}
2353 
2354 	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
2355 		return has_entitlement_vnode(vnode, off, priv,
2356 		           "com.apple.private.exclaves.conclave-spawn");
2357 
2358 	default:
2359 		panic("bad exclaves privilege (%u)", priv);
2360 	}
2361 }
2362 
2363 
2364 #pragma mark exclaves stackshot range
2365 
2366 /* Unslid pointers defining the range of code which switches threads into
2367  * secure world */
2368 uintptr_t exclaves_enter_range_start;
2369 uintptr_t exclaves_enter_range_end;
2370 
2371 
2372 __startup_func
2373 static void
2374 initialize_exclaves_enter_range(void)
2375 {
2376 	exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
2377 	assert3u(exclaves_enter_range_start, !=, 0);
2378 	exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
2379 	assert3u(exclaves_enter_range_end, !=, 0);
2380 }
2381 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_enter_range);
2382 
2383 /*
2384  * Return true if the specified address is in exclaves_enter.
2385  */
2386 static bool
2387 exclaves_enter_in_range(uintptr_t addr, bool slid)
2388 {
2389 	return slid ?
2390 	       exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
2391 	       exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
2392 }
2393 
2394 uint32_t
2395 exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
2396 {
2397 	size_t i = 0;
2398 
2399 	// Check for a frame matching upcall code range
2400 	for (i = 0; i < nframes; i++) {
2401 		if (exclaves_upcall_in_range(addr[i], slid)) {
2402 			break;
2403 		}
2404 	}
2405 
2406 	// Insert exclaves stacks before the upcall frame when found
2407 	if (i < nframes) {
2408 		return (uint32_t)(i + 1);
2409 	}
2410 
2411 	// Check for a frame matching exclaves enter range
2412 	for (i = 0; i < nframes; i++) {
2413 		if (exclaves_enter_in_range(addr[i], slid)) {
2414 			break;
2415 		}
2416 	}
2417 
2418 	// Put exclaves stacks on top of kernel stacks by default
2419 	if (i == nframes) {
2420 		i = 0;
2421 	}
2422 	return (uint32_t)i;
2423 }
2424 
2425 #endif /* CONFIG_EXCLAVES */
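/*
 * Worked example (illustrative): given a collected backtrace
 *
 *	frame 0: kernel frame
 *	frame 1: frame within the upcall range
 *	frame 2: kernel frame
 *
 * the first scan stops at i == 1 and exclaves_stack_offset() returns 2,
 * splicing the exclaves stack in just above the upcall frame. With no
 * upcall frame but frame 1 in the exclaves-enter range, it returns 1.
 * With neither, it returns 0 and the exclaves stack sits on top of the
 * kernel stack.
 */
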
2426 
2427 
2428 #ifndef CONFIG_EXCLAVES
2429 /* Stubs for sensor functions which are not compiled in from exclaves.c when
2430  * CONFIG_EXCLAVES is disabled. */
2431 
2432 kern_return_t
2433 exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
2434     exclaves_sensor_status_t *status)
2435 {
2436 #pragma unused(sensor_type, flags, status)
2437 	return KERN_NOT_SUPPORTED;
2438 }
2439 
2440 kern_return_t
2441 exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
2442     exclaves_sensor_status_t *status)
2443 {
2444 #pragma unused(sensor_type, flags, status)
2445 	return KERN_NOT_SUPPORTED;
2446 }
2447 
2448 kern_return_t
2449 exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
2450     exclaves_sensor_status_t *status)
2451 {
2452 #pragma unused(sensor_type, flags, status)
2453 	return KERN_NOT_SUPPORTED;
2454 }
2455 
2456 #endif /* ! CONFIG_EXCLAVES */
2457